Repository: aws-samples/serverless-pdf-chat
Branch: main
Commit: 2c9ed9c00d5f
Files: 55
Total size: 90.5 KB
Directory structure:
gitextract_8ftuqah8/
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── backend/
│ ├── .gitignore
│ ├── __init__.py
│ ├── src/
│ │ ├── add_conversation/
│ │ │ ├── __init__.py
│ │ │ ├── main.py
│ │ │ └── requirements.txt
│ │ ├── delete_document/
│ │ │ ├── __init__.py
│ │ │ ├── main.py
│ │ │ └── requirements.txt
│ │ ├── generate_embeddings/
│ │ │ ├── __init__.py
│ │ │ ├── main.py
│ │ │ └── requirements.txt
│ │ ├── generate_presigned_url/
│ │ │ ├── __init__.py
│ │ │ ├── main.py
│ │ │ └── requirements.txt
│ │ ├── generate_response/
│ │ │ ├── __init__.py
│ │ │ ├── main.py
│ │ │ └── requirements.txt
│ │ ├── get_all_documents/
│ │ │ ├── __init__.py
│ │ │ └── main.py
│ │ ├── get_document/
│ │ │ ├── __init__.py
│ │ │ └── main.py
│ │ └── upload_trigger/
│ │ ├── __init__.py
│ │ ├── main.py
│ │ └── requirements.txt
│ └── template.yaml
└── frontend/
├── .eslintrc.cjs
├── .gitignore
├── index.html
├── package.json
├── postcss.config.js
├── src/
│ ├── App.tsx
│ ├── common/
│ │ ├── types.ts
│ │ └── utilities.ts
│ ├── components/
│ │ ├── ChatMessages.tsx
│ │ ├── ChatSidebar.tsx
│ │ ├── DocumentDetail.tsx
│ │ ├── DocumentList.tsx
│ │ ├── DocumentUploader.tsx
│ │ ├── Footer.tsx
│ │ └── Navigation.tsx
│ ├── index.css
│ ├── main.tsx
│ ├── routes/
│ │ ├── chat.tsx
│ │ ├── documents.tsx
│ │ └── layout.tsx
│ └── vite-env.d.ts
├── tailwind.config.js
├── tsconfig.json
├── tsconfig.node.json
└── vite.config.ts
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Created by https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode
### SAM ###
.aws-sam/
samconfig.toml
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### OSX ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
/out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Ruby plugin and RubyMine
/.rakeTasks
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
### PyCharm Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr
# Sonarlint plugin
.idea/sonarlint
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
.pytest_cache/
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule.*
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
### VisualStudioCode ###
.vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
.history
### Windows ###
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
# Build folder
*/build/*
# End of https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode
events/*.json
dependencies
================================================
FILE: CODE_OF_CONDUCT.md
================================================
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing Guidelines
Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional
documentation, we greatly value feedback and contributions from our community.
Please read through this document before submitting any issues or pull requests to ensure we have all the necessary
information to effectively respond to your bug report or contribution.
## Reporting Bugs/Feature Requests
We welcome you to use the GitHub issue tracker to report bugs or suggest features.
When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already
reported the issue. Please try to include as much information as you can. Details like these are incredibly useful:
* A reproducible test case or series of steps
* The version of our code being used
* Any modifications you've made relevant to the bug
* Anything unusual about your environment or deployment
## Contributing via Pull Requests
Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that:
1. You are working against the latest source on the *main* branch.
2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already.
3. You open an issue to discuss any significant work - we would hate for your time to be wasted.
To send us a pull request, please:
1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
## Finding contributions to work on
Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start.
## Code of Conduct
This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct).
For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact
opensource-codeofconduct@amazon.com with any additional questions or comments.
## Security issue notifications
If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue.
## Licensing
See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution.
================================================
FILE: LICENSE
================================================
MIT No Attribution
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: README.md
================================================
# Serverless document chat application
This sample application allows you to ask natural language questions of any PDF document you upload. It combines the text generation and analysis capabilities of an LLM with a vector search of the document content. The solution uses serverless services such as [Amazon Bedrock](https://aws.amazon.com/bedrock/) to access foundation models, [AWS Lambda](https://aws.amazon.com/lambda/) to run [LangChain](https://github.com/langchain-ai/langchain), and [Amazon DynamoDB](https://aws.amazon.com/dynamodb/) for conversational memory.
See the [accompanying blog post on the AWS Serverless Blog](https://aws.amazon.com/blogs/compute/building-a-serverless-document-chat-with-aws-lambda-and-amazon-bedrock/) for a detailed description and follow the deployment instructions below to get started.
> **Warning**
> This application is not ready for production use. It was written for demonstration and educational purposes. Review the [Security](#security) section of this README and consult with your security team before deploying this stack. No warranty is implied in this example.
> **Note**
> This architecture creates resources that have costs associated with them. Please see the [AWS Pricing](https://aws.amazon.com/pricing/) page for details and make sure to understand the costs before deploying this stack.
## Key features
- [Amazon Bedrock](https://aws.amazon.com/bedrock/) for serverless embedding and inference
- [LangChain](https://github.com/langchain-ai/langchain) to orchestrate a Q&A LLM chain
- [FAISS](https://github.com/facebookresearch/faiss) vector store
- [Amazon DynamoDB](https://aws.amazon.com/dynamodb/) for serverless conversational memory
- [AWS Lambda](https://aws.amazon.com/lambda/) for serverless compute
- Frontend built in [React](https://react.dev/), [TypeScript](https://www.typescriptlang.org/), [TailwindCSS](https://tailwindcss.com/), and [Vite](https://vitejs.dev/).
- Run locally or deploy to [AWS Amplify Hosting](https://aws.amazon.com/amplify/hosting/)
- [Amazon Cognito](https://aws.amazon.com/cognito/) for authentication
## How the application works

1. A user uploads a PDF document into an [Amazon Simple Storage Service](https://aws.amazon.com/s3/) (S3) bucket through a static web application frontend.
1. This upload triggers a metadata extraction and document embedding process. The process converts the text in the document into vectors. The vectors are loaded into a vector index and stored in S3 for later use.
1. When a user chats with a PDF document and sends a prompt to the backend, a Lambda function retrieves the index from S3 and searches for information related to the prompt.
1. An LLM then uses the results of this vector search, previous messages in the conversation, and its general-purpose capabilities to formulate a response to the user (see the sketch below).
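For illustration, steps 3 and 4 boil down to loading the per-document FAISS index from S3 and querying it with the same Bedrock embeddings that created it. A minimal sketch based on `backend/src/generate_response/main.py` (the bucket, user ID, and file name below are placeholders):

```python
import boto3
from langchain_aws.embeddings import BedrockEmbeddings
from langchain_community.vectorstores import FAISS

s3 = boto3.client("s3")
bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-east-1")

# Placeholder values for illustration only
bucket, user_id, file_name = "my-document-bucket", "cognito-user-sub", "example.pdf"

# Download the two FAISS artifacts created by the embedding function for this document
s3.download_file(bucket, f"{user_id}/{file_name}/index.faiss", "/tmp/index.faiss")
s3.download_file(bucket, f"{user_id}/{file_name}/index.pkl", "/tmp/index.pkl")

embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-text-v2:0", client=bedrock_runtime)
faiss_index = FAISS.load_local("/tmp", embeddings, allow_dangerous_deserialization=True)

# The Lambda function hands this index to a retrieval chain; a raw similarity search looks like this
docs = faiss_index.similarity_search("What is this document about?", k=3)
```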
## Deployment instructions
### Prerequisites
- [AWS SAM CLI](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-sam-cli.html)
- [Python](https://www.python.org/) 3.11 or greater
### Cloning the repository
Clone this repository:
```bash
git clone https://github.com/aws-samples/serverless-pdf-chat.git
```
### Amazon Bedrock setup
This application can be used with a variety of Amazon Bedrock models. See [Supported models in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-service.html#models-supported) for a complete list.
By default, this application uses **Amazon Titan Text Embeddings V2** to generate embeddings and **Anthropic Claude 3 Sonnet** for responses.
> **Important**
> Before you can use these models with this application, **you must request access in the Amazon Bedrock console**. See the [Model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) section of the Bedrock User Guide for detailed instructions.
> By default, this application is configured to use Amazon Bedrock in the `us-east-1` Region. Make sure you request model access in that Region (this does not have to be the same Region that you deploy this stack to).
To select your Bedrock model, specify the `ModelId` parameter during the AWS SAM deployment, such as `anthropic.claude-3-sonnet-20240229-v1:0`. See [Amazon Bedrock model IDs](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html) for a complete list.
The `ModelId` parameter is used by the GenerateResponseFunction Lambda function in the AWS SAM template to instantiate LangChain [ChatBedrock](https://python.langchain.com/docs/integrations/chat/bedrock/) and [ConversationalRetrievalChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain.html) objects, which retrieve the context relevant to a prompt from the document's vector index and pass it to the selected Bedrock model to generate a response.
```python
def bedrock_chain(faiss_index, memory, human_input, bedrock_runtime):
    chat = ChatBedrock(
        model_id=MODEL_ID,
        model_kwargs={'temperature': 0.0},
        client=bedrock_runtime,
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm=chat,
        chain_type="stuff",
        retriever=faiss_index.as_retriever(),
        memory=memory,
        return_source_documents=True,
    )
    response = chain.invoke({"question": human_input})
    return response
```
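The `memory` object passed to the chain is the conversational memory stored in the DynamoDB memory table. The snippet below mirrors the `create_memory` helper in `backend/src/generate_response/main.py`; history is keyed by the user's Cognito `sub` and a conversation ID, matching the memory table schema in the SAM template:

```python
import os

from langchain.memory import ConversationBufferMemory
from langchain_community.chat_message_histories import DynamoDBChatMessageHistory

# Table name is provided to the Lambda function as an environment variable
MEMORY_TABLE = os.environ["MEMORY_TABLE"]

def create_memory(user_id, conversation_id):
    # Chat history lives in DynamoDB, one item per user and conversation
    message_history = DynamoDBChatMessageHistory(
        table_name=MEMORY_TABLE,
        session_id=conversation_id,
        key={"userid": user_id, "SessionId": conversation_id},
    )
    return ConversationBufferMemory(
        memory_key="chat_history",
        chat_memory=message_history,
        input_key="question",
        output_key="answer",
        return_messages=True,
    )
```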
### Deploy the frontend with AWS Amplify Hosting
[AWS Amplify Hosting](https://aws.amazon.com/amplify/hosting/) enables a fully-managed deployment of the application's React frontend in an AWS-managed account using Amazon S3 and [Amazon CloudFront](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Introduction.html). You can optionally run the React frontend locally by skipping to [Deploy the application with AWS SAM](#Deploy-the-application-with-AWS-SAM).
To set up Amplify Hosting:
1. Fork this GitHub repository and take note of your repository URL, for example `https://github.com/user/serverless-pdf-chat/`.
1. Create a GitHub fine-grained access token for the new repository by following [this guide](https://docs.aws.amazon.com/amplify/latest/userguide/setting-up-GitHub-access.html). For the **Repository permissions**, select **Read and write** for **Contents** and **Webhooks**.
1. Create a new secret called `serverless-pdf-chat-github-token` in AWS Secrets Manager and input your fine-grained access token as plaintext. Select the **Plaintext** tab and confirm your secret looks like this:
```json
github_pat_T2wyo------------------------------------------------------------------------rs0Pp
```
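If you prefer to create the secret programmatically, a minimal boto3 sketch (the token value is a placeholder for your own fine-grained access token):

```python
import boto3

secrets = boto3.client("secretsmanager")
secrets.create_secret(
    Name="serverless-pdf-chat-github-token",  # name resolved by the AmplifyApp resource in template.yaml
    SecretString="github_pat_...",            # paste your fine-grained access token here
)
```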
### Deploy the application with AWS SAM
1. Change to the `backend` directory and [build](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-cli-command-reference-sam-build.html) the application:
```bash
cd backend
sam build
```
1. [Deploy](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-cli-command-reference-sam-deploy.html) the application into your AWS account:
```bash
sam deploy --guided
```
1. For **Stack Name**, choose `serverless-pdf-chat`.
1. For **Frontend**, specify the environment ("local", "amplify") for the frontend of the application.
1. If you selected "amplify", specify the URL of the forked Git repository containing the application code.
1. Specify the Amazon Bedrock model ID. For example, `anthropic.claude-3-sonnet-20240229-v1:0`.
1. For the remaining options, keep the defaults by pressing the enter key.
AWS SAM will now provision the AWS resources defined in the `backend/template.yaml` template. Once the deployment is completed successfully, you will see a set of output values similar to the following:
```bash
CloudFormation outputs from deployed stack
-------------------------------------------------------------------------------
Outputs
-------------------------------------------------------------------------------
Key CognitoUserPool
Description -
Value us-east-1_gxKtRocFs
Key CognitoUserPoolClient
Description -
Value 874ghcej99f8iuo0lgdpbrmi76k
Key ApiGatewayBaseUrl
Description -
Value https://abcd1234.execute-api.us-east-1.amazonaws.com/dev/
-------------------------------------------------------------------------------
```
If you chose to deploy the React frontend with Amplify Hosting, navigate to the Amplify console to check the build status. If the build does not start automatically, trigger it through the Amplify console.
If you chose to run the React frontend locally and connect to the deployed resources in AWS, you will use the CloudFormation stack outputs in the following section.
### Optional: Run the React frontend locally
Create a file named `.env.development` in the `frontend` directory. [Vite will use this file](https://vitejs.dev/guide/env-and-mode.html) to set up environment variables when we run the application locally.
Copy the following file content and replace the values with the outputs provided by AWS SAM:
```plaintext
VITE_REGION=us-east-1
VITE_API_ENDPOINT=https://abcd1234.execute-api.us-east-1.amazonaws.com/dev/
VITE_USER_POOL_ID=us-east-1_gxKtRocFs
VITE_USER_POOL_CLIENT_ID=874ghcej99f8iuo0lgdpbrmi76k
```
Next, install the frontend's dependencies by running the following command in the `frontend` directory:
```bash
npm ci
```
Finally, to start the application locally, run the following command in the `frontend` directory:
```bash
npm run dev
```
Vite will now start the application at `http://localhost:5173`.
### Create a user in the Amazon Cognito user pool
The application uses Amazon Cognito to authenticate users through a login screen. In this step, you will create a user to access the application.
Perform the following steps to create a user in the Cognito user pool:
1. Navigate to the **Amazon Cognito console**.
1. Find the user pool with an ID matching the output provided by AWS SAM above.
1. Under Users, choose **Create user**.
1. Enter an email address and a password that adheres to the password requirements.
1. Choose **Create user**.
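Alternatively, you can create the user with the AWS SDK. A minimal boto3 sketch (user pool ID, email address, and password are placeholders; use the `CognitoUserPool` output from the SAM deployment):

```python
import boto3

cognito = boto3.client("cognito-idp")
user_pool_id = "us-east-1_gxKtRocFs"  # placeholder: CognitoUserPool stack output
email = "user@example.com"

# Create the user without sending an invitation email
cognito.admin_create_user(
    UserPoolId=user_pool_id,
    Username=email,
    UserAttributes=[
        {"Name": "email", "Value": email},
        {"Name": "email_verified", "Value": "true"},
    ],
    MessageAction="SUPPRESS",
)

# Set a permanent password so the user can sign in immediately
cognito.admin_set_user_password(
    UserPoolId=user_pool_id,
    Username=email,
    Password="Str0ng-Passw0rd!",
    Permanent=True,
)
```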
Navigate back to your Amplify website URL or localhost address to log in with the new user's credentials.
## Cleanup
1. Delete any secrets in AWS Secrets Manager created as part of this walkthrough.
1. [Empty the Amazon S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/empty-bucket.html) created as part of the AWS SAM template.
1. Run the following command in the `backend` directory of the project to delete all associated resources:
```bash
sam delete
```
## Troubleshooting
If you are experiencing issues when running the [`sam build`](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-cli-command-reference-sam-build.html) command, try setting the `--use-container` flag (requires Docker):
```bash
sam build --use-container
```
If you are still experiencing issues despite using `--use-container`, try switching the AWS Lambda functions from `arm64` to `x86_64` in `backend/template.yaml` (as well as switching to the `x86_64` version of the Powertools layer):
```yaml
Globals:
  Function:
    Runtime: python3.11
    Handler: main.lambda_handler
    Architectures:
      - x86_64
    Tracing: Active
    Environment:
      Variables:
        LOG_LEVEL: INFO
    Layers:
      - !Sub arn:aws:lambda:${AWS::Region}:017000801446:layer:AWSLambdaPowertoolsPythonV3-python311-x86_64:7
```
## Security
This application was written for demonstration and educational purposes, not for production use. The [Security Pillar of the AWS Well-Architected Framework](https://docs.aws.amazon.com/wellarchitected/latest/security-pillar/welcome.html), in addition to your own established processes, can help you adapt this sample for a production deployment. Take note of the following:
- The application uses encryption in transit and at rest with AWS-managed keys where applicable. Optionally, use [AWS KMS](https://aws.amazon.com/kms/) with [DynamoDB](https://docs.aws.amazon.com/kms/latest/developerguide/services-dynamodb.html), [SQS](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-server-side-encryption.html), and [S3](https://docs.aws.amazon.com/kms/latest/developerguide/services-s3.html) for more control over encryption keys.
- This application uses [Powertools for AWS Lambda (Python)](https://github.com/aws-powertools/powertools-lambda-python) to log inputs and outputs to CloudWatch Logs. By default, this can include sensitive data contained in user input. Adjust the log level and remove log statements to fit your security requirements.
- [API Gateway access logging](https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-logging.html#set-up-access-logging-using-console) and [usage plans](https://docs.aws.amazon.com/apigateway/latest/developerguide/api-gateway-api-usage-plans.html) are not activated in this code sample. Similarly, [S3 access logging](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-s3-bucket-loggingconfig.html) is currently not enabled.
- To simplify the setup of the demo, this solution uses AWS managed policies associated with IAM roles that contain resource wildcards. Consider scoping these policies down further to fit your needs. Note that the AWS managed `AWSLambdaSQSQueueExecutionRole` policy contains a resource wildcard; this is known behavior, see [this GitHub issue](https://github.com/aws/serverless-application-model/issues/2118) for details.
- If your security controls require inspecting network traffic, consider [adjusting the AWS SAM template](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-function.html) to attach the Lambda functions to a VPC via their [`VpcConfig`](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-lambda-function-vpcconfig.html) property.
See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information.
## License
This library is licensed under the MIT-0 License. See the [LICENSE](LICENSE) file.
================================================
FILE: backend/.gitignore
================================================
# Created by https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode
### SAM ###
.aws-sam/
samconfig.toml
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### OSX ###
*.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### PyCharm ###
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff:
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/dictionaries
# Sensitive or high-churn files:
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.xml
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
# Gradle:
.idea/**/gradle.xml
.idea/**/libraries
# CMake
cmake-build-debug/
# Mongo Explorer plugin:
.idea/**/mongoSettings.xml
## File-based project format:
*.iws
## Plugin-specific files:
# IntelliJ
/out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# Ruby plugin and RubyMine
/.rakeTasks
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
### PyCharm Patch ###
# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
# *.iml
# modules.xml
# .idea/misc.xml
# *.ipr
# Sonarlint plugin
.idea/sonarlint
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
.pytest_cache/
nosetests.xml
coverage.xml
*.cover
.hypothesis/
# Translations
*.mo
*.pot
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule.*
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
### VisualStudioCode ###
.vscode
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
.history
### Windows ###
# Windows thumbnail cache files
Thumbs.db
ehthumbs.db
ehthumbs_vista.db
# Folder config file
Desktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msm
*.msp
# Windows shortcuts
*.lnk
# Build folder
*/build/*
# End of https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode
================================================
FILE: backend/__init__.py
================================================
================================================
FILE: backend/src/add_conversation/__init__.py
================================================
================================================
FILE: backend/src/add_conversation/main.py
================================================
import os, json
from datetime import datetime
import boto3
import shortuuid
from aws_lambda_powertools import Logger
DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"]
MEMORY_TABLE = os.environ["MEMORY_TABLE"]
ddb = boto3.resource("dynamodb")
document_table = ddb.Table(DOCUMENT_TABLE)
memory_table = ddb.Table(MEMORY_TABLE)
logger = Logger()
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
user_id = event["requestContext"]["authorizer"]["claims"]["sub"]
document_id = event["pathParameters"]["documentid"]
response = document_table.get_item(
Key={"userid": user_id, "documentid": document_id}
)
conversations = response["Item"]["conversations"]
logger.info({"conversations": conversations})
conversation_id = shortuuid.uuid()
timestamp = datetime.utcnow()
timestamp_str = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
conversation = {
"conversationid": conversation_id,
"created": timestamp_str,
}
conversations.append(conversation)
logger.info({"conversation_new": conversation})
document_table.update_item(
Key={"userid": user_id, "documentid": document_id},
UpdateExpression="SET conversations = :conversations",
ExpressionAttributeValues={":conversations": conversations},
)
conversation = {"userid": user_id, "SessionId": conversation_id, "History": []}
memory_table.put_item(Item=conversation)
return {
"statusCode": 200,
"headers": {
"Content-Type": "application/json",
"Access-Control-Allow-Headers": "*",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "*",
},
"body": json.dumps({"conversationid": conversation_id}),
}
================================================
FILE: backend/src/add_conversation/requirements.txt
================================================
shortuuid==1.0.11
================================================
FILE: backend/src/delete_document/__init__.py
================================================
================================================
FILE: backend/src/delete_document/main.py
================================================
import os, json
import boto3
from aws_lambda_powertools import Logger
DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"]
MEMORY_TABLE = os.environ["MEMORY_TABLE"]
BUCKET = os.environ["BUCKET"]
ddb = boto3.resource("dynamodb")
document_table = ddb.Table(DOCUMENT_TABLE)
memory_table = ddb.Table(MEMORY_TABLE)
s3 = boto3.client("s3")
logger = Logger()
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
user_id = event["requestContext"]["authorizer"]["claims"]["sub"]
document_id = event["pathParameters"]["documentid"]
response = document_table.get_item(
Key={"userid": user_id, "documentid": document_id}
)
document = response["Item"]
logger.info({"document": document})
logger.info("Deleting DDB items")
with memory_table.batch_writer() as batch:
for item in document["conversations"]:
batch.delete_item(Key={"userid": user_id, "SessionId": item["conversationid"]})
document_table.delete_item(
Key={"userid": user_id, "documentid": document_id}
)
logger.info("Deleting S3 objects")
filename = document["filename"]
objects = [{"Key": f"{user_id}/{filename}/{key}"} for key in [filename, "index.faiss", "index.pkl"]]
response = s3.delete_objects(
Bucket=BUCKET,
Delete={
"Objects": objects,
"Quiet": True,
},
)
logger.info({"Response": response})
return {
"statusCode": 200,
"headers": {
"Content-Type": "application/json",
"Access-Control-Allow-Headers": "*",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "*",
},
"body": json.dumps(
{},
default=str,
),
}
================================================
FILE: backend/src/delete_document/requirements.txt
================================================
boto3==1.28.57
botocore==1.31.57
================================================
FILE: backend/src/generate_embeddings/__init__.py
================================================
================================================
FILE: backend/src/generate_embeddings/main.py
================================================
import os, json
import boto3
from aws_lambda_powertools import Logger
from langchain.indexes import VectorstoreIndexCreator
from langchain_aws.embeddings import BedrockEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"]
BUCKET = os.environ["BUCKET"]
EMBEDDING_MODEL_ID = os.environ["EMBEDDING_MODEL_ID"]
s3 = boto3.client("s3")
ddb = boto3.resource("dynamodb")
document_table = ddb.Table(DOCUMENT_TABLE)
logger = Logger()
def set_doc_status(user_id, document_id, status):
document_table.update_item(
Key={"userid": user_id, "documentid": document_id},
UpdateExpression="SET docstatus = :docstatus",
ExpressionAttributeValues={":docstatus": status},
)
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
event_body = json.loads(event["Records"][0]["body"])
document_id = event_body["documentid"]
user_id = event_body["user"]
key = event_body["key"]
file_name_full = key.split("/")[-1]
set_doc_status(user_id, document_id, "PROCESSING")
s3.download_file(BUCKET, key, f"/tmp/{file_name_full}")
loader = PyPDFLoader(f"/tmp/{file_name_full}")
bedrock_runtime = boto3.client(
service_name="bedrock-runtime",
region_name="us-east-1",
)
embeddings = BedrockEmbeddings(
model_id=EMBEDDING_MODEL_ID,
client=bedrock_runtime,
region_name="us-east-1",
)
index_creator = VectorstoreIndexCreator(
vectorstore_cls=FAISS,
embedding=embeddings,
)
index_from_loader = index_creator.from_loaders([loader])
index_from_loader.vectorstore.save_local("/tmp")
s3.upload_file(
"/tmp/index.faiss", BUCKET, f"{user_id}/{file_name_full}/index.faiss"
)
s3.upload_file("/tmp/index.pkl", BUCKET, f"{user_id}/{file_name_full}/index.pkl")
set_doc_status(user_id, document_id, "READY")
================================================
FILE: backend/src/generate_embeddings/requirements.txt
================================================
boto3
botocore
faiss-cpu==1.7.4
langchain==0.3.21
langchain-community==0.3.27
langchain-aws==0.2.17
pypdf==3.17.0
urllib3
================================================
FILE: backend/src/generate_presigned_url/__init__.py
================================================
================================================
FILE: backend/src/generate_presigned_url/main.py
================================================
import os, json
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError
import shortuuid
from aws_lambda_powertools import Logger
BUCKET = os.environ["BUCKET"]
REGION = os.environ["REGION"]
s3 = boto3.client(
"s3",
endpoint_url=f"https://s3.{REGION}.amazonaws.com",
config=Config(
s3={"addressing_style": "virtual"}, region_name=REGION, signature_version="s3v4"
),
)
logger = Logger()
def s3_key_exists(bucket, key):
    try:
        s3.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError:
        return False
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
user_id = event["requestContext"]["authorizer"]["claims"]["sub"]
file_name_full = event["queryStringParameters"]["file_name"]
file_name = file_name_full.split(".pdf")[0]
exists = s3_key_exists(BUCKET, f"{user_id}/{file_name_full}/{file_name_full}")
logger.info(
{
"user_id": user_id,
"file_name_full": file_name_full,
"file_name": file_name,
"exists": exists,
}
)
if exists:
suffix = shortuuid.ShortUUID().random(length=4)
key = f"{user_id}/{file_name}-{suffix}.pdf/{file_name}-{suffix}.pdf"
else:
key = f"{user_id}/{file_name}.pdf/{file_name}.pdf"
presigned_url = s3.generate_presigned_url(
ClientMethod="put_object",
Params={
"Bucket": BUCKET,
"Key": key,
"ContentType": "application/pdf",
},
ExpiresIn=300,
HttpMethod="PUT",
)
return {
"statusCode": 200,
"headers": {
"Content-Type": "application/json",
"Access-Control-Allow-Headers": "*",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "*",
},
"body": json.dumps({"presignedurl": presigned_url}),
}
================================================
FILE: backend/src/generate_presigned_url/requirements.txt
================================================
boto3==1.28.57
botocore==1.31.57
shortuuid==1.0.11
================================================
FILE: backend/src/generate_response/__init__.py
================================================
================================================
FILE: backend/src/generate_response/main.py
================================================
import os
import json
import boto3
from aws_lambda_powertools import Logger
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import DynamoDBChatMessageHistory
from langchain_community.vectorstores import FAISS
from langchain_aws.chat_models import ChatBedrock
from langchain_aws.embeddings import BedrockEmbeddings
MEMORY_TABLE = os.environ["MEMORY_TABLE"]
BUCKET = os.environ["BUCKET"]
MODEL_ID = os.environ["MODEL_ID"]
EMBEDDING_MODEL_ID = os.environ["EMBEDDING_MODEL_ID"]
s3 = boto3.client("s3")
logger = Logger()
def get_embeddings():
bedrock_runtime = boto3.client(
service_name="bedrock-runtime",
region_name="us-east-1",
)
embeddings = BedrockEmbeddings(
model_id=EMBEDDING_MODEL_ID,
client=bedrock_runtime,
region_name="us-east-1",
)
return embeddings
def get_faiss_index(embeddings, user, file_name):
s3.download_file(BUCKET, f"{user}/{file_name}/index.faiss", "/tmp/index.faiss")
s3.download_file(BUCKET, f"{user}/{file_name}/index.pkl", "/tmp/index.pkl")
faiss_index = FAISS.load_local("/tmp", embeddings, allow_dangerous_deserialization=True)
return faiss_index
def create_memory(user_id, conversation_id):
message_history = DynamoDBChatMessageHistory(
table_name=MEMORY_TABLE, session_id=conversation_id, key={"userid": user_id, "SessionId":conversation_id}
)
memory = ConversationBufferMemory(
memory_key="chat_history",
chat_memory=message_history,
input_key="question",
output_key="answer",
return_messages=True,
)
return memory
def bedrock_chain(faiss_index, memory, human_input, bedrock_runtime):
    # Use the pre-configured Bedrock runtime client so the chat model calls the same Region as the embeddings
    chat = ChatBedrock(
        model_id=MODEL_ID,
        model_kwargs={'temperature': 0.0},
        client=bedrock_runtime,
    )
chain = ConversationalRetrievalChain.from_llm(
llm=chat,
chain_type="stuff",
retriever=faiss_index.as_retriever(),
memory=memory,
return_source_documents=True,
)
response = chain.invoke({"question": human_input})
return response
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
event_body = json.loads(event["body"])
file_name = event_body["fileName"]
human_input = event_body["prompt"]
conversation_id = event["pathParameters"]["conversationid"]
user = event["requestContext"]["authorizer"]["claims"]["sub"]
embeddings = get_embeddings()
faiss_index = get_faiss_index(embeddings, user, file_name)
memory = create_memory(user, conversation_id)
bedrock_runtime = boto3.client(
service_name="bedrock-runtime",
region_name="us-east-1",
)
response = bedrock_chain(faiss_index, memory, human_input, bedrock_runtime)
if response:
print(f"{MODEL_ID} -\nPrompt: {human_input}\n\nResponse: {response['answer']}")
else:
raise ValueError(f"Unsupported model ID: {MODEL_ID}")
logger.info(str(response['answer']))
return {
"statusCode": 200,
"headers": {
"Content-Type": "application/json",
"Access-Control-Allow-Headers": "*",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "*",
},
"body": json.dumps(response['answer']),
}
================================================
FILE: backend/src/generate_response/requirements.txt
================================================
boto3
botocore
faiss-cpu==1.7.4
langchain==0.3.21
langchain-community==0.3.27
langchain-aws==0.2.17
urllib3
================================================
FILE: backend/src/get_all_documents/__init__.py
================================================
================================================
FILE: backend/src/get_all_documents/main.py
================================================
import os, json
import boto3
from boto3.dynamodb.conditions import Key
from aws_lambda_powertools import Logger
DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"]
ddb = boto3.resource("dynamodb")
document_table = ddb.Table(DOCUMENT_TABLE)
logger = Logger()
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
user_id = event["requestContext"]["authorizer"]["claims"]["sub"]
response = document_table.query(KeyConditionExpression=Key("userid").eq(user_id))
items = sorted(response["Items"], key=lambda item: item["created"], reverse=True)
for item in items:
item["conversations"] = sorted(
item["conversations"], key=lambda conv: conv["created"], reverse=True
)
logger.info({"items": items})
return {
"statusCode": 200,
"headers": {
"Content-Type": "application/json",
"Access-Control-Allow-Headers": "*",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "*",
},
"body": json.dumps(items, default=str),
}
================================================
FILE: backend/src/get_document/__init__.py
================================================
================================================
FILE: backend/src/get_document/main.py
================================================
import os, json
import boto3
from boto3.dynamodb.conditions import Key
from aws_lambda_powertools import Logger
DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"]
MEMORY_TABLE = os.environ["MEMORY_TABLE"]
ddb = boto3.resource("dynamodb")
document_table = ddb.Table(DOCUMENT_TABLE)
memory_table = ddb.Table(MEMORY_TABLE)
logger = Logger()
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
user_id = event["requestContext"]["authorizer"]["claims"]["sub"]
document_id = event["pathParameters"]["documentid"]
conversation_id = event["pathParameters"]["conversationid"]
response = document_table.get_item(
Key={"userid": user_id, "documentid": document_id}
)
document = response["Item"]
document["conversations"] = sorted(
document["conversations"], key=lambda conv: conv["created"], reverse=True
)
logger.info({"document": document})
response = memory_table.get_item(Key={"userid": user_id, "SessionId": conversation_id})
    if "Item" not in response:
return {
"statusCode": 403
}
messages = response["Item"]["History"]
logger.info({"messages": messages})
return {
"statusCode": 200,
"headers": {
"Content-Type": "application/json",
"Access-Control-Allow-Headers": "*",
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Methods": "*",
},
"body": json.dumps(
{
"conversationid": conversation_id,
"document": document,
"messages": messages,
},
default=str,
),
}
================================================
FILE: backend/src/upload_trigger/__init__.py
================================================
================================================
FILE: backend/src/upload_trigger/main.py
================================================
import os, json
from datetime import datetime
import boto3
import PyPDF2
import shortuuid
import urllib.parse
from aws_lambda_powertools import Logger
DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"]
MEMORY_TABLE = os.environ["MEMORY_TABLE"]
QUEUE = os.environ["QUEUE"]
BUCKET = os.environ["BUCKET"]
ddb = boto3.resource("dynamodb")
document_table = ddb.Table(DOCUMENT_TABLE)
memory_table = ddb.Table(MEMORY_TABLE)
sqs = boto3.client("sqs")
s3 = boto3.client("s3")
logger = Logger()
@logger.inject_lambda_context(log_event=True)
def lambda_handler(event, context):
key = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"])
split = key.split("/")
user_id = split[0]
file_name = split[1]
document_id = shortuuid.uuid()
s3.download_file(BUCKET, key, f"/tmp/{file_name}")
with open(f"/tmp/{file_name}", "rb") as f:
reader = PyPDF2.PdfReader(f)
pages = str(len(reader.pages))
conversation_id = shortuuid.uuid()
timestamp = datetime.utcnow()
timestamp_str = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
document = {
"userid": user_id,
"documentid": document_id,
"filename": file_name,
"created": timestamp_str,
"pages": pages,
"filesize": str(event["Records"][0]["s3"]["object"]["size"]),
"docstatus": "UPLOADED",
"conversations": [],
}
conversation = {"conversationid": conversation_id, "created": timestamp_str}
document["conversations"].append(conversation)
document_table.put_item(Item=document)
conversation = {"userid": user_id, "SessionId": conversation_id, "History": []}
memory_table.put_item(Item=conversation)
message = {
"documentid": document_id,
"key": key,
"user": user_id,
}
sqs.send_message(QueueUrl=QUEUE, MessageBody=json.dumps(message))
================================================
FILE: backend/src/upload_trigger/requirements.txt
================================================
boto3==1.28.57
botocore==1.31.57
PyPDF2==3.0.1
shortuuid==1.0.11
================================================
FILE: backend/template.yaml
================================================
AWSTemplateFormatVersion: "2010-09-09"
Transform: AWS::Serverless-2016-10-31
Description: >
serverless-pdf-chat
SAM Template for serverless-pdf-chat
Globals:
Function:
Runtime: python3.11
Handler: main.lambda_handler
Architectures:
- arm64
Tracing: Active
Environment:
Variables:
LOG_LEVEL: INFO
Layers:
- !Sub arn:aws:lambda:${AWS::Region}:017000801446:layer:AWSLambdaPowertoolsPythonV3-python311-arm64:7
Parameters:
Frontend:
Default: amplify
Type: String
AllowedValues:
- local
- amplify
Repository:
Type: String
ModelId:
Default: "anthropic.claude-3-sonnet-20240229-v1:0"
Type: String
EmbeddingModelId:
Default: "amazon.titan-embed-text-v2:0"
Type: String
Conditions:
DeployToAmplifyHosting: !Equals
- !Ref Frontend
- amplify
Resources:
DocumentBucket:
Type: "AWS::S3::Bucket"
Properties:
BucketName: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}"
CorsConfiguration:
CorsRules:
- AllowedHeaders:
- "*"
AllowedMethods:
- GET
- PUT
- HEAD
- POST
- DELETE
AllowedOrigins:
- "*"
PublicAccessBlockConfiguration:
BlockPublicAcls: true
BlockPublicPolicy: true
IgnorePublicAcls: true
RestrictPublicBuckets: true
DocumentBucketPolicy:
Type: "AWS::S3::BucketPolicy"
Properties:
PolicyDocument:
Id: EnforceHttpsPolicy
Version: "2012-10-17"
Statement:
- Sid: EnforceHttpsSid
Effect: Deny
Principal: "*"
Action: "s3:*"
Resource:
- !Sub "arn:aws:s3:::${DocumentBucket}/*"
- !Sub "arn:aws:s3:::${DocumentBucket}"
Condition:
Bool:
"aws:SecureTransport": "false"
Bucket: !Ref DocumentBucket
EmbeddingQueue:
Type: AWS::SQS::Queue
DeletionPolicy: Delete
UpdateReplacePolicy: Delete
Properties:
VisibilityTimeout: 180
MessageRetentionPeriod: 3600
EmbeddingQueuePolicy:
Type: AWS::SQS::QueuePolicy
Properties:
Queues:
- !Ref EmbeddingQueue
PolicyDocument:
Version: "2012-10-17"
Id: SecureTransportPolicy
Statement:
- Sid: AllowSecureTransportOnly
Effect: Deny
Principal: "*"
Action: "SQS:*"
Resource: "*"
Condition:
Bool:
aws:SecureTransport: false
DocumentTable:
Type: AWS::DynamoDB::Table
DeletionPolicy: Delete
UpdateReplacePolicy: Delete
Properties:
KeySchema:
- AttributeName: userid
KeyType: HASH
- AttributeName: documentid
KeyType: RANGE
AttributeDefinitions:
- AttributeName: userid
AttributeType: S
- AttributeName: documentid
AttributeType: S
BillingMode: PAY_PER_REQUEST
MemoryTable:
Type: AWS::DynamoDB::Table
DeletionPolicy: Delete
UpdateReplacePolicy: Delete
Properties:
KeySchema:
- AttributeName: userid
KeyType: HASH
- AttributeName: SessionId
KeyType: RANGE
AttributeDefinitions:
- AttributeName: userid
AttributeType: S
- AttributeName: SessionId
AttributeType: S
BillingMode: PAY_PER_REQUEST
CognitoUserPool:
Type: AWS::Cognito::UserPool
DeletionPolicy: Delete
UpdateReplacePolicy: Delete
Properties:
AutoVerifiedAttributes:
- email
UsernameAttributes:
- email
AdminCreateUserConfig:
AllowAdminCreateUserOnly: true
Policies:
PasswordPolicy:
MinimumLength: 8
RequireLowercase: true
RequireNumbers: true
RequireSymbols: true
RequireUppercase: true
CognitoUserPoolClient:
Type: AWS::Cognito::UserPoolClient
Properties:
UserPoolId: !Ref CognitoUserPool
ClientName: !Ref CognitoUserPool
GenerateSecret: false
Api:
Type: AWS::Serverless::Api
Properties:
StageName: dev
Auth:
DefaultAuthorizer: CognitoAuthorizer
AddDefaultAuthorizerToCorsPreflight: false
Authorizers:
CognitoAuthorizer:
UserPoolArn: !GetAtt CognitoUserPool.Arn
Cors:
AllowOrigin: "'*'"
AllowHeaders: "'*'"
AllowMethods: "'*'"
GeneratePresignedUrlFunction:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/generate_presigned_url/
Policies:
- S3CrudPolicy:
BucketName: !Ref DocumentBucket
Environment:
Variables:
BUCKET: !Ref DocumentBucket
REGION: !Sub ${AWS::Region}
Events:
Root:
Type: Api
Properties:
RestApiId: !Ref Api
Path: /generate_presigned_url
Method: GET
UploadTriggerFunction:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/upload_trigger/
Policies:
- DynamoDBCrudPolicy:
TableName: !Ref DocumentTable
- DynamoDBCrudPolicy:
TableName: !Ref MemoryTable
- S3ReadPolicy:
BucketName: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}*"
- SQSSendMessagePolicy:
QueueName: !GetAtt EmbeddingQueue.QueueName
Environment:
Variables:
DOCUMENT_TABLE: !Ref DocumentTable
MEMORY_TABLE: !Ref MemoryTable
QUEUE: !GetAtt EmbeddingQueue.QueueName
BUCKET: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}"
Events:
S3Event:
Type: S3
Properties:
Bucket: !Ref DocumentBucket
Events:
- s3:ObjectCreated:*
Filter:
S3Key:
Rules:
- Name: suffix
Value: .pdf
GetDocumentFunction:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/get_document/
Policies:
- DynamoDBReadPolicy:
TableName: !Ref DocumentTable
- DynamoDBReadPolicy:
TableName: !Ref MemoryTable
Environment:
Variables:
DOCUMENT_TABLE: !Ref DocumentTable
MEMORY_TABLE: !Ref MemoryTable
Events:
Root:
Type: Api
Properties:
RestApiId: !Ref Api
Path: /doc/{documentid}/{conversationid}
Method: GET
GetAllDocuments:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/get_all_documents/
Policies:
- DynamoDBReadPolicy:
TableName: !Ref DocumentTable
Environment:
Variables:
DOCUMENT_TABLE: !Ref DocumentTable
Events:
Root:
Type: Api
Properties:
RestApiId: !Ref Api
Path: /doc
Method: GET
AddConversationFunction:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/add_conversation/
Policies:
- DynamoDBCrudPolicy:
TableName: !Ref DocumentTable
- DynamoDBCrudPolicy:
TableName: !Ref MemoryTable
Environment:
Variables:
DOCUMENT_TABLE: !Ref DocumentTable
MEMORY_TABLE: !Ref MemoryTable
Events:
Root:
Type: Api
Properties:
RestApiId: !Ref Api
Path: /doc/{documentid}
Method: POST
GenerateEmbeddingsFunction:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/generate_embeddings/
Timeout: 180
MemorySize: 2048
Policies:
- SQSPollerPolicy:
QueueName: !GetAtt EmbeddingQueue.QueueName
- S3CrudPolicy:
BucketName: !Ref DocumentBucket
- DynamoDBCrudPolicy:
TableName: !Ref DocumentTable
- Statement:
- Sid: "BedrockScopedAccess"
Effect: "Allow"
Action: "bedrock:InvokeModel"
Resource: !Sub "arn:aws:bedrock:*::foundation-model/${EmbeddingModelId}"
Environment:
Variables:
DOCUMENT_TABLE: !Ref DocumentTable
BUCKET: !Ref DocumentBucket
EMBEDDING_MODEL_ID: !Ref EmbeddingModelId
Events:
EmbeddingQueueEvent:
Type: SQS
Properties:
Queue: !GetAtt EmbeddingQueue.Arn
BatchSize: 1
GenerateResponseFunction:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/generate_response/
Timeout: 30
MemorySize: 2048
Policies:
- DynamoDBCrudPolicy:
TableName: !Ref MemoryTable
- S3CrudPolicy:
BucketName: !Ref DocumentBucket
- Statement:
- Sid: "BedrockScopedAccess"
Effect: "Allow"
Action: "bedrock:InvokeModel"
Resource:
- !Sub "arn:aws:bedrock:*::foundation-model/${ModelId}"
- !Sub "arn:aws:bedrock:*::foundation-model/${EmbeddingModelId}"
Environment:
Variables:
MEMORY_TABLE: !Ref MemoryTable
BUCKET: !Ref DocumentBucket
MODEL_ID: !Ref ModelId
EMBEDDING_MODEL_ID: !Ref EmbeddingModelId
Events:
Root:
Type: Api
Properties:
RestApiId: !Ref Api
Path: /{documentid}/{conversationid}
Method: POST
DeleteDocumentFunction:
Type: AWS::Serverless::Function
Properties:
CodeUri: src/delete_document/
Policies:
- DynamoDBCrudPolicy:
TableName: !Ref DocumentTable
- DynamoDBCrudPolicy:
TableName: !Ref MemoryTable
- S3CrudPolicy:
BucketName: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}*"
Environment:
Variables:
DOCUMENT_TABLE: !Ref DocumentTable
MEMORY_TABLE: !Ref MemoryTable
BUCKET: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}"
Events:
Root:
Type: Api
Properties:
RestApiId: !Ref Api
Path: /doc/{documentid}
Method: DELETE
AmplifyApp:
Type: AWS::Amplify::App
Condition: DeployToAmplifyHosting
Properties:
Name: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}"
Repository: !Ref Repository
BuildSpec: |
version: 1
applications:
- frontend:
phases:
preBuild:
commands:
- npm ci
build:
commands:
- npm run build
artifacts:
baseDirectory: dist
files:
- '**/*'
cache:
paths:
- node_modules/**/*
appRoot: frontend
AccessToken: "{{resolve:secretsmanager:serverless-pdf-chat-github-token}}"
EnvironmentVariables:
- Name: AMPLIFY_MONOREPO_APP_ROOT
Value: frontend
- Name: VITE_REGION
Value: !Ref AWS::Region
- Name: VITE_API_ENDPOINT
Value: !Sub "https://${Api}.execute-api.${AWS::Region}.${AWS::URLSuffix}/dev/"
- Name: VITE_USER_POOL_ID
Value: !Ref CognitoUserPool
- Name: VITE_USER_POOL_CLIENT_ID
Value: !Ref CognitoUserPoolClient
AmplifyBranch:
Type: AWS::Amplify::Branch
Condition: DeployToAmplifyHosting
Properties:
BranchName: main
AppId: !GetAtt AmplifyApp.AppId
EnableAutoBuild: true
Stage: PRODUCTION
Outputs:
CognitoUserPool:
Value: !Ref CognitoUserPool
CognitoUserPoolClient:
Value: !Ref CognitoUserPoolClient
ApiGatewayBaseUrl:
Value: !Sub "https://${Api}.execute-api.${AWS::Region}.${AWS::URLSuffix}/dev/"
================================================
FILE: frontend/.eslintrc.cjs
================================================
module.exports = {
env: { browser: true, es2020: true },
extends: [
'eslint:recommended',
'plugin:@typescript-eslint/recommended',
'plugin:react-hooks/recommended',
],
parser: '@typescript-eslint/parser',
parserOptions: { ecmaVersion: 'latest', sourceType: 'module' },
plugins: ['react-refresh'],
rules: {
'react-refresh/only-export-components': 'warn',
},
}
================================================
FILE: frontend/.gitignore
================================================
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
dist
dist-ssr
*.local
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
# Vite
.env.development
================================================
FILE: frontend/index.html
================================================
DocChat - Chat with a PDF
================================================
FILE: frontend/package.json
================================================
{
"name": "frontend",
"private": true,
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"build": "tsc && vite build",
"lint": "eslint src --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
"preview": "vite preview"
},
"dependencies": {
"@aws-amplify/ui-react": "^6.1.14",
"@headlessui/react": "^1.7.15",
"@heroicons/react": "^2.0.18",
"aws-amplify": "^6.5.0",
"date-fns": "^2.30.0",
"filesize": "^10.0.7",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-router-dom": "^7.5.2"
},
"devDependencies": {
"@tailwindcss/typography": "^0.5.9",
"@types/react": "^18.0.37",
"@types/react-dom": "^18.0.11",
"@typescript-eslint/eslint-plugin": "^5.59.0",
"@typescript-eslint/parser": "^5.59.0",
"@vitejs/plugin-react": "^4.3.4",
"autoprefixer": "^10.4.14",
"eslint": "^8.38.0",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-react-refresh": "^0.3.4",
"postcss": "^8.4.24",
"tailwindcss": "^3.3.2",
"typescript": "^5.0.2",
"vite": "^6.4.1"
}
}
================================================
FILE: frontend/postcss.config.js
================================================
export default {
plugins: {
tailwindcss: {},
autoprefixer: {},
},
}
================================================
FILE: frontend/src/App.tsx
================================================
import { Amplify } from "aws-amplify";
import { fetchAuthSession } from "aws-amplify/auth";
import { withAuthenticator } from "@aws-amplify/ui-react";
import { createBrowserRouter, RouterProvider } from "react-router-dom";
import "./index.css";
import Layout from "./routes/layout";
import Documents from "./routes/documents";
import Chat from "./routes/chat";
Amplify.configure({
Auth: {
Cognito: {
userPoolId: import.meta.env.VITE_USER_POOL_ID,
userPoolClientId: import.meta.env.VITE_USER_POOL_CLIENT_ID,
},
},
API: {
REST: {
"serverless-pdf-chat": {
endpoint: import.meta.env.VITE_API_ENDPOINT,
region: import.meta.env.VITE_REGION,
},
},
}}, {
API: {
REST: {
headers: async () => {
const tokens = (await fetchAuthSession()).tokens;
const jwt = tokens?.idToken?.toString();
return {
"authorization": `Bearer ${jwt}`
};
}
}
}
});
const router = createBrowserRouter([
{
path: "/",
element: <Layout />,
children: [
{
index: true,
Component: Documents,
},
{
path: "/doc/:documentid/:conversationid",
Component: Chat,
},
],
},
]);
function App() {
return <RouterProvider router={router} />;
}
export default withAuthenticator(App, { hideSignUp: true });
================================================
FILE: frontend/src/common/types.ts
================================================
export interface Document {
documentid: string;
userid: string;
filename: string;
filesize: string;
docstatus: string;
created: string;
pages: string;
conversations: {
conversationid: string;
created: string;
}[];
}
export interface Conversation {
conversationid: string;
document: Document;
messages: {
type: string;
data: {
content: string;
example: boolean;
additional_kwargs: {};
};
}[];
}
================================================
FILE: frontend/src/common/utilities.ts
================================================
import { format } from "date-fns";
export function getDateTime(date: string): string {
return format(new Date(date), "MMMM d, yyyy - H:mm");
}
================================================
FILE: frontend/src/components/ChatMessages.tsx
================================================
import { PaperAirplaneIcon } from "@heroicons/react/24/outline";
import Loading from "../../public/loading-dots.svg";
import { Conversation } from "../common/types";
interface ChatMessagesProps {
conversation: Conversation;
messageStatus: string;
handlePromptChange: (event: React.ChangeEvent) => void;
handleKeyPress: (event: React.KeyboardEvent) => void;
prompt: string;
submitMessage: () => Promise<void>;
}
const ChatMessages: React.FC<ChatMessagesProps> = ({
prompt,
conversation,
messageStatus,
submitMessage,
handlePromptChange,
handleKeyPress,
}) => {
return (