Repository: aws-samples/serverless-pdf-chat Branch: main Commit: 2c9ed9c00d5f Files: 55 Total size: 90.5 KB Directory structure: gitextract_8ftuqah8/ ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── backend/ │ ├── .gitignore │ ├── __init__.py │ ├── src/ │ │ ├── add_conversation/ │ │ │ ├── __init__.py │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── delete_document/ │ │ │ ├── __init__.py │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── generate_embeddings/ │ │ │ ├── __init__.py │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── generate_presigned_url/ │ │ │ ├── __init__.py │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── generate_response/ │ │ │ ├── __init__.py │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── get_all_documents/ │ │ │ ├── __init__.py │ │ │ └── main.py │ │ ├── get_document/ │ │ │ ├── __init__.py │ │ │ └── main.py │ │ └── upload_trigger/ │ │ ├── __init__.py │ │ ├── main.py │ │ └── requirements.txt │ └── template.yaml └── frontend/ ├── .eslintrc.cjs ├── .gitignore ├── index.html ├── package.json ├── postcss.config.js ├── src/ │ ├── App.tsx │ ├── common/ │ │ ├── types.ts │ │ └── utilities.ts │ ├── components/ │ │ ├── ChatMessages.tsx │ │ ├── ChatSidebar.tsx │ │ ├── DocumentDetail.tsx │ │ ├── DocumentList.tsx │ │ ├── DocumentUploader.tsx │ │ ├── Footer.tsx │ │ └── Navigation.tsx │ ├── index.css │ ├── main.tsx │ ├── routes/ │ │ ├── chat.tsx │ │ ├── documents.tsx │ │ └── layout.tsx │ └── vite-env.d.ts ├── tailwind.config.js ├── tsconfig.json ├── tsconfig.node.json └── vite.config.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Created by https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode ### SAM ### .aws-sam/ samconfig.toml ### Linux ### *~ # temporary files which can be created if a process 
still has a handle open of a deleted file .fuse_hidden* # KDE directory preferences .directory # Linux trash folder which might appear on any partition or disk .Trash-* # .nfs files are created when an open file is removed but is still being accessed .nfs* ### OSX ### *.DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### PyCharm ### # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 # User-specific stuff: .idea/**/workspace.xml .idea/**/tasks.xml .idea/dictionaries # Sensitive or high-churn files: .idea/**/dataSources/ .idea/**/dataSources.ids .idea/**/dataSources.xml .idea/**/dataSources.local.xml .idea/**/sqlDataSources.xml .idea/**/dynamic.xml .idea/**/uiDesigner.xml # Gradle: .idea/**/gradle.xml .idea/**/libraries # CMake cmake-build-debug/ # Mongo Explorer plugin: .idea/**/mongoSettings.xml ## File-based project format: *.iws ## Plugin-specific files: # IntelliJ /out/ # mpeltonen/sbt-idea plugin .idea_modules/ # JIRA plugin atlassian-ide-plugin.xml # Cursive Clojure plugin .idea/replstate.xml # Ruby plugin and RubyMine /.rakeTasks # Crashlytics plugin (for Android Studio and IntelliJ) com_crashlytics_export_strings.xml crashlytics.properties crashlytics-build.properties fabric.properties ### PyCharm Patch ### # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 # *.iml # modules.xml # .idea/misc.xml # *.ipr # Sonarlint plugin .idea/sonarlint ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # 
Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache .pytest_cache/ nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule.* # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ ### VisualStudioCode ### .vscode .vscode/* !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json .history ### Windows ### # Windows thumbnail cache files Thumbs.db ehthumbs.db ehthumbs_vista.db # Folder config file Desktop.ini # Recycle Bin used on file shares $RECYCLE.BIN/ # Windows Installer files *.cab *.msi *.msm *.msp # Windows shortcuts *.lnk # Build folder */build/* # End of https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode events/*.json dependencies ================================================ FILE: CODE_OF_CONDUCT.md ================================================ ## Code of Conduct This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. 
================================================ FILE: CONTRIBUTING.md ================================================ # Contributing Guidelines Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional documentation, we greatly value feedback and contributions from our community. Please read through this document before submitting any issues or pull requests to ensure we have all the necessary information to effectively respond to your bug report or contribution. ## Reporting Bugs/Feature Requests We welcome you to use the GitHub issue tracker to report bugs or suggest features. When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: * A reproducible test case or series of steps * The version of our code being used * Any modifications you've made relevant to the bug * Anything unusual about your environment or deployment ## Contributing via Pull Requests Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 1. You are working against the latest source on the *main* branch. 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. To send us a pull request, please: 1. Fork the repository. 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 3. Ensure local tests pass. 4. Commit to your fork using clear commit messages. 5. Send us a pull request, answering any default questions in the pull request interface. 6. 
Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). ## Finding contributions to work on Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. ## Code of Conduct This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact opensource-codeofconduct@amazon.com with any additional questions or comments. ## Security issue notifications If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. ## Licensing See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. ================================================ FILE: LICENSE ================================================ MIT No Attribution Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # Serverless document chat application This sample application allows you to ask natural language questions of any PDF document you upload. It combines the text generation and analysis capabilities of an LLM with a vector search of the document content. The solution uses serverless services such as [Amazon Bedrock](https://aws.amazon.com/bedrock/) to access foundational models, [AWS Lambda](https://aws.amazon.com/lambda/) to run [LangChain](https://github.com/langchain-ai/langchain), and [Amazon DynamoDB](https://aws.amazon.com/dynamodb/) for conversational memory. See the [accompanying blog post on the AWS Serverless Blog](https://aws.amazon.com/blogs/compute/building-a-serverless-document-chat-with-aws-lambda-and-amazon-bedrock/) for a detailed description and follow the deployment instructions below to get started.

> **Warning** > This application is not ready for production use. It was written for demonstration and educational purposes. Review the [Security](#security) section of this README and consult with your security team before deploying this stack. No warranty is implied in this example. > **Note** > This architecture creates resources that have costs associated with them. Please see the [AWS Pricing](https://aws.amazon.com/pricing/) page for details and make sure to understand the costs before deploying this stack. ## Key features - [Amazon Bedrock](https://aws.amazon.com/de/bedrock/) for serverless embedding and inference - [LangChain](https://github.com/hwchase17/langchain) to orchestrate a Q&A LLM chain - [FAISS](https://github.com/facebookresearch/faiss) vector store - [Amazon DynamoDB](https://aws.amazon.com/dynamodb/) for serverless conversational memory - [AWS Lambda](https://aws.amazon.com/lambda/) for serverless compute - Frontend built in [React](https://react.dev/), [TypeScript](https://www.typescriptlang.org/), [TailwindCSS](https://tailwindcss.com/), and [Vite](https://vitejs.dev/). - Run locally or deploy to [AWS Amplify Hosting](https://aws.amazon.com/amplify/hosting/) - [Amazon Cognito](https://aws.amazon.com/cognito/) for authentication ## How the application works ![Serverless PDF Chat architecture](architecture.png "Serverless PDF Chat architecture") 1. A user uploads a PDF document into an [Amazon Simple Storage Service](https://aws.amazon.com/s3/) (S3) bucket through a static web application frontend. 1. This upload triggers a metadata extraction and document embedding process. The process converts the text in the document into vectors. The vectors are loaded into a vector index and stored in S3 for later use. 1. When a user chats with a PDF document and sends a prompt to the backend, a Lambda function retrieves the index from S3 and searches for information related to the prompt. 1. 
An LLM then uses the results of this vector search, previous messages in the conversation, and its general-purpose capabilities to formulate a response to the user. ## Deployment instructions ### Prerequisites - [AWS SAM CLI](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/install-sam-cli.html) - [Python](https://www.python.org/) 3.11 or greater ### Cloning the repository Clone this repository: ```bash git clone https://github.com/aws-samples/serverless-pdf-chat.git ``` ### Amazon Bedrock setup This application can be used with a variety of Amazon Bedrock models. See [Supported models in Amazon Bedrock](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-service.html#models-supported) for a complete list. By default, this application uses **Titan Embeddings G1 - Text** to generate embeddings and **Anthropic Claude v3 Sonnet** for responses. > **Important -** > Before you can use these models with this application, **you must request access in the Amazon Bedrock console**. See the [Model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) section of the Bedrock User Guide for detailed instructions. > By default, this application is configured to use Amazon Bedrock in the `us-east-1` Region; make sure you request model access in that Region (this does not have to be the same Region that you deploy this stack to). To select your Bedrock model, specify the `ModelId` parameter during the AWS SAM deployment, such as `anthropic.claude-3-sonnet-20240229-v1:0`. See [Amazon Bedrock model IDs](https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html) for a complete list. 
The `ModelId` parameter is used in the GenerateResponseFunction Lambda function of your AWS SAM template to instantiate [LangChain BedrockChat](https://js.langchain.com/v0.1/docs/integrations/chat/bedrock/) and [ConversationalRetrievalChain](https://api.python.langchain.com/en/latest/chains/langchain.chains.conversational_retrieval.base.ConversationalRetrievalChain.html) objects, providing efficient retrieval of relevant context from large PDF datasets to enable the Bedrock model-generated response. ```python def bedrock_chain(faiss_index, memory, human_input, bedrock_runtime): chat = BedrockChat( model_id=MODEL_ID, model_kwargs={'temperature': 0.0} ) chain = ConversationalRetrievalChain.from_llm( llm=chat, chain_type="stuff", retriever=faiss_index.as_retriever(), memory=memory, return_source_documents=True, ) response = chain.invoke({"question": human_input}) return response ``` ### Deploy the frontend with AWS Amplify Hosting [AWS Amplify Hosting](https://aws.amazon.com/amplify/hosting/) enables a fully-managed deployment of the application's React frontend in an AWS-managed account using Amazon S3 and [Amazon CloudFront](https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/Introduction.html). You can optionally run the React frontend locally by skipping to [Deploy the application with AWS SAM](#Deploy-the-application-with-AWS-SAM). To set up Amplify Hosting: 1. Fork this GitHub repository and take note of your repository URL, for example `https://github.com/user/serverless-pdf-chat/`. 1. Create a GitHub fine-grained access token for the new repository by following [this guide](https://docs.aws.amazon.com/amplify/latest/userguide/setting-up-GitHub-access.html). For the **Repository permissions**, select **Read and write** for **Content** and **Webhooks**. 1. Create a new secret called `serverless-pdf-chat-github-token` in AWS Secrets Manager and input your fine-grained access token as plaintext. 
Select the **Plaintext** tab and confirm your secret looks like this: ```json github_pat_T2wyo------------------------------------------------------------------------rs0Pp ``` ### Deploy the application with AWS SAM 1. Change to the `backend` directory and [build](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-cli-command-reference-sam-build.html) the application: ```bash cd backend sam build ``` 1. [Deploy](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-cli-command-reference-sam-deploy.html) the application into your AWS account: ```bash sam deploy --guided ``` 1. For **Stack Name**, choose `serverless-pdf-chat`. 1. For **Frontend**, specify the environment ("local", "amplify") for the frontend of the application. 1. If you selected "amplify", specify the URL of the forked Git repository containing the application code. 1. Specify the Amazon Bedrock model ID. For example, `anthropic.claude-3-sonnet-20240229-v1:0`. 1. For the remaining options, keep the defaults by pressing the enter key. AWS SAM will now provision the AWS resources defined in the `backend/template.yaml` template. Once the deployment is completed successfully, you will see a set of output values similar to the following: ```bash CloudFormation outputs from deployed stack ------------------------------------------------------------------------------- Outputs ------------------------------------------------------------------------------- Key CognitoUserPool Description - Value us-east-1_gxKtRocFs Key CognitoUserPoolClient Description - Value 874ghcej99f8iuo0lgdpbrmi76k Key ApiGatewayBaseUrl Description - Value https://abcd1234.execute-api.us-east-1.amazonaws.com/dev/ ------------------------------------------------------------------------------- ``` If you selected to deploy the React frontend using Amplify Hosting, navigate to the Amplify console to check the build status. 
If the build does not start automatically, trigger it through the Amplify console. If you selected to run the React frontend locally and connect to the deployed resources in AWS, you will use the CloudFormation stack outputs in the following section. ### Optional: Run the React frontend locally Create a file named `.env.development` in the `frontend` directory. [Vite will use this file](https://vitejs.dev/guide/env-and-mode.html) to set up environment variables when we run the application locally. Copy the following file content and replace the values with the outputs provided by AWS SAM: ```plaintext VITE_REGION=us-east-1 VITE_API_ENDPOINT=https://abcd1234.execute-api.us-east-1.amazonaws.com/dev/ VITE_USER_POOL_ID=us-east-1_gxKtRocFs VITE_USER_POOL_CLIENT_ID=874ghcej99f8iuo0lgdpbrmi76k ``` Next, install the frontend's dependencies by running the following command in the `frontend` directory: ```bash npm ci ``` Finally, to start the application locally, run the following command in the `frontend` directory: ```bash npm run dev ``` Vite will now start the application under `http://localhost:5173`. ### Create a user in the Amazon Cognito user pool The application uses Amazon Cognito to authenticate users through a login screen. In this step, you will create a user to access the application. Perform the following steps to create a user in the Cognito user pool: 1. Navigate to the **Amazon Cognito console**. 1. Find the user pool with an ID matching the output provided by AWS SAM above. 1. Under Users, choose **Create user**. 1. Enter an email address and a password that adheres to the password requirements. 1. Choose **Create user**. Navigate back to your Amplify website URL or local host address to log in with the new user's credentials. ## Cleanup 1. Delete any secrets in AWS Secrets Manager created as part of this walkthrough. 1. 
[Empty the Amazon S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/empty-bucket.html) created as part of the AWS SAM template. 1. Run the following command in the `backend` directory of the project to delete all associated resources: ```bash sam delete ``` ## Troubleshooting If you are experiencing issues when running the [`sam build`](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-cli-command-reference-sam-build.html) command, try setting the `--use-container` flag (requires Docker): ```bash sam build --use-container ``` If you are still experiencing issues despite using `--use-container`, try switching the AWS Lambda functions from `arm64` to `x86_64` in the `backend/template.yaml` (as well as switching to the `x86_64` version of Powertools): ```yaml Globals: Function: Runtime: python3.11 Handler: main.lambda_handler Architectures: - x86_64 Tracing: Active Environment: Variables: LOG_LEVEL: INFO Layers: - !Sub arn:aws:lambda:${AWS::Region}:017000801446:layer:AWSLambdaPowertoolsPythonV2:51 ``` ## Security This application was written for demonstration and educational purposes and not for production use. The [Security Pillar of the AWS Well-Architected Framework](https://docs.aws.amazon.com/wellarchitected/latest/security-pillar/welcome.html) can support you in further adopting the sample into a production deployment in addition to your own established processes. Take note of the following: - The application uses encryption in transit and at rest with AWS-managed keys where applicable. Optionally, use [AWS KMS](https://aws.amazon.com/kms/) with [DynamoDB](https://docs.aws.amazon.com/kms/latest/developerguide/services-dynamodb.html), [SQS](https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-server-side-encryption.html), and [S3](https://docs.aws.amazon.com/kms/latest/developerguide/services-s3.html) for more control over encryption keys. 
- This application uses [Powertools for AWS Lambda (Python)](https://github.com/aws-powertools/powertools-lambda-python) to log inputs and outputs to CloudWatch Logs. By default, this can include sensitive data contained in user input. Adjust the log level and remove log statements to fit your security requirements. - [API Gateway access logging](https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-logging.html#set-up-access-logging-using-console) and [usage plans](https://docs.aws.amazon.com/apigateway/latest/developerguide/api-gateway-api-usage-plans.html) are not activated in this code sample. Similarly, [S3 access logging](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-s3-bucket-loggingconfig.html) is currently not enabled. - In order to simplify the setup of the demo, this solution uses AWS managed policies associated with IAM roles that contain wildcards on resources. Please consider further scoping down the policies as you see fit according to your needs. Please note that there is a resource wildcard on the AWS managed `AWSLambdaSQSQueueExecutionRole`. This is a known behaviour, see [this GitHub issue](https://github.com/aws/serverless-application-model/issues/2118) for details. - If your security controls require inspecting network traffic, consider [adjusting the AWS SAM template](https://docs.aws.amazon.com/serverless-application-model/latest/developerguide/sam-resource-function.html) to attach the Lambda functions to a VPC via its [`VpcConfig`](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-lambda-function-vpcconfig.html). See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. ## License This library is licensed under the MIT-0 License. See the [LICENSE](LICENSE) file. 
================================================ FILE: backend/.gitignore ================================================ # Created by https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode ### SAM ### .aws-sam/ samconfig.toml ### Linux ### *~ # temporary files which can be created if a process still has a handle open of a deleted file .fuse_hidden* # KDE directory preferences .directory # Linux trash folder which might appear on any partition or disk .Trash-* # .nfs files are created when an open file is removed but is still being accessed .nfs* ### OSX ### *.DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### PyCharm ### # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 # User-specific stuff: .idea/**/workspace.xml .idea/**/tasks.xml .idea/dictionaries # Sensitive or high-churn files: .idea/**/dataSources/ .idea/**/dataSources.ids .idea/**/dataSources.xml .idea/**/dataSources.local.xml .idea/**/sqlDataSources.xml .idea/**/dynamic.xml .idea/**/uiDesigner.xml # Gradle: .idea/**/gradle.xml .idea/**/libraries # CMake cmake-build-debug/ # Mongo Explorer plugin: .idea/**/mongoSettings.xml ## File-based project format: *.iws ## Plugin-specific files: # IntelliJ /out/ # mpeltonen/sbt-idea plugin .idea_modules/ # JIRA plugin atlassian-ide-plugin.xml # Cursive Clojure plugin .idea/replstate.xml # Ruby plugin and RubyMine /.rakeTasks # Crashlytics plugin (for Android Studio and IntelliJ) com_crashlytics_export_strings.xml crashlytics.properties crashlytics-build.properties 
fabric.properties ### PyCharm Patch ### # Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 # *.iml # modules.xml # .idea/misc.xml # *.ipr # Sonarlint plugin .idea/sonarlint ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache .pytest_cache/ nosetests.xml coverage.xml *.cover .hypothesis/ # Translations *.mo *.pot # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule.* # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ ### VisualStudioCode ### .vscode .vscode/* !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json .history ### Windows ### # Windows thumbnail cache files Thumbs.db ehthumbs.db ehthumbs_vista.db # Folder config file Desktop.ini # Recycle Bin used on file shares $RECYCLE.BIN/ # Windows Installer files *.cab *.msi *.msm *.msp # Windows shortcuts *.lnk # Build folder */build/* # End of https://www.gitignore.io/api/osx,linux,python,windows,pycharm,visualstudiocode ================================================ FILE: backend/__init__.py ================================================ 
================================================ FILE: backend/src/add_conversation/__init__.py ================================================ ================================================ FILE: backend/src/add_conversation/main.py ================================================ import os, json from datetime import datetime import boto3 import shortuuid from aws_lambda_powertools import Logger DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"] MEMORY_TABLE = os.environ["MEMORY_TABLE"] ddb = boto3.resource("dynamodb") document_table = ddb.Table(DOCUMENT_TABLE) memory_table = ddb.Table(MEMORY_TABLE) logger = Logger() @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): user_id = event["requestContext"]["authorizer"]["claims"]["sub"] document_id = event["pathParameters"]["documentid"] response = document_table.get_item( Key={"userid": user_id, "documentid": document_id} ) conversations = response["Item"]["conversations"] logger.info({"conversations": conversations}) conversation_id = shortuuid.uuid() timestamp = datetime.utcnow() timestamp_str = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ") conversation = { "conversationid": conversation_id, "created": timestamp_str, } conversations.append(conversation) logger.info({"conversation_new": conversation}) document_table.update_item( Key={"userid": user_id, "documentid": document_id}, UpdateExpression="SET conversations = :conversations", ExpressionAttributeValues={":conversations": conversations}, ) conversation = {"userid": user_id, "SessionId": conversation_id, "History": []} memory_table.put_item(Item=conversation) return { "statusCode": 200, "headers": { "Content-Type": "application/json", "Access-Control-Allow-Headers": "*", "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "*", }, "body": json.dumps({"conversationid": conversation_id}), } ================================================ FILE: backend/src/add_conversation/requirements.txt 
================================================ shortuuid==1.0.11 ================================================ FILE: backend/src/delete_document/__init__.py ================================================ ================================================ FILE: backend/src/delete_document/main.py ================================================ import os, json import boto3 from aws_lambda_powertools import Logger DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"] MEMORY_TABLE = os.environ["MEMORY_TABLE"] BUCKET = os.environ["BUCKET"] ddb = boto3.resource("dynamodb") document_table = ddb.Table(DOCUMENT_TABLE) memory_table = ddb.Table(MEMORY_TABLE) s3 = boto3.client("s3") logger = Logger() @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): user_id = event["requestContext"]["authorizer"]["claims"]["sub"] document_id = event["pathParameters"]["documentid"] response = document_table.get_item( Key={"userid": user_id, "documentid": document_id} ) document = response["Item"] logger.info({"document": document}) logger.info("Deleting DDB items") with memory_table.batch_writer() as batch: for item in document["conversations"]: batch.delete_item(Key={"userid": user_id, "SessionId": item["conversationid"]}) document_table.delete_item( Key={"userid": user_id, "documentid": document_id} ) logger.info("Deleting S3 objects") filename = document["filename"] objects = [{"Key": f"{user_id}/{filename}/{key}"} for key in [filename, "index.faiss", "index.pkl"]] response = s3.delete_objects( Bucket=BUCKET, Delete={ "Objects": objects, "Quiet": True, }, ) logger.info({"Response": response}) return { "statusCode": 200, "headers": { "Content-Type": "application/json", "Access-Control-Allow-Headers": "*", "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "*", }, "body": json.dumps( {}, default=str, ), } ================================================ FILE: backend/src/delete_document/requirements.txt 
================================================ boto3==1.28.57 botocore==1.31.57 ================================================ FILE: backend/src/generate_embeddings/__init__.py ================================================ ================================================ FILE: backend/src/generate_embeddings/main.py ================================================ import os, json import boto3 from aws_lambda_powertools import Logger from langchain.indexes import VectorstoreIndexCreator from langchain_aws.embeddings import BedrockEmbeddings from langchain_community.document_loaders import PyPDFLoader from langchain_community.vectorstores import FAISS DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"] BUCKET = os.environ["BUCKET"] EMBEDDING_MODEL_ID = os.environ["EMBEDDING_MODEL_ID"] s3 = boto3.client("s3") ddb = boto3.resource("dynamodb") document_table = ddb.Table(DOCUMENT_TABLE) logger = Logger() def set_doc_status(user_id, document_id, status): document_table.update_item( Key={"userid": user_id, "documentid": document_id}, UpdateExpression="SET docstatus = :docstatus", ExpressionAttributeValues={":docstatus": status}, ) @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): event_body = json.loads(event["Records"][0]["body"]) document_id = event_body["documentid"] user_id = event_body["user"] key = event_body["key"] file_name_full = key.split("/")[-1] set_doc_status(user_id, document_id, "PROCESSING") s3.download_file(BUCKET, key, f"/tmp/{file_name_full}") loader = PyPDFLoader(f"/tmp/{file_name_full}") bedrock_runtime = boto3.client( service_name="bedrock-runtime", region_name="us-east-1", ) embeddings = BedrockEmbeddings( model_id=EMBEDDING_MODEL_ID, client=bedrock_runtime, region_name="us-east-1", ) index_creator = VectorstoreIndexCreator( vectorstore_cls=FAISS, embedding=embeddings, ) index_from_loader = index_creator.from_loaders([loader]) index_from_loader.vectorstore.save_local("/tmp") s3.upload_file( "/tmp/index.faiss", BUCKET, 
f"{user_id}/{file_name_full}/index.faiss" ) s3.upload_file("/tmp/index.pkl", BUCKET, f"{user_id}/{file_name_full}/index.pkl") set_doc_status(user_id, document_id, "READY") ================================================ FILE: backend/src/generate_embeddings/requirements.txt ================================================ boto3 botocore faiss-cpu==1.7.4 langchain==0.3.21 langchain-community==0.3.27 langchain-aws==0.2.17 pypdf==3.17.0 urllib3 ================================================ FILE: backend/src/generate_presigned_url/__init__.py ================================================ ================================================ FILE: backend/src/generate_presigned_url/main.py ================================================ import os, json import boto3 from botocore.config import Config import shortuuid from aws_lambda_powertools import Logger BUCKET = os.environ["BUCKET"] REGION = os.environ["REGION"] s3 = boto3.client( "s3", endpoint_url=f"https://s3.{REGION}.amazonaws.com", config=Config( s3={"addressing_style": "virtual"}, region_name=REGION, signature_version="s3v4" ), ) logger = Logger() def s3_key_exists(bucket, key): try: s3.head_object(Bucket=bucket, Key=key) return True except: return False @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): user_id = event["requestContext"]["authorizer"]["claims"]["sub"] file_name_full = event["queryStringParameters"]["file_name"] file_name = file_name_full.split(".pdf")[0] exists = s3_key_exists(BUCKET, f"{user_id}/{file_name_full}/{file_name_full}") logger.info( { "user_id": user_id, "file_name_full": file_name_full, "file_name": file_name, "exists": exists, } ) if exists: suffix = shortuuid.ShortUUID().random(length=4) key = f"{user_id}/{file_name}-{suffix}.pdf/{file_name}-{suffix}.pdf" else: key = f"{user_id}/{file_name}.pdf/{file_name}.pdf" presigned_url = s3.generate_presigned_url( ClientMethod="put_object", Params={ "Bucket": BUCKET, "Key": key, "ContentType": 
"application/pdf", }, ExpiresIn=300, HttpMethod="PUT", ) return { "statusCode": 200, "headers": { "Content-Type": "application/json", "Access-Control-Allow-Headers": "*", "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "*", }, "body": json.dumps({"presignedurl": presigned_url}), } ================================================ FILE: backend/src/generate_presigned_url/requirements.txt ================================================ boto3==1.28.57 botocore==1.31.57 shortuuid==1.0.11 ================================================ FILE: backend/src/generate_response/__init__.py ================================================ ================================================ FILE: backend/src/generate_response/main.py ================================================ import os import json import boto3 from aws_lambda_powertools import Logger from langchain.memory import ConversationBufferMemory from langchain.chains import ConversationalRetrievalChain from langchain_community.chat_message_histories import DynamoDBChatMessageHistory from langchain_community.vectorstores import FAISS from langchain_aws.chat_models import ChatBedrock from langchain_aws.embeddings import BedrockEmbeddings MEMORY_TABLE = os.environ["MEMORY_TABLE"] BUCKET = os.environ["BUCKET"] MODEL_ID = os.environ["MODEL_ID"] EMBEDDING_MODEL_ID = os.environ["EMBEDDING_MODEL_ID"] s3 = boto3.client("s3") logger = Logger() def get_embeddings(): bedrock_runtime = boto3.client( service_name="bedrock-runtime", region_name="us-east-1", ) embeddings = BedrockEmbeddings( model_id=EMBEDDING_MODEL_ID, client=bedrock_runtime, region_name="us-east-1", ) return embeddings def get_faiss_index(embeddings, user, file_name): s3.download_file(BUCKET, f"{user}/{file_name}/index.faiss", "/tmp/index.faiss") s3.download_file(BUCKET, f"{user}/{file_name}/index.pkl", "/tmp/index.pkl") faiss_index = FAISS.load_local("/tmp", embeddings, allow_dangerous_deserialization=True) return faiss_index def 
create_memory(user_id, conversation_id): message_history = DynamoDBChatMessageHistory( table_name=MEMORY_TABLE, session_id=conversation_id, key={"userid": user_id, "SessionId":conversation_id} ) memory = ConversationBufferMemory( memory_key="chat_history", chat_memory=message_history, input_key="question", output_key="answer", return_messages=True, ) return memory def bedrock_chain(faiss_index, memory, human_input, bedrock_runtime): chat = ChatBedrock( model_id=MODEL_ID, model_kwargs={'temperature': 0.0} ) chain = ConversationalRetrievalChain.from_llm( llm=chat, chain_type="stuff", retriever=faiss_index.as_retriever(), memory=memory, return_source_documents=True, ) response = chain.invoke({"question": human_input}) return response @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): event_body = json.loads(event["body"]) file_name = event_body["fileName"] human_input = event_body["prompt"] conversation_id = event["pathParameters"]["conversationid"] user = event["requestContext"]["authorizer"]["claims"]["sub"] embeddings = get_embeddings() faiss_index = get_faiss_index(embeddings, user, file_name) memory = create_memory(user, conversation_id) bedrock_runtime = boto3.client( service_name="bedrock-runtime", region_name="us-east-1", ) response = bedrock_chain(faiss_index, memory, human_input, bedrock_runtime) if response: print(f"{MODEL_ID} -\nPrompt: {human_input}\n\nResponse: {response['answer']}") else: raise ValueError(f"Unsupported model ID: {MODEL_ID}") logger.info(str(response['answer'])) return { "statusCode": 200, "headers": { "Content-Type": "application/json", "Access-Control-Allow-Headers": "*", "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "*", }, "body": json.dumps(response['answer']), } ================================================ FILE: backend/src/generate_response/requirements.txt ================================================ boto3 botocore faiss-cpu==1.7.4 langchain==0.3.21 
langchain-community==0.3.27 langchain-aws==0.2.17 urllib3 ================================================ FILE: backend/src/get_all_documents/__init__.py ================================================ ================================================ FILE: backend/src/get_all_documents/main.py ================================================ import os, json import boto3 from boto3.dynamodb.conditions import Key from aws_lambda_powertools import Logger DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"] ddb = boto3.resource("dynamodb") document_table = ddb.Table(DOCUMENT_TABLE) logger = Logger() @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): user_id = event["requestContext"]["authorizer"]["claims"]["sub"] response = document_table.query(KeyConditionExpression=Key("userid").eq(user_id)) items = sorted(response["Items"], key=lambda item: item["created"], reverse=True) for item in items: item["conversations"] = sorted( item["conversations"], key=lambda conv: conv["created"], reverse=True ) logger.info({"items": items}) return { "statusCode": 200, "headers": { "Content-Type": "application/json", "Access-Control-Allow-Headers": "*", "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "*", }, "body": json.dumps(items, default=str), } ================================================ FILE: backend/src/get_document/__init__.py ================================================ ================================================ FILE: backend/src/get_document/main.py ================================================ import os, json import boto3 from boto3.dynamodb.conditions import Key from aws_lambda_powertools import Logger DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"] MEMORY_TABLE = os.environ["MEMORY_TABLE"] ddb = boto3.resource("dynamodb") document_table = ddb.Table(DOCUMENT_TABLE) memory_table = ddb.Table(MEMORY_TABLE) logger = Logger() @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): user_id = 
event["requestContext"]["authorizer"]["claims"]["sub"] document_id = event["pathParameters"]["documentid"] conversation_id = event["pathParameters"]["conversationid"] response = document_table.get_item( Key={"userid": user_id, "documentid": document_id} ) document = response["Item"] document["conversations"] = sorted( document["conversations"], key=lambda conv: conv["created"], reverse=True ) logger.info({"document": document}) response = memory_table.get_item(Key={"userid": user_id, "SessionId": conversation_id}) if not "Item" in response: return { "statusCode": 403 } messages = response["Item"]["History"] logger.info({"messages": messages}) return { "statusCode": 200, "headers": { "Content-Type": "application/json", "Access-Control-Allow-Headers": "*", "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Methods": "*", }, "body": json.dumps( { "conversationid": conversation_id, "document": document, "messages": messages, }, default=str, ), } ================================================ FILE: backend/src/upload_trigger/__init__.py ================================================ ================================================ FILE: backend/src/upload_trigger/main.py ================================================ import os, json from datetime import datetime import boto3 import PyPDF2 import shortuuid import urllib from aws_lambda_powertools import Logger DOCUMENT_TABLE = os.environ["DOCUMENT_TABLE"] MEMORY_TABLE = os.environ["MEMORY_TABLE"] QUEUE = os.environ["QUEUE"] BUCKET = os.environ["BUCKET"] ddb = boto3.resource("dynamodb") document_table = ddb.Table(DOCUMENT_TABLE) memory_table = ddb.Table(MEMORY_TABLE) sqs = boto3.client("sqs") s3 = boto3.client("s3") logger = Logger() @logger.inject_lambda_context(log_event=True) def lambda_handler(event, context): key = urllib.parse.unquote_plus(event["Records"][0]["s3"]["object"]["key"]) split = key.split("/") user_id = split[0] file_name = split[1] document_id = shortuuid.uuid() s3.download_file(BUCKET, 
key, f"/tmp/{file_name}") with open(f"/tmp/{file_name}", "rb") as f: reader = PyPDF2.PdfReader(f) pages = str(len(reader.pages)) conversation_id = shortuuid.uuid() timestamp = datetime.utcnow() timestamp_str = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ") document = { "userid": user_id, "documentid": document_id, "filename": file_name, "created": timestamp_str, "pages": pages, "filesize": str(event["Records"][0]["s3"]["object"]["size"]), "docstatus": "UPLOADED", "conversations": [], } conversation = {"conversationid": conversation_id, "created": timestamp_str} document["conversations"].append(conversation) document_table.put_item(Item=document) conversation = {"userid": user_id, "SessionId": conversation_id, "History": []} memory_table.put_item(Item=conversation) message = { "documentid": document_id, "key": key, "user": user_id, } sqs.send_message(QueueUrl=QUEUE, MessageBody=json.dumps(message)) ================================================ FILE: backend/src/upload_trigger/requirements.txt ================================================ boto3==1.28.57 botocore==1.31.57 PyPDF2==3.0.1 shortuuid==1.0.11 ================================================ FILE: backend/template.yaml ================================================ AWSTemplateFormatVersion: "2010-09-09" Transform: AWS::Serverless-2016-10-31 Description: > serverless-pdf-chat SAM Template for serverless-pdf-chat Globals: Function: Runtime: python3.11 Handler: main.lambda_handler Architectures: - arm64 Tracing: Active Environment: Variables: LOG_LEVEL: INFO Layers: - !Sub arn:aws:lambda:${AWS::Region}:017000801446:layer:AWSLambdaPowertoolsPythonV3-python311-arm64:7 Parameters: Frontend: Default: amplify Type: String AllowedValues: - local - amplify Repository: Type: String ModelId: Default: "anthropic.claude-3-sonnet-20240229-v1:0" Type: String EmbeddingModelId: Default: "amazon.titan-embed-text-v2:0" Type: String Conditions: DeployToAmplifyHosting: !Equals - !Ref Frontend - amplify Resources: 
DocumentBucket: Type: "AWS::S3::Bucket" Properties: BucketName: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}" CorsConfiguration: CorsRules: - AllowedHeaders: - "*" AllowedMethods: - GET - PUT - HEAD - POST - DELETE AllowedOrigins: - "*" PublicAccessBlockConfiguration: BlockPublicAcls: true BlockPublicPolicy: true IgnorePublicAcls: true RestrictPublicBuckets: true DocumentBucketPolicy: Type: "AWS::S3::BucketPolicy" Properties: PolicyDocument: Id: EnforceHttpsPolicy Version: "2012-10-17" Statement: - Sid: EnforceHttpsSid Effect: Deny Principal: "*" Action: "s3:*" Resource: - !Sub "arn:aws:s3:::${DocumentBucket}/*" - !Sub "arn:aws:s3:::${DocumentBucket}" Condition: Bool: "aws:SecureTransport": "false" Bucket: !Ref DocumentBucket EmbeddingQueue: Type: AWS::SQS::Queue DeletionPolicy: Delete UpdateReplacePolicy: Delete Properties: VisibilityTimeout: 180 MessageRetentionPeriod: 3600 EmbeddingQueuePolicy: Type: AWS::SQS::QueuePolicy Properties: Queues: - !Ref EmbeddingQueue PolicyDocument: Version: "2012-10-17" Id: SecureTransportPolicy Statement: - Sid: AllowSecureTransportOnly Effect: Deny Principal: "*" Action: "SQS:*" Resource: "*" Condition: Bool: aws:SecureTransport: false DocumentTable: Type: AWS::DynamoDB::Table DeletionPolicy: Delete UpdateReplacePolicy: Delete Properties: KeySchema: - AttributeName: userid KeyType: HASH - AttributeName: documentid KeyType: RANGE AttributeDefinitions: - AttributeName: userid AttributeType: S - AttributeName: documentid AttributeType: S BillingMode: PAY_PER_REQUEST MemoryTable: Type: AWS::DynamoDB::Table DeletionPolicy: Delete UpdateReplacePolicy: Delete Properties: KeySchema: - AttributeName: userid KeyType: HASH - AttributeName: SessionId KeyType: RANGE AttributeDefinitions: - AttributeName: userid AttributeType: S - AttributeName: SessionId AttributeType: S BillingMode: PAY_PER_REQUEST CognitoUserPool: Type: AWS::Cognito::UserPool DeletionPolicy: Delete UpdateReplacePolicy: Delete Properties: AutoVerifiedAttributes: 
- email UsernameAttributes: - email AdminCreateUserConfig: AllowAdminCreateUserOnly: true Policies: PasswordPolicy: MinimumLength: 8 RequireLowercase: true RequireNumbers: true RequireSymbols: true RequireUppercase: true CognitoUserPoolClient: Type: AWS::Cognito::UserPoolClient Properties: UserPoolId: !Ref CognitoUserPool ClientName: !Ref CognitoUserPool GenerateSecret: false Api: Type: AWS::Serverless::Api Properties: StageName: dev Auth: DefaultAuthorizer: CognitoAuthorizer AddDefaultAuthorizerToCorsPreflight: false Authorizers: CognitoAuthorizer: UserPoolArn: !GetAtt CognitoUserPool.Arn Cors: AllowOrigin: "'*'" AllowHeaders: "'*'" AllowMethods: "'*'" GeneratePresignedUrlFunction: Type: AWS::Serverless::Function Properties: CodeUri: src/generate_presigned_url/ Policies: - S3CrudPolicy: BucketName: !Ref DocumentBucket Environment: Variables: BUCKET: !Ref DocumentBucket REGION: !Sub ${AWS::Region} Events: Root: Type: Api Properties: RestApiId: !Ref Api Path: /generate_presigned_url Method: GET UploadTriggerFunction: Type: AWS::Serverless::Function Properties: CodeUri: src/upload_trigger/ Policies: - DynamoDBCrudPolicy: TableName: !Ref DocumentTable - DynamoDBCrudPolicy: TableName: !Ref MemoryTable - S3ReadPolicy: BucketName: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}*" - SQSSendMessagePolicy: QueueName: !GetAtt EmbeddingQueue.QueueName Environment: Variables: DOCUMENT_TABLE: !Ref DocumentTable MEMORY_TABLE: !Ref MemoryTable QUEUE: !GetAtt EmbeddingQueue.QueueName BUCKET: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}" Events: S3Event: Type: S3 Properties: Bucket: !Ref DocumentBucket Events: - s3:ObjectCreated:* Filter: S3Key: Rules: - Name: suffix Value: .pdf GetDocumentFunction: Type: AWS::Serverless::Function Properties: CodeUri: src/get_document/ Policies: - DynamoDBReadPolicy: TableName: !Ref DocumentTable - DynamoDBReadPolicy: TableName: !Ref MemoryTable Environment: Variables: DOCUMENT_TABLE: !Ref DocumentTable MEMORY_TABLE: !Ref 
MemoryTable Events: Root: Type: Api Properties: RestApiId: !Ref Api Path: /doc/{documentid}/{conversationid} Method: GET GetAllDocuments: Type: AWS::Serverless::Function Properties: CodeUri: src/get_all_documents/ Policies: - DynamoDBReadPolicy: TableName: !Ref DocumentTable Environment: Variables: DOCUMENT_TABLE: !Ref DocumentTable Events: Root: Type: Api Properties: RestApiId: !Ref Api Path: /doc Method: GET AddConversationFunction: Type: AWS::Serverless::Function Properties: CodeUri: src/add_conversation/ Policies: - DynamoDBCrudPolicy: TableName: !Ref DocumentTable - DynamoDBCrudPolicy: TableName: !Ref MemoryTable Environment: Variables: DOCUMENT_TABLE: !Ref DocumentTable MEMORY_TABLE: !Ref MemoryTable Events: Root: Type: Api Properties: RestApiId: !Ref Api Path: /doc/{documentid} Method: POST GenerateEmbeddingsFunction: Type: AWS::Serverless::Function Properties: CodeUri: src/generate_embeddings/ Timeout: 180 MemorySize: 2048 Policies: - SQSPollerPolicy: QueueName: !GetAtt EmbeddingQueue.QueueName - S3CrudPolicy: BucketName: !Ref DocumentBucket - DynamoDBCrudPolicy: TableName: !Ref DocumentTable - Statement: - Sid: "BedrockScopedAccess" Effect: "Allow" Action: "bedrock:InvokeModel" Resource: !Sub "arn:aws:bedrock:*::foundation-model/${EmbeddingModelId}" Environment: Variables: DOCUMENT_TABLE: !Ref DocumentTable BUCKET: !Ref DocumentBucket EMBEDDING_MODEL_ID: !Ref EmbeddingModelId Events: EmbeddingQueueEvent: Type: SQS Properties: Queue: !GetAtt EmbeddingQueue.Arn BatchSize: 1 GenerateResponseFunction: Type: AWS::Serverless::Function Properties: CodeUri: src/generate_response/ Timeout: 30 MemorySize: 2048 Policies: - DynamoDBCrudPolicy: TableName: !Ref MemoryTable - S3CrudPolicy: BucketName: !Ref DocumentBucket - Statement: - Sid: "BedrockScopedAccess" Effect: "Allow" Action: "bedrock:InvokeModel" Resource: - !Sub "arn:aws:bedrock:*::foundation-model/${ModelId}" - !Sub "arn:aws:bedrock:*::foundation-model/${EmbeddingModelId}" Environment: Variables: 
MEMORY_TABLE: !Ref MemoryTable BUCKET: !Ref DocumentBucket MODEL_ID: !Ref ModelId EMBEDDING_MODEL_ID: !Ref EmbeddingModelId Events: Root: Type: Api Properties: RestApiId: !Ref Api Path: /{documentid}/{conversationid} Method: POST DeleteDocumentFunction: Type: AWS::Serverless::Function Properties: CodeUri: src/delete_document/ Policies: - DynamoDBCrudPolicy: TableName: !Ref DocumentTable - DynamoDBCrudPolicy: TableName: !Ref MemoryTable - S3CrudPolicy: BucketName: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}*" Environment: Variables: DOCUMENT_TABLE: !Ref DocumentTable MEMORY_TABLE: !Ref MemoryTable BUCKET: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}" Events: Root: Type: Api Properties: RestApiId: !Ref Api Path: /doc/{documentid} Method: DELETE AmplifyApp: Type: AWS::Amplify::App Condition: DeployToAmplifyHosting Properties: Name: !Sub "${AWS::StackName}-${AWS::Region}-${AWS::AccountId}" Repository: !Ref Repository BuildSpec: | version: 1 applications: - frontend: phases: preBuild: commands: - npm ci build: commands: - npm run build artifacts: baseDirectory: dist files: - '**/*' cache: paths: - node_modules/**/* appRoot: frontend AccessToken: "{{resolve:secretsmanager:serverless-pdf-chat-github-token}}" EnvironmentVariables: - Name: AMPLIFY_MONOREPO_APP_ROOT Value: frontend - Name: VITE_REGION Value: !Ref AWS::Region - Name: VITE_API_ENDPOINT Value: !Sub "https://${Api}.execute-api.${AWS::Region}.${AWS::URLSuffix}/dev/" - Name: VITE_USER_POOL_ID Value: !Ref CognitoUserPool - Name: VITE_USER_POOL_CLIENT_ID Value: !Ref CognitoUserPoolClient AmplifyBranch: Type: AWS::Amplify::Branch Condition: DeployToAmplifyHosting Properties: BranchName: main AppId: !GetAtt AmplifyApp.AppId EnableAutoBuild: true Stage: PRODUCTION Outputs: CognitoUserPool: Value: !Ref CognitoUserPool CognitoUserPoolClient: Value: !Ref CognitoUserPoolClient ApiGatewayBaseUrl: Value: !Sub "https://${Api}.execute-api.${AWS::Region}.${AWS::URLSuffix}/dev/" 
================================================ FILE: frontend/.eslintrc.cjs ================================================ module.exports = { env: { browser: true, es2020: true }, extends: [ 'eslint:recommended', 'plugin:@typescript-eslint/recommended', 'plugin:react-hooks/recommended', ], parser: '@typescript-eslint/parser', parserOptions: { ecmaVersion: 'latest', sourceType: 'module' }, plugins: ['react-refresh'], rules: { 'react-refresh/only-export-components': 'warn', }, } ================================================ FILE: frontend/.gitignore ================================================ # Logs logs *.log npm-debug.log* yarn-debug.log* yarn-error.log* pnpm-debug.log* lerna-debug.log* node_modules dist dist-ssr *.local # Editor directories and files .vscode/* !.vscode/extensions.json .idea .DS_Store *.suo *.ntvs* *.njsproj *.sln *.sw? # Vite .env.development ================================================ FILE: frontend/index.html ================================================ DocChat - Chat with a PDF
================================================ FILE: frontend/package.json ================================================ { "name": "frontend", "private": true, "version": "0.0.0", "type": "module", "scripts": { "dev": "vite", "build": "tsc && vite build", "lint": "eslint src --ext ts,tsx --report-unused-disable-directives --max-warnings 0", "preview": "vite preview" }, "dependencies": { "@aws-amplify/ui-react": "^6.1.14", "@headlessui/react": "^1.7.15", "@heroicons/react": "^2.0.18", "aws-amplify": "^6.5.0", "date-fns": "^2.30.0", "filesize": "^10.0.7", "react": "^18.2.0", "react-dom": "^18.2.0", "react-router-dom": "^7.5.2" }, "devDependencies": { "@tailwindcss/typography": "^0.5.9", "@types/react": "^18.0.37", "@types/react-dom": "^18.0.11", "@typescript-eslint/eslint-plugin": "^5.59.0", "@typescript-eslint/parser": "^5.59.0", "@vitejs/plugin-react": "^4.3.4", "autoprefixer": "^10.4.14", "eslint": "^8.38.0", "eslint-plugin-react-hooks": "^4.6.0", "eslint-plugin-react-refresh": "^0.3.4", "postcss": "^8.4.24", "tailwindcss": "^3.3.2", "typescript": "^5.0.2", "vite": "^6.4.1" } } ================================================ FILE: frontend/postcss.config.js ================================================ export default { plugins: { tailwindcss: {}, autoprefixer: {}, }, } ================================================ FILE: frontend/src/App.tsx ================================================ import { Amplify } from "aws-amplify"; import { fetchAuthSession } from "aws-amplify/auth"; import { withAuthenticator } from "@aws-amplify/ui-react"; import { createBrowserRouter, RouterProvider } from "react-router-dom"; import "./index.css"; import Layout from "./routes/layout"; import Documents from "./routes/documents"; import Chat from "./routes/chat"; Amplify.configure({ Auth: { Cognito: { userPoolId: import.meta.env.VITE_USER_POOL_ID, userPoolClientId: import.meta.env.VITE_USER_POOL_CLIENT_ID, }, }, API: { REST: { "serverless-pdf-chat": { endpoint: 
import.meta.env.VITE_API_ENDPOINT, region: import.meta.env.VITE_API_REGION, }, }, }}, { API: { REST: { headers: async () => { const tokens = (await fetchAuthSession()).tokens; const jwt = tokens?.idToken?.toString(); return { "authorization": `Bearer ${jwt}` }; } } } }); const router = createBrowserRouter([ { path: "/", element: , children: [ { index: true, Component: Documents, }, { path: "/doc/:documentid/:conversationid", Component: Chat, }, ], }, ]); function App() { return ; } export default withAuthenticator(App, { hideSignUp: true }); ================================================ FILE: frontend/src/common/types.ts ================================================ export interface Document { documentid: string; userid: string; filename: string; filesize: string; docstatus: string; created: string; pages: string; conversations: { conversationid: string; created: string; }[]; } export interface Conversation { conversationid: string; document: Document; messages: { type: string; data: { content: string; example: boolean; additional_kwargs: {}; }; }[]; } ================================================ FILE: frontend/src/common/utilities.ts ================================================ import { format } from "date-fns"; export function getDateTime(date: string): string { return format(new Date(date), "MMMM d, yyyy - H:mm"); } ================================================ FILE: frontend/src/components/ChatMessages.tsx ================================================ import { PaperAirplaneIcon } from "@heroicons/react/24/outline"; import Loading from "../../public/loading-dots.svg"; import { Conversation } from "../common/types"; interface ChatMessagesProps { conversation: Conversation; messageStatus: string; handlePromptChange: (event: React.ChangeEvent) => void; handleKeyPress: (event: React.KeyboardEvent) => void; prompt: string; submitMessage: () => Promise; } const ChatMessages: React.FC = ({ prompt, conversation, messageStatus, submitMessage, 
handlePromptChange, handleKeyPress, }) => { return (
{conversation.messages.map((message, i) => (

{message.data.content}

))} {messageStatus === "loading" && (
)}
{messageStatus === "idle" && ( )} {messageStatus === "loading" && ( )}
); }; export default ChatMessages; ================================================ FILE: frontend/src/components/ChatSidebar.tsx ================================================ import DocumentDetail from "./DocumentDetail"; import { Conversation } from "../common/types"; import { getDateTime } from "../common/utilities"; import { Params } from "react-router-dom"; import { ChatBubbleLeftRightIcon, PlusCircleIcon, } from "@heroicons/react/24/outline"; interface ChatSidebarProps { conversation: Conversation; params: Params; addConversation: () => Promise; switchConversation: (e: React.MouseEvent) => void; conversationListStatus: "idle" | "loading"; } const ChatSidebar: React.FC = ({ conversation, params, addConversation, switchConversation, conversationListStatus, }) => { return (
{conversationListStatus === "idle" && ( )} {conversationListStatus === "loading" && ( )} {conversation && conversation.document.conversations.map((conversation, i) => (
{params.conversationid === conversation.conversationid && ( )} {params.conversationid !== conversation.conversationid && ( )}
))}
); }; export default ChatSidebar; ================================================ FILE: frontend/src/components/DocumentDetail.tsx ================================================ import { Document } from "../common/types"; import { getDateTime } from "../common/utilities"; import { filesize } from "filesize"; import { DocumentIcon, CircleStackIcon, ClockIcon, CheckCircleIcon, CloudIcon, CogIcon, TrashIcon, } from "@heroicons/react/24/outline"; import { del } from "aws-amplify/api"; import {useNavigate} from "react-router-dom"; import {useState} from "react"; interface DocumentDetailProps { document: Document; onDocumentDeleted?: (document?: Document) => void; } const DocumentDetail: React.FC = ({document, onDocumentDeleted}) => { const navigate = useNavigate(); const [deleteStatus, setDeleteStatus] = useState("idle"); const deleteDocument = async (event: React.MouseEvent) => { event.preventDefault(); setDeleteStatus("deleting"); await del({ apiName: "serverless-pdf-chat", path: `doc/${document.documentid}`, }).response; setDeleteStatus("idle"); if (onDocumentDeleted) onDocumentDeleted(document); else navigate(`/`); }; return ( <>

{document.filename}

{document.pages} pages
{filesize(Number(document.filesize)).toString()}
{getDateTime(document.created)}
{document.docstatus === "UPLOADED" && (
Awaiting processing
)} {document.docstatus === "PROCESSING" && (
Processing document
)} {document.docstatus === "READY" && (
Ready to chat
)}
); }; export default DocumentDetail; ================================================ FILE: frontend/src/components/DocumentList.tsx ================================================ import { useState, useEffect } from "react"; import { get } from "aws-amplify/api"; import { Link } from "react-router-dom"; import DocumentDetail from "./DocumentDetail"; import { ArrowPathRoundedSquareIcon } from "@heroicons/react/24/outline"; import { Document } from "../common/types"; import Loading from "../../public/loading-grid.svg"; const DocumentList: React.FC = () => { const [documents, setDocuments] = useState([]); const [listStatus, setListStatus] = useState("idle"); const fetchData = async () => { setListStatus("loading"); const response = await get({ apiName: "serverless-pdf-chat", path:"doc" }).response; const docs = await response.body.json() as unknown as Document[] setListStatus("idle"); setDocuments(docs); }; useEffect(() => { fetchData(); }, []); return (

My documents

{documents && documents.length > 0 && documents.map((document: Document) => ( ))}
{listStatus === "idle" && documents.length === 0 && (

There's nothing here yet...

Upload your first document to get started!

)} {listStatus === "loading" && documents.length === 0 && (
)}
); }; export default DocumentList; ================================================ FILE: frontend/src/components/DocumentUploader.tsx ================================================ import { ChangeEvent, useState, useEffect } from "react"; import { get } from "aws-amplify/api"; import { filesize } from "filesize"; import { DocumentIcon, CheckCircleIcon, CloudArrowUpIcon, XCircleIcon, ArrowLeftCircleIcon, } from "@heroicons/react/24/outline"; interface DocumentUploaderProps { onDocumentUploaded?:() => void } const DocumentUploader: React.FC = ({onDocumentUploaded}) => { const [inputStatus, setInputStatus] = useState("idle"); const [buttonStatus, setButtonStatus] = useState("ready"); const [selectedFile, setSelectedFile] = useState(null); useEffect(() => { if (selectedFile) { if (selectedFile.type === "application/pdf") { setInputStatus("valid"); } else { setSelectedFile(null); } } }, [selectedFile]); const handleFileChange = (event: ChangeEvent) => { const file = event.target.files?.[0]; setSelectedFile(file || null); }; const uploadFile = async () => { if(selectedFile) { setButtonStatus("uploading"); const response = await get({ apiName: "serverless-pdf-chat", path: "generate_presigned_url", options: { headers: { "Content-Type": "application/json" }, queryParams: { "file_name": selectedFile?.name } }, }).response const presignedUrl = await response.body.json() as { presignedurl: string } fetch(presignedUrl?.presignedurl, { method: "PUT", body: selectedFile, headers: { "Content-Type": "application/pdf" }, }).then(() => { setButtonStatus("success"); if (onDocumentUploaded) onDocumentUploaded(); }); } }; const resetInput = () => { setSelectedFile(null); setInputStatus("idle"); setButtonStatus("ready"); }; return (

Add document

{inputStatus === "idle" && (
)} {inputStatus === "valid" && (
<>

{selectedFile?.name}

{filesize(selectedFile ? selectedFile.size : 0).toString()}

{buttonStatus === "ready" && ( )} {buttonStatus === "uploading" && ( )} {buttonStatus === "success" && ( )} {buttonStatus === "ready" && ( )} {buttonStatus === "uploading" && ( )} {buttonStatus === "success" && ( )}
)}
); }; export default DocumentUploader; ================================================ FILE: frontend/src/components/Footer.tsx ================================================ import { CloudIcon } from "@heroicons/react/24/outline"; import GitHub from "../../public/github.svg"; const Footer: React.FC = () => { return (
); }; export default Footer; ================================================ FILE: frontend/src/components/Navigation.tsx ================================================ import { Link } from "react-router-dom"; import { Menu } from "@headlessui/react"; import { ArrowLeftOnRectangleIcon, ChevronDownIcon, } from "@heroicons/react/24/outline"; import { ChatBubbleLeftRightIcon } from "@heroicons/react/24/solid"; /** Props for Navigation: the current user's info object and an async click handler used for signing out. */ interface NavigationProps { userInfo: any; handleSignOutClick: ( event: React.MouseEvent ) => Promise; } /** Navigation component; receives `userInfo` and `handleSignOutClick` from its parent. NOTE(review): the rendered markup is not visible in this excerpt. */ const Navigation: React.FC = ({ userInfo, handleSignOutClick, }: NavigationProps) => { return ( ); }; export default Navigation; ================================================ FILE: frontend/src/index.css ================================================ @tailwind base; @tailwind components; @tailwind utilities; ================================================ FILE: frontend/src/main.tsx ================================================ import * as React from "react"; import * as ReactDOM from "react-dom/client"; import App from "./App"; import "@aws-amplify/ui-react/styles.css"; /* Application entry point: mounts the React tree on the element with id "root". */ ReactDOM.createRoot(document.getElementById("root")!).render( ); ================================================ FILE: frontend/src/routes/chat.tsx ================================================ import React, { useState, useEffect, KeyboardEvent } from "react"; import { useParams, useNavigate } from "react-router-dom"; import { get, post } from "aws-amplify/api"; import { Conversation } from "../common/types"; import ChatSidebar from "../components/ChatSidebar"; import ChatMessages from "../components/ChatMessages"; import LoadingGrid from "../../public/loading-grid.svg"; /** Chat route component: reads the route params and keeps the loaded conversation plus the "idle"/"loading" status flags in state. */ const Document: React.FC = () => { const params = useParams(); const navigate = useNavigate(); const [conversation, setConversation] = useState(null); const [loading, setLoading] = React.useState("idle"); const [messageStatus, setMessageStatus] = useState("idle"); const [conversationListStatus,
setConversationListStatus] = useState<"idle" | "loading">("idle");
  const [prompt, setPrompt] = useState("");

  /**
   * Loads a conversation of the current document and stores it in state.
   *
   * @param conversationid Conversation to load; defaults to the id taken
   *   from the route params.
   *
   * Failures are logged and the loading flag is always reset, so a failed
   * request cannot leave the view stuck in the "loading" state.
   */
  const fetchData = async (conversationid = params.conversationid) => {
    setLoading("loading");
    try {
      const response = await get({
        apiName: "serverless-pdf-chat",
        path: `doc/${params.documentid}/${conversationid}`,
      }).response;
      const fetchedConversation =
        (await response.body.json()) as unknown as Conversation;
      setConversation(fetchedConversation);
    } catch (error: unknown) {
      console.error("Failed to load conversation", error);
    } finally {
      setLoading("idle");
    }
  };

  // Initial load for the conversation referenced by the route.
  useEffect(() => {
    void fetchData();
  }, []);

  /** Mirrors the prompt field's current text into state. */
  const handlePromptChange = (
    event: React.ChangeEvent<HTMLInputElement | HTMLTextAreaElement>
  ) => {
    setPrompt(event.target.value);
  };

  /**
   * Creates a new conversation for the current document, loads it and
   * navigates to its route. The list status is reset even when the request
   * fails.
   */
  const addConversation = async () => {
    setConversationListStatus("loading");
    try {
      const response = await post({
        apiName: "serverless-pdf-chat",
        path: `doc/${params.documentid}`,
      }).response;
      const newConversation =
        (await response.body.json()) as unknown as Conversation;
      void fetchData(newConversation.conversationid);
      navigate(`/doc/${params.documentid}/${newConversation.conversationid}`);
    } catch (error: unknown) {
      console.error("Failed to create conversation", error);
    } finally {
      setConversationListStatus("idle");
    }
  };

  /** Navigates to and loads the conversation whose id is the clicked button's id. */
  const switchConversation = (e: React.MouseEvent) => {
    const targetButton = e.target as HTMLButtonElement;
    navigate(`/doc/${params.documentid}/${targetButton.id}`);
    void fetchData(targetButton.id);
  };

  /** Submits the prompt when the Enter key is pressed. */
  const handleKeyPress = (event: KeyboardEvent) => {
    if (event.key === "Enter") {
      void submitMessage();
    }
  };

  /**
   * Sends the current prompt to the backend for the loaded conversation.
   *
   * The prompt is appended to the local conversation right away as a preview
   * message, then the conversation is reloaded once the request completes.
   * Does nothing when no conversation is loaded. The message status is
   * always reset to "idle", including on failure.
   */
  const submitMessage = async () => {
    if (conversation === null) {
      return;
    }

    setMessageStatus("loading");

    const previewMessage = {
      type: "text",
      data: {
        content: prompt,
        additional_kwargs: {},
        example: false,
      },
    };
    setConversation({
      ...conversation,
      messages: [...conversation.messages, previewMessage],
    });

    try {
      await post({
        apiName: "serverless-pdf-chat",
        path: `${conversation.document.documentid}/${conversation.conversationid}`,
        options: {
          body: {
            fileName: conversation.document.filename,
            prompt: prompt,
          },
        },
      }).response;
      setPrompt("");
      await fetchData(conversation.conversationid);
    } catch (error: unknown) {
      console.error("Failed to submit message", error);
    } finally {
      setMessageStatus("idle");
    }
  };

  return (
{loading === "loading" && !conversation && (
)} {conversation && (
)}
); }; export default Document; ================================================ FILE: frontend/src/routes/documents.tsx ================================================
import React, {useState} from "react";
import DocumentUploader from "../components/DocumentUploader";
import DocumentList from "../components/DocumentList";

/**
 * Documents route: combines the uploader and the document list.
 * `documentListKey` is bumped after an upload to trigger a list reload.
 */
const Documents: React.FC = () => {
  const [documentListKey, setDocumentListKey] = useState(1);

  /**
   * Schedules a change of the list key one second from now.
   * An incrementing counter guarantees that every reload gets a new key.
   */
  const reloadDocuments = () => {
    // NOTE(review): the 1s delay presumably gives the backend time to
    // register the new upload — confirm.
    setTimeout(() => setDocumentListKey((previousKey) => previousKey + 1), 1000);
  };

  return ( <> ); };
export default Documents;
================================================ FILE: frontend/src/routes/layout.tsx ================================================
import { Outlet } from "react-router-dom";
import { useEffect, useState } from "react";
import { signOut, fetchUserAttributes } from "aws-amplify/auth";
import Navigation from "../components/Navigation";
import Footer from "../components/Footer";

/** Shape of the user info object kept by Layout. */
type UserInfo = {
  attributes: Awaited<ReturnType<typeof fetchUserAttributes>>;
};

/**
 * Layout route: fetches the signed-in user's attributes once on mount and
 * provides a sign-out click handler.
 */
const Layout: React.FC = () => {
  const [userInfo, setUserInfo] = useState<UserInfo | null>(null);

  useEffect(() => {
    // Guards against updating state after the component has unmounted.
    let isMounted = true;

    (async () => {
      try {
        const attributes = await fetchUserAttributes();
        if (isMounted) {
          setUserInfo({attributes});
        }
      } catch (error: unknown) {
        // Without this the rejected promise would go unhandled.
        console.error("Failed to fetch user attributes", error);
      }
    })();

    return () => {
      isMounted = false;
    };
  }, []);

  /** Prevents the default click action, then signs the user out. */
  const handleSignOutClick = async (
    event: React.MouseEvent
  ) => {
    event.preventDefault();
    await signOut();
  };

  return (
); }; export default Layout; ================================================ FILE: frontend/src/vite-env.d.ts ================================================ /// ================================================ FILE: frontend/tailwind.config.js ================================================ /** @type {import('tailwindcss').Config} */ export default { content: ["./index.html", "./src/**/*.{js,ts,jsx,tsx}"], theme: { extend: {}, container: { padding: "7rem", center: true, }, }, plugins: [require("@tailwindcss/typography")], }; ================================================ FILE: frontend/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2020", "useDefineForClassFields": true, "lib": ["ES2020", "DOM", "DOM.Iterable"], "module": "ESNext", "skipLibCheck": true, /* Bundler mode */ "moduleResolution": "bundler", "allowImportingTsExtensions": true, "resolveJsonModule": true, "isolatedModules": true, "noEmit": true, "jsx": "react-jsx", /* Linting */ "strict": true, "noUnusedLocals": true, "noUnusedParameters": true, "noFallthroughCasesInSwitch": true }, "include": ["src"], "references": [{ "path": "./tsconfig.node.json" }] } ================================================ FILE: frontend/tsconfig.node.json ================================================ { "compilerOptions": { "composite": true, "skipLibCheck": true, "module": "ESNext", "moduleResolution": "bundler", "allowSyntheticDefaultImports": true }, "include": ["vite.config.ts"] } ================================================ FILE: frontend/vite.config.ts ================================================ import { defineConfig } from "vite"; import react from "@vitejs/plugin-react"; // https://vitejs.dev/config/ export default defineConfig({ plugins: [react()], });