Repository: getumbrel/llama-gpt Branch: master Commit: 43994a365ffb Files: 261 Total size: 305.4 KB Directory structure: gitextract_5taggak1/ ├── .gitattributes ├── .github/ │ └── workflows/ │ └── on-push.yml ├── .gitignore ├── LICENSE.md ├── README.md ├── api/ │ └── run.sh ├── cuda/ │ ├── ggml.Dockerfile │ ├── gguf.Dockerfile │ └── run.sh ├── deploy/ │ └── kubernetes/ │ ├── kustomization.yaml │ ├── llama-gpt-api-deployment.yaml │ ├── llama-gpt-api-service.yaml │ ├── llama-gpt-ui-deployment.yaml │ └── llama-gpt-ui-service.yaml ├── docker-compose-cuda-ggml.yml ├── docker-compose-cuda-gguf.yml ├── docker-compose-gguf.yml ├── docker-compose-mac.yml ├── docker-compose.yml ├── models/ │ └── .gitkeep ├── run-mac.sh ├── run.sh └── ui/ ├── .dockerignore ├── .eslintrc.json ├── .gitignore ├── CONTRIBUTING.md ├── Dockerfile ├── Makefile ├── __tests__/ │ └── utils/ │ └── app/ │ └── importExports.test.ts ├── components/ │ ├── Buttons/ │ │ └── SidebarActionButton/ │ │ ├── SidebarActionButton.tsx │ │ └── index.ts │ ├── Chat/ │ │ ├── Chat.tsx │ │ ├── ChatInput.tsx │ │ ├── ChatLoader.tsx │ │ ├── ChatMessage.tsx │ │ ├── ErrorMessageDiv.tsx │ │ ├── MemoizedChatMessage.tsx │ │ ├── ModelSelect.tsx │ │ ├── PluginSelect.tsx │ │ ├── PromptList.tsx │ │ ├── Regenerate.tsx │ │ ├── SystemPrompt.tsx │ │ ├── Temperature.tsx │ │ └── VariableModal.tsx │ ├── Chatbar/ │ │ ├── Chatbar.context.tsx │ │ ├── Chatbar.state.tsx │ │ ├── Chatbar.tsx │ │ └── components/ │ │ ├── ChatFolders.tsx │ │ ├── ChatbarSettings.tsx │ │ ├── ClearConversations.tsx │ │ ├── Conversation.tsx │ │ ├── Conversations.tsx │ │ └── PluginKeys.tsx │ ├── Folder/ │ │ ├── Folder.tsx │ │ └── index.ts │ ├── Markdown/ │ │ ├── CodeBlock.tsx │ │ └── MemoizedReactMarkdown.tsx │ ├── Mobile/ │ │ └── Navbar.tsx │ ├── Promptbar/ │ │ ├── PromptBar.context.tsx │ │ ├── Promptbar.state.tsx │ │ ├── Promptbar.tsx │ │ ├── components/ │ │ │ ├── Prompt.tsx │ │ │ ├── PromptFolders.tsx │ │ │ ├── PromptModal.tsx │ │ │ ├── PromptbarSettings.tsx │ │ │ └── 
Prompts.tsx │ │ └── index.ts │ ├── Search/ │ │ ├── Search.tsx │ │ └── index.ts │ ├── Settings/ │ │ ├── Import.tsx │ │ ├── Key.tsx │ │ └── SettingDialog.tsx │ ├── Sidebar/ │ │ ├── Sidebar.tsx │ │ ├── SidebarButton.tsx │ │ ├── components/ │ │ │ └── OpenCloseButton.tsx │ │ └── index.ts │ └── Spinner/ │ ├── Spinner.tsx │ └── index.ts ├── docker-compose.yml ├── docs/ │ └── google_search.md ├── hooks/ │ ├── useCreateReducer.ts │ └── useFetch.ts ├── k8s/ │ └── chatbot-ui.yaml ├── next-i18next.config.js ├── next.config.js ├── no-wait.Dockerfile ├── package.json ├── pages/ │ ├── _app.tsx │ ├── _document.tsx │ ├── api/ │ │ ├── chat.ts │ │ ├── google.ts │ │ ├── home/ │ │ │ ├── home.context.tsx │ │ │ ├── home.state.tsx │ │ │ ├── home.tsx │ │ │ └── index.ts │ │ └── models.ts │ └── index.tsx ├── postcss.config.js ├── prettier.config.js ├── public/ │ └── locales/ │ ├── ar/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── bn/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── ca/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ └── sidebar.json │ ├── de/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── en/ │ │ └── common.json │ ├── es/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── fi/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── fr/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── he/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── id/ │ │ ├── chat.json │ │ ├── 
common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── it/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── ja/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── ko/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── pl/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── pt/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── ro/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── ru/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── si/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── sv/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── te/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ ├── tr/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ └── sidebar.json │ ├── vi/ │ │ ├── chat.json │ │ ├── common.json │ │ ├── markdown.json │ │ ├── promptbar.json │ │ ├── settings.json │ │ └── sidebar.json │ └── zh/ │ ├── chat.json │ ├── common.json │ ├── markdown.json │ ├── promptbar.json │ ├── settings.json │ └── sidebar.json ├── services/ │ ├── errorService.ts │ └── useApiService.ts ├── styles/ │ └── globals.css ├── tailwind.config.js ├── tsconfig.json ├── types/ │ ├── chat.ts │ ├── 
data.ts │ ├── env.ts │ ├── error.ts │ ├── export.ts │ ├── folder.ts │ ├── google.ts │ ├── index.ts │ ├── openai.ts │ ├── plugin.ts │ ├── prompt.ts │ ├── settings.ts │ └── storage.ts ├── utils/ │ ├── app/ │ │ ├── api.ts │ │ ├── clean.ts │ │ ├── codeblock.ts │ │ ├── const.ts │ │ ├── conversation.ts │ │ ├── folders.ts │ │ ├── importExport.ts │ │ ├── prompts.ts │ │ └── settings.ts │ ├── data/ │ │ └── throttle.ts │ └── server/ │ ├── google.ts │ └── index.ts └── vitest.config.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitattributes ================================================ * text=auto *.sh eol=lf *.yml text ================================================ FILE: .github/workflows/on-push.yml ================================================ name: Build Docker images on master push on: push: tags: - "*" jobs: build: runs-on: ubuntu-latest strategy: matrix: service: - api - ui env: SERVICE_DIR: ./${{ matrix.service }} IMAGE_NAME: ghcr.io/${{ github.repository }}-${{ matrix.service }} steps: - uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3 - run: docker login --username "${{ github.actor }}" --password ${{ secrets.GITHUB_TOKEN }} ghcr.io - run: docker buildx create --use - run: docker buildx build --platform linux/amd64,linux/arm64 -f ${SERVICE_DIR}/Dockerfile --tag $IMAGE_NAME:${{ github.ref_name }} --push ${SERVICE_DIR} - run: docker buildx build --platform linux/amd64,linux/arm64 -f ${SERVICE_DIR}/Dockerfile --tag $IMAGE_NAME:latest --push ${SERVICE_DIR} ================================================ FILE: .gitignore ================================================ **/.DS_Store models/*.bin models/*.gguf **/.todo ================================================ FILE: LICENSE.md ================================================ MIT License Copyright (c) 2023 Umbrel, Inc. 
Copyright (c) 2023 Mckay Wrigley Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================

LlamaGPT

LlamaGPT

A self-hosted, offline, ChatGPT-like chatbot, powered by Llama 2. 100% private, with no data leaving your device.
New: Support for Code Llama models and Nvidia GPUs.

umbrel.com (we're hiring) »

## Contents 1. [Demo](#demo) 2. [Supported Models](#supported-models) 3. [How to install](#how-to-install) - [On umbrelOS home server](#install-llamagpt-on-your-umbrelos-home-server) - [On M1/M2 Mac](#install-llamagpt-on-m1m2-mac) - [Anywhere else with Docker](#install-llamagpt-anywhere-else-with-docker) - [Kubernetes](#install-llamagpt-with-kubernetes) 4. [OpenAI-compatible API](#openai-compatible-api) 5. [Benchmarks](#benchmarks) 6. [Roadmap and contributing](#roadmap-and-contributing) 7. [Acknowledgements](#acknowledgements) ## Demo https://github.com/getumbrel/llama-gpt/assets/10330103/5d1a76b8-ed03-4a51-90bd-12ebfaf1e6cd ## Supported models Currently, LlamaGPT supports the following models. Support for running custom models is on the roadmap. | Model name | Model size | Model download size | Memory required | | ---------------------------------------- | ---------- | ------------------- | --------------- | | Nous Hermes Llama 2 7B Chat (GGML q4_0) | 7B | 3.79GB | 6.29GB | | Nous Hermes Llama 2 13B Chat (GGML q4_0) | 13B | 7.32GB | 9.82GB | | Nous Hermes Llama 2 70B Chat (GGML q4_0) | 70B | 38.87GB | 41.37GB | | Code Llama 7B Chat (GGUF Q4_K_M) | 7B | 4.24GB | 6.74GB | | Code Llama 13B Chat (GGUF Q4_K_M) | 13B | 8.06GB | 10.56GB | | Phind Code Llama 34B Chat (GGUF Q4_K_M) | 34B | 20.22GB | 22.72GB | ## How to install ### Install LlamaGPT on your umbrelOS home server Running LlamaGPT on an [umbrelOS](https://umbrel.com) home server is one click. Simply install it from the [Umbrel App Store](https://apps.umbrel.com/app/llama-gpt). [![LlamaGPT on Umbrel App Store](https://apps.umbrel.com/app/llama-gpt/badge-light.svg)](https://apps.umbrel.com/app/llama-gpt) ### Install LlamaGPT on M1/M2 Mac Make sure you have Docker and Xcode installed. 
Then, clone this repo and `cd` into it: ``` git clone https://github.com/getumbrel/llama-gpt.git cd llama-gpt ``` Run LlamaGPT with the following command: ``` ./run-mac.sh --model 7b ``` You can access LlamaGPT at http://localhost:3000. > To run 13B or 70B chat models, replace `7b` with `13b` or `70b` respectively. > To run 7B, 13B or 34B Code Llama models, replace `7b` with `code-7b`, `code-13b` or `code-34b` respectively. To stop LlamaGPT, do `Ctrl + C` in Terminal. ### Install LlamaGPT anywhere else with Docker You can run LlamaGPT on any x86 or arm64 system. Make sure you have Docker installed. Then, clone this repo and `cd` into it: ``` git clone https://github.com/getumbrel/llama-gpt.git cd llama-gpt ``` Run LlamaGPT with the following command: ``` ./run.sh --model 7b ``` Or if you have an Nvidia GPU, you can run LlamaGPT with CUDA support using the `--with-cuda` flag, like: ``` ./run.sh --model 7b --with-cuda ``` You can access LlamaGPT at `http://localhost:3000`. > To run 13B or 70B chat models, replace `7b` with `13b` or `70b` respectively. > To run Code Llama 7B, 13B or 34B models, replace `7b` with `code-7b`, `code-13b` or `code-34b` respectively. To stop LlamaGPT, do `Ctrl + C` in Terminal. > Note: On the first run, it may take a while for the model to be downloaded to the `/models` directory. You may also see lots of output like this for a few minutes, which is normal: > > ``` > llama-gpt-llama-gpt-ui-1 | [INFO wait] Host [llama-gpt-api-13b:8000] not yet available... > ``` > > After the model has been automatically downloaded and loaded, and the API server is running, you'll see an output like: > > ``` > llama-gpt-ui_1 | ready - started server on 0.0.0.0:3000, url: http://localhost:3000 > ``` > > You can then access LlamaGPT at http://localhost:3000. --- ### Install LlamaGPT with Kubernetes First, make sure you have a running Kubernetes cluster and `kubectl` is configured to interact with it. Then, clone this repo and `cd` into it. 
To deploy to Kubernetes first create a namespace: ```bash kubectl create ns llama ``` Then apply the manifests under the `/deploy/kubernetes` directory with ```bash kubectl apply -k deploy/kubernetes/. -n llama ``` Expose your service however you would normally do that. ## OpenAI compatible API Thanks to llama-cpp-python, a drop-in replacement for OpenAI API is available at `http://localhost:3001`. Open http://localhost:3001/docs to see the API documentation. ## Benchmarks We've tested LlamaGPT models on the following hardware with the default system prompt, and user prompt: "How does the universe expand?" at temperature 0 to guarantee deterministic results. Generation speed is averaged over the first 10 generations. Feel free to add your own benchmarks to this table by opening a pull request. #### Nous Hermes Llama 2 7B Chat (GGML q4_0) | Device | Generation speed | | ----------------------------------- | ---------------- | | M1 Max MacBook Pro (64GB RAM) | 54 tokens/sec | | GCP c2-standard-16 vCPU (64 GB RAM) | 16.7 tokens/sec | | Ryzen 5700G 4.4GHz 4c (16 GB RAM) | 11.50 tokens/sec | | GCP c2-standard-4 vCPU (16 GB RAM) | 4.3 tokens/sec | | Umbrel Home (16GB RAM) | 2.7 tokens/sec | | Raspberry Pi 4 (8GB RAM) | 0.9 tokens/sec | #### Nous Hermes Llama 2 13B Chat (GGML q4_0) | Device | Generation speed | | ----------------------------------- | ---------------- | | M1 Max MacBook Pro (64GB RAM) | 20 tokens/sec | | GCP c2-standard-16 vCPU (64 GB RAM) | 8.6 tokens/sec | | GCP c2-standard-4 vCPU (16 GB RAM) | 2.2 tokens/sec | | Umbrel Home (16GB RAM) | 1.5 tokens/sec | #### Nous Hermes Llama 2 70B Chat (GGML q4_0) | Device | Generation speed | | ----------------------------------- | ---------------- | | M1 Max MacBook Pro (64GB RAM) | 4.8 tokens/sec | | GCP e2-standard-16 vCPU (64 GB RAM) | 1.75 tokens/sec | | GCP c2-standard-16 vCPU (64 GB RAM) | 1.62 tokens/sec | #### Code Llama 7B Chat (GGUF Q4_K_M) | Device | Generation speed | | ----------------------------- | 
---------------- | | M1 Max MacBook Pro (64GB RAM) | 41 tokens/sec | #### Code Llama 13B Chat (GGUF Q4_K_M) | Device | Generation speed | | ----------------------------- | ---------------- | | M1 Max MacBook Pro (64GB RAM) | 25 tokens/sec | #### Phind Code Llama 34B Chat (GGUF Q4_K_M) | Device | Generation speed | | ----------------------------- | ---------------- | | M1 Max MacBook Pro (64GB RAM) | 10.26 tokens/sec | ## Roadmap and contributing We're looking to add more features to LlamaGPT. You can see the roadmap [here](https://github.com/getumbrel/llama-gpt/issues/8#issuecomment-1681321145). The highest priorities are: - [x] Moving the model out of the Docker image and into a separate volume. - [x] Add Metal support for M1/M2 Macs. - [x] Add support for Code Llama models. - [x] Add CUDA support for NVIDIA GPUs. - [ ] Add ability to load custom models. - [ ] Allow users to switch between models. If you're a developer who'd like to help with any of these, please open an issue to discuss the best way to tackle the challenge. If you're looking to help but not sure where to begin, check out [these issues](https://github.com/getumbrel/llama-gpt/labels/good%20first%20issue) that have specifically been marked as being friendly to new contributors. ## Acknowledgements A massive thank you to the following developers and teams for making LlamaGPT possible: - [Mckay Wrigley](https://github.com/mckaywrigley) for building [Chatbot UI](https://github.com/mckaywrigley). - [Georgi Gerganov](https://github.com/ggerganov) for implementing [llama.cpp](https://github.com/ggerganov/llama.cpp). - [Andrei](https://github.com/abetlen) for building the [Python bindings for llama.cpp](https://github.com/abetlen/llama-cpp-python). - [NousResearch](https://nousresearch.com) for [fine-tuning the Llama 2 7B and 13B models](https://huggingface.co/NousResearch). - [Phind](https://www.phind.com/) for [fine-tuning the Code Llama 34B model](https://www.phind.com/blog/code-llama-beats-gpt4). 
- [Tom Jobbins](https://huggingface.co/TheBloke) for [quantizing the Llama 2 models](https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML). - [Meta](https://ai.meta.com/llama) for releasing Llama 2 and Code Llama under a permissive license. --- [![License](https://img.shields.io/github/license/getumbrel/llama-gpt?color=%235351FB)](https://github.com/getumbrel/llama-gpt/blob/master/LICENSE.md) [umbrel.com](https://umbrel.com) ================================================ FILE: api/run.sh ================================================
#!/bin/bash
#
# Entrypoint of the CPU API container.
#
# Ensures the model file named by $MODEL exists (downloading it from
# $MODEL_DOWNLOAD_URL when it does not), runs `make build`, and then replaces
# this shell with the llama_cpp.server process.
#
# Required environment variables:
#   MODEL               path of the model file inside the container
#   MODEL_DOWNLOAD_URL  URL the model is fetched from when $MODEL is missing
#
# NOTE: the compose files start this script with `/bin/sh /api/run.sh`, so the
# shebang is not honoured there; keep the script POSIX-sh compatible.

# Check if the MODEL environment variable is set
if [ -z "$MODEL" ]
then
    echo "Please set the MODEL environment variable"
    exit 1
fi

# Check if the MODEL_DOWNLOAD_URL environment variable is set
if [ -z "$MODEL_DOWNLOAD_URL" ]
then
    echo "Please set the MODEL_DOWNLOAD_URL environment variable"
    exit 1
fi

# Check if the model file exists
if [ ! -f "$MODEL" ]; then
    echo "Model file not found. Downloading..."

    # Check if curl is installed
    if ! [ -x "$(command -v curl)" ]; then
        echo "curl is not installed. Installing..."
        apt-get update --yes --quiet
        apt-get install --yes --quiet curl
    fi

    # Download the model file.
    # - The download goes to "$MODEL.part" and is only renamed once it has
    #   completed, so an interrupted download is never mistaken for a usable
    #   model on the next start.
    # - --fail makes curl return non-zero on HTTP errors instead of saving the
    #   server's error page as the model.
    if ! curl -L --fail -o "$MODEL.part" "$MODEL_DOWNLOAD_URL"; then
        echo "Download failed. Trying with TLS 1.2..."
        if ! curl -L --fail --tlsv1.2 -o "$MODEL.part" "$MODEL_DOWNLOAD_URL"; then
            echo "Download failed. Exiting..."
            rm -f "$MODEL.part"
            exit 1
        fi
    fi
    mv "$MODEL.part" "$MODEL"
else
    echo "$MODEL model found."
fi

# Build the project
make build

# Get the number of available CPU threads
n_threads=$(grep -c ^processor /proc/cpuinfo)

# Define context window
n_ctx=4096

# Offload everything to CPU
n_gpu_layers=0

# Define batch size based on total RAM (MemTotal is reported in kB)
total_ram=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
n_batch=2096
if [ "$total_ram" -lt 8000000 ]; then
    n_batch=1024
fi

# Display configuration information
echo "Initializing server with:"
echo "Batch size: $n_batch"
echo "Number of CPU threads: $n_threads"
echo "Number of GPU layers: $n_gpu_layers"
echo "Context window: $n_ctx"

# Run the server (exec so the server replaces this shell process)
exec python3 -m llama_cpp.server --n_ctx "$n_ctx" --n_threads "$n_threads" --n_gpu_layers "$n_gpu_layers" --n_batch "$n_batch"
================================================ FILE: cuda/ggml.Dockerfile ================================================ ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04" FROM nvidia/cuda:${CUDA_IMAGE} # We need to set the host to 0.0.0.0 to allow outside access ENV HOST 0.0.0.0 RUN apt-get update && apt-get upgrade -y \ && apt-get install -y git build-essential \ python3 python3-pip gcc wget \ ocl-icd-opencl-dev opencl-headers clinfo \ libclblast-dev libopenblas-dev \ && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd COPY . . 
# setting build related env vars ENV CUDA_DOCKER_ARCH=all ENV LLAMA_CUBLAS=1 # Install dependencies RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings # Install llama-cpp-python 0.1.78 which has GGML support (build with cuda) RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78 # Run the server CMD python3 -m llama_cpp.server ================================================ FILE: cuda/gguf.Dockerfile ================================================ ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04" FROM nvidia/cuda:${CUDA_IMAGE} # We need to set the host to 0.0.0.0 to allow outside access ENV HOST 0.0.0.0 RUN apt-get update && apt-get upgrade -y \ && apt-get install -y git build-essential \ python3 python3-pip gcc wget \ ocl-icd-opencl-dev opencl-headers clinfo \ libclblast-dev libopenblas-dev \ && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd COPY . . # setting build related env vars ENV CUDA_DOCKER_ARCH=all ENV LLAMA_CUBLAS=1 # Install dependencies RUN python3 -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings # Install llama-cpp-python 0.1.80 which has GGUF support (build with cuda) RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.80 # Run the server CMD python3 -m llama_cpp.server ================================================ FILE: cuda/run.sh ================================================
#!/bin/bash
#
# Entrypoint of the CUDA API container.
#
# Ensures the model file named by $MODEL exists (downloading it from
# $MODEL_DOWNLOAD_URL when it does not), runs `make build`, and then replaces
# this shell with the llama_cpp.server process, offloading a fixed number of
# layers to the GPU.
#
# Required environment variables:
#   MODEL               path of the model file inside the container
#   MODEL_DOWNLOAD_URL  URL the model is fetched from when $MODEL is missing
#
# NOTE: the CUDA compose files start this script with `/bin/sh /cuda/run.sh`,
# so the shebang is not honoured there; keep the script POSIX-sh compatible.

# Check if the MODEL environment variable is set
if [ -z "$MODEL" ]
then
    echo "Please set the MODEL environment variable"
    exit 1
fi

# Check if the MODEL_DOWNLOAD_URL environment variable is set
if [ -z "$MODEL_DOWNLOAD_URL" ]
then
    echo "Please set the MODEL_DOWNLOAD_URL environment variable"
    exit 1
fi

# Check if the model file exists
if [ ! -f "$MODEL" ]; then
    echo "Model file not found. Downloading..."

    # Check if curl is installed
    if ! [ -x "$(command -v curl)" ]; then
        echo "curl is not installed. Installing..."
        apt-get update --yes --quiet
        apt-get install --yes --quiet curl
    fi

    # Download the model file.
    # - The download goes to "$MODEL.part" and is only renamed once it has
    #   completed, so an interrupted download is never mistaken for a usable
    #   model on the next start.
    # - --fail makes curl return non-zero on HTTP errors instead of saving the
    #   server's error page as the model.
    if ! curl -L --fail -o "$MODEL.part" "$MODEL_DOWNLOAD_URL"; then
        echo "Download failed. Trying with TLS 1.2..."
        if ! curl -L --fail --tlsv1.2 -o "$MODEL.part" "$MODEL_DOWNLOAD_URL"; then
            echo "Download failed. Exiting..."
            rm -f "$MODEL.part"
            exit 1
        fi
    fi
    mv "$MODEL.part" "$MODEL"
else
    echo "$MODEL model found."
fi

# Build the project
make build

# Get the number of available CPU threads
n_threads=$(grep -c ^processor /proc/cpuinfo)

# Define context window
n_ctx=4096

# Offload layers to GPU
n_gpu_layers=10

# Define batch size based on total RAM (MemTotal is reported in kB)
total_ram=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
n_batch=2096
if [ "$total_ram" -lt 8000000 ]; then
    n_batch=1024
fi

# Display configuration information
echo "Initializing server with:"
echo "Batch size: $n_batch"
echo "Number of CPU threads: $n_threads"
echo "Number of GPU layers: $n_gpu_layers"
echo "Context window: $n_ctx"

# Run the server (exec so the server replaces this shell process)
exec python3 -m llama_cpp.server --n_ctx "$n_ctx" --n_threads "$n_threads" --n_gpu_layers "$n_gpu_layers" --n_batch "$n_batch"
================================================ FILE: deploy/kubernetes/kustomization.yaml ================================================ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - llama-gpt-api-deployment.yaml - llama-gpt-api-service.yaml - llama-gpt-ui-deployment.yaml - llama-gpt-ui-service.yaml # patches: # - configMapGenerator: - name: llama-gpt literals: - DEFAULT_MODEL="/models/llama-2-7b-chat.bin" - OPENAI_API_HOST="http://llama-gpt-api:8000" - OPENAI_API_KEY="sk-XXXXXXXXXXXXXXXXXXXX" - WAIT_HOSTS="llama-gpt-api:8000" - WAIT_TIMEOUT="600" ================================================ FILE: deploy/kubernetes/llama-gpt-api-deployment.yaml ================================================ apiVersion: apps/v1 kind: Deployment metadata: labels: service: llama-gpt-api name: llama-gpt-api spec: replicas: 1 selector: matchLabels: service: 
llama-gpt-api template: metadata: labels: service: llama-gpt-api spec: containers: - name: llama-gpt-api image: ghcr.io/getumbrel/llama-gpt-api:1.0.1 env: - name: MODEL valueFrom: configMapKeyRef: name: llama-gpt key: DEFAULT_MODEL resources: requests: memory: 5Gi restartPolicy: Always ================================================ FILE: deploy/kubernetes/llama-gpt-api-service.yaml ================================================ apiVersion: v1 kind: Service metadata: labels: service: llama-gpt-api name: llama-gpt-api spec: ports: - name: api port: 8000 targetPort: 8000 selector: service: llama-gpt-api status: loadBalancer: {} ================================================ FILE: deploy/kubernetes/llama-gpt-ui-deployment.yaml ================================================ apiVersion: apps/v1 kind: Deployment metadata: labels: service: llama-gpt-ui name: llama-gpt-ui spec: replicas: 1 selector: matchLabels: service: llama-gpt-ui template: metadata: labels: service: llama-gpt-ui spec: containers: - name: llama-gpt-ui image: ghcr.io/getumbrel/llama-gpt-ui:latest envFrom: - configMapRef: name: llama-gpt ports: - containerPort: 3000 resources: {} restartPolicy: Always ================================================ FILE: deploy/kubernetes/llama-gpt-ui-service.yaml ================================================ apiVersion: v1 kind: Service metadata: labels: service: llama-gpt-ui name: llama-gpt-ui spec: ports: - name: ui port: 3000 targetPort: 3000 selector: service: llama-gpt-ui type: ClusterIP status: loadBalancer: {} ================================================ FILE: docker-compose-cuda-ggml.yml ================================================ version: '3.6' services: llama-gpt-api-cuda-ggml: build: context: ./cuda dockerfile: ggml.Dockerfile restart: on-failure volumes: - './models:/models' - './cuda:/cuda' ports: - 3001:8000 environment: MODEL: '/models/${MODEL_NAME:-llama-2-7b-chat.bin}' MODEL_DOWNLOAD_URL: 
'${MODEL_DOWNLOAD_URL:-https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin}' N_GQA: '${N_GQA:-1}' USE_MLOCK: 1 cap_add: - IPC_LOCK - SYS_RESOURCE command: '/bin/sh /cuda/run.sh' deploy: resources: reservations: devices: - driver: nvidia count: 1 capabilities: [gpu] llama-gpt-ui: # TODO: Use this image instead of building from source after the next release # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest' build: context: ./ui dockerfile: Dockerfile ports: - 3000:3000 restart: on-failure environment: - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' - 'OPENAI_API_HOST=http://llama-gpt-api-cuda-ggml:8000' - 'DEFAULT_MODEL=/models/${MODEL_NAME:-llama-2-7b-chat.bin}' - 'NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=${DEFAULT_SYSTEM_PROMPT:-"You are a helpful and friendly AI assistant. Respond very concisely."}' - 'WAIT_HOSTS=llama-gpt-api-cuda-ggml:8000' - 'WAIT_TIMEOUT=${WAIT_TIMEOUT:-3600}' ================================================ FILE: docker-compose-cuda-gguf.yml ================================================ version: '3.6' services: llama-gpt-api-cuda-gguf: build: context: ./cuda dockerfile: gguf.Dockerfile restart: on-failure volumes: - './models:/models' - './cuda:/cuda' ports: - 3001:8000 environment: MODEL: '/models/${MODEL_NAME:-code-llama-2-7b-chat.gguf}' MODEL_DOWNLOAD_URL: '${MODEL_DOWNLOAD_URL:-https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf}' N_GQA: '${N_GQA:-1}' USE_MLOCK: 1 cap_add: - IPC_LOCK - SYS_RESOURCE command: '/bin/sh /cuda/run.sh' deploy: resources: reservations: devices: - driver: nvidia count: 1 capabilities: [gpu] llama-gpt-ui: # TODO: Use this image instead of building from source after the next release # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest' build: context: ./ui dockerfile: Dockerfile ports: - 3000:3000 restart: on-failure environment: - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' - 
'OPENAI_API_HOST=http://llama-gpt-api-cuda-gguf:8000' - 'DEFAULT_MODEL=/models/${MODEL_NAME:-code-llama-2-7b-chat.gguf}' - 'NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=${DEFAULT_SYSTEM_PROMPT:-"You are a helpful and friendly AI assistant. Respond very concisely."}' - 'WAIT_HOSTS=llama-gpt-api-cuda-gguf:8000' - 'WAIT_TIMEOUT=${WAIT_TIMEOUT:-3600}' ================================================ FILE: docker-compose-gguf.yml ================================================
# CPU-only stack for GGUF models: the llama-cpp-python API server plus the
# chat UI that talks to it.
version: '3.6'

services:
  llama-gpt-api:
    # Pin to llama-cpp-python 0.1.80 with GGUF support
    image: ghcr.io/abetlen/llama-cpp-python:latest@sha256:de0fd227f348b5e43d4b5b7300f1344e712c14132914d1332182e9ecfde502b2
    restart: on-failure
    volumes:
      - './models:/models'
      - './api:/api'
    ports:
      - 3001:8000
    environment:
      MODEL: '/models/${MODEL_NAME:-code-llama-2-7b-chat.gguf}'
      MODEL_DOWNLOAD_URL: '${MODEL_DOWNLOAD_URL:-https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf}'
      N_GQA: '${N_GQA:-1}'
      USE_MLOCK: 1
    cap_add:
      - IPC_LOCK
    command: '/bin/sh /api/run.sh'
  llama-gpt-ui:
    # TODO: Use this image instead of building from source after the next release
    # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest'
    build:
      context: ./ui
      dockerfile: Dockerfile
    ports:
      - 3000:3000
    restart: on-failure
    environment:
      - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX'
      - 'OPENAI_API_HOST=http://llama-gpt-api:8000'
      # Keep this default in sync with the MODEL default of llama-gpt-api above
      - 'DEFAULT_MODEL=/models/${MODEL_NAME:-code-llama-2-7b-chat.gguf}'
      - 'NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=${DEFAULT_SYSTEM_PROMPT:-"You are a helpful and friendly AI assistant. 
Respond very concisely."}' - 'WAIT_HOSTS=llama-gpt-api:8000' - 'WAIT_TIMEOUT=${WAIT_TIMEOUT:-3600}' ================================================ FILE: docker-compose-mac.yml ================================================ version: '3.6' services: llama-gpt-ui-mac: build: context: ./ui dockerfile: no-wait.Dockerfile ports: - 3000:3000 restart: on-failure environment: - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' - 'OPENAI_API_HOST=http://host.docker.internal:3001' - 'DEFAULT_MODEL=$MODEL' - 'NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=${DEFAULT_SYSTEM_PROMPT:-"You are a helpful and friendly AI assistant. Respond very concisely and use markdown if responding with code."}' ================================================ FILE: docker-compose.yml ================================================ version: '3.6' services: llama-gpt-api: # Pin the image to llama-cpp-python 0.1.78 to avoid ggml => gguf breaking changes image: ghcr.io/abetlen/llama-cpp-python:latest@sha256:b6d21ff8c4d9baad65e1fa741a0f8c898d68735fff3f3cd777e3f0c6a1839dd4 restart: on-failure volumes: - './models:/models' - './api:/api' ports: - 3001:8000 environment: MODEL: '/models/${MODEL_NAME:-llama-2-7b-chat.bin}' MODEL_DOWNLOAD_URL: '${MODEL_DOWNLOAD_URL:-https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin}' N_GQA: '${N_GQA:-1}' USE_MLOCK: 1 cap_add: - IPC_LOCK command: '/bin/sh /api/run.sh' llama-gpt-ui: # TODO: Use this image instead of building from source after the next release # image: 'ghcr.io/getumbrel/llama-gpt-ui:latest' build: context: ./ui dockerfile: Dockerfile ports: - 3000:3000 restart: on-failure environment: - 'OPENAI_API_KEY=sk-XXXXXXXXXXXXXXXXXXXX' - 'OPENAI_API_HOST=http://llama-gpt-api:8000' - 'DEFAULT_MODEL=/models/${MODEL_NAME:-llama-2-7b-chat.bin}' - 'NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=${DEFAULT_SYSTEM_PROMPT:-"You are a helpful and friendly AI assistant. 
Respond very concisely."}' - 'WAIT_HOSTS=llama-gpt-api:8000' - 'WAIT_TIMEOUT=${WAIT_TIMEOUT:-3600}' ================================================ FILE: models/.gitkeep ================================================ ================================================ FILE: run-mac.sh ================================================ #!/bin/bash set -e # Define a function to refresh the source of .zshrc or .bashrc source_shell_rc() { # Source .zshrc or .bashrc if [ -f ~/.zshrc ]; then source ~/.zshrc elif [ -f ~/.bashrc ]; then source ~/.bashrc else echo "No .bashrc or .zshrc file found." fi } # Define a function to install conda with Miniforge3 install_conda() { # Download Miniforge3 curl -L -o /tmp/Miniforge3.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh bash /tmp/Miniforge3.sh source_shell_rc } # Define a function to install a specific version of llama-cpp-python install_llama_cpp_python() { local model_type=$1 local version=$2 local installed_version=$(pip3 show llama-cpp-python | grep -i version | awk '{print $2}') if [[ "$installed_version" != "$version" ]]; then echo "llama-cpp-python version is not $version. Installing $version version for $model_type support..." pip3 uninstall llama-cpp-python -y CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip3 install llama-cpp-python==$version --no-cache-dir pip3 install 'llama-cpp-python[server]' else echo "llama-cpp-python version is $version." fi } source_shell_rc # Check if the platform is MacOS and the architecture is arm64 if [[ "$(uname)" != "Darwin" ]] || [[ "$(uname -m)" != "arm64" ]]; then echo "This script is intended to be run on MacOS with M1/M2 chips. Exiting..." exit 1 fi # Check if Docker is installed if ! command -v docker &> /dev/null; then echo "Docker is not installed. Exiting..." exit 1 fi # Check if python3 is installed if ! command -v python3 &> /dev/null; then echo "Python3 is not installed. Exiting..." 
exit 1 fi # Check if Xcode is installed xcode_path=$(xcode-select -p 2>/dev/null) if [ -z "$xcode_path" ]; then echo "Xcode is not installed. Installing (this may take a long time)..." xcode-select --install else echo "Xcode is installed at $xcode_path" fi # Check if conda is installed if ! command -v conda &> /dev/null; then echo "Conda is not installed. Installing Miniforge3 which includes conda..." install_conda else echo "Conda is installed." # TODO: Check if the conda version for MacOS that supports Metal GPU is installed # conda_version=$(conda --version) # if [[ $conda_version != *"Miniforge3"* ]]; then # echo "Conda version that supports Metal GPU is not installed. Installing..." # install_conda # else # echo "Conda version that supports M1/M2 is installed." # fi fi # Check if the conda environment 'llama-gpt' exists if conda env list | grep -q 'llama-gpt'; then echo "Conda environment 'llama-gpt' already exists." else echo "Creating a conda environment called 'llama-gpt'..." conda create -n llama-gpt python=$(python3 --version | cut -d ' ' -f 2) fi # Check if the conda environment 'llama-gpt' is active if [[ "$(conda info --envs | grep '*' | awk '{print $1}')" != "llama-gpt" ]]; then echo "Activating the conda environment 'llama-gpt'..." conda activate llama-gpt else echo "Conda environment 'llama-gpt' is already active." fi # Parse command line arguments for --model while [[ "$#" -gt 0 ]]; do case $1 in --model) MODEL="$2"; shift ;; *) echo "Unknown parameter passed: $1"; exit 1 ;; esac shift done # If no model is passed, default to 7b model if [[ -z "$MODEL" ]]; then echo "No model value provided. Defaulting to 7b. If you want to change the model, exit the script and use --model to provide the model value." echo "Supported models are 7b, 13b, 70b, code-7b, code-13b, code-34b." 
MODEL="7b" fi # Get the number of available CPU cores and subtract 2 n_threads=$(($(sysctl -n hw.logicalcpu) - 2)) # Define context window n_ctx=4096 # Define batch size n_batch=2096 # Define number of GPU layers n_gpu_layers=1 # Define grouping factor N_GQA=1 model_type="gguf" # Set values for MODEL and MODEL_DOWNLOAD_URL based on the model passed case $MODEL in 7b) MODEL="./models/llama-2-7b-chat.bin" MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin" model_type="ggml" ;; 13b) MODEL="./models/llama-2-13b-chat.bin" MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin" model_type="ggml" n_gpu_layers=2 ;; 70b) MODEL="./models/llama-2-70b-chat.bin" MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGML/resolve/main/llama-2-70b-chat.ggmlv3.q4_0.bin" model_type="ggml" n_gpu_layers=3 # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. N_GQA=8 ;; code-7b) MODEL="./models/code-llama-7b-chat.gguf" MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf" DEFAULT_SYSTEM_PROMPT="You are a helpful coding assistant. Use markdown when responding with code." n_gpu_layers=1 n_ctx=8192 ;; code-13b) MODEL="./models/code-llama-13b-chat.gguf" MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/CodeLlama-13B-Instruct-GGUF/resolve/main/codellama-13b-instruct.Q4_K_M.gguf" DEFAULT_SYSTEM_PROMPT="You are a helpful coding assistant. Use markdown when responding with code." n_gpu_layers=2 n_ctx=8192 ;; code-34b) MODEL="./models/code-llama-34b-chat.gguf" MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v1-GGUF/resolve/main/phind-codellama-34b-v1.Q4_K_M.gguf" DEFAULT_SYSTEM_PROMPT="You are a helpful coding assistant. Use markdown when responding with code." 
n_gpu_layers=3 n_ctx=8192 # Code Llama 34B's grouping factor is 8 compared to 7B and 13B's 1. N_GQA=8 ;; *) echo "Invalid model passed: $MODEL"; exit 1 ;; esac # Check if llama-cpp-python is already installed llama_cpp_python_installed=$(pip3 list | grep -q llama-cpp-python && echo "installed" || echo "not installed") if [[ "$llama_cpp_python_installed" == "not installed" ]]; then echo "llama-cpp-python is not installed. Installing..." if [[ "$model_type" == "ggml" ]]; then install_llama_cpp_python "GGML" "0.1.78" else install_llama_cpp_python "GGUF" "0.1.80" fi else echo "llama-cpp-python is installed." if [[ "$model_type" == "ggml" ]]; then install_llama_cpp_python "GGML" "0.1.78" else install_llama_cpp_python "GGUF" "0.1.80" fi fi # Check if the model file exists if [ ! -f $MODEL ]; then echo "Model file not found. Downloading..." # Download the model file with a custom progress bar showing percentage, download speed, downloaded, total size, and estimated time remaining curl -L -o $MODEL $MODEL_DOWNLOAD_URL if [ $? -ne 0 ]; then echo "Download failed. Trying with TLS 1.2..." curl -L --tlsv1.2 -o $MODEL $MODEL_DOWNLOAD_URL fi else echo "$MODEL model found." fi # Display configuration information echo "Initializing server with:" echo "Batch size: $n_batch" echo "Number of CPU threads: $n_threads" echo "Number of GPU layers: $n_gpu_layers" echo "Context window: $n_ctx" echo "GQA: $n_gqa" # Export environment variables export MODEL export N_GQA export DEFAULT_SYSTEM_PROMPT # Run docker-compose with the macOS yml file docker compose -f ./docker-compose-mac.yml up --remove-orphans --build & # Run the server python3 -m llama_cpp.server --n_ctx $n_ctx --n_threads $n_threads --n_gpu_layers $n_gpu_layers --n_batch $n_batch --model $MODEL --port 3001 & # Define a function to stop docker-compose and the python3 command stop_commands() { echo "Stopping docker-compose..." docker compose -f ./docker-compose-mac.yml down echo "Stopping python server..." 
pkill -f "python3 -m llama_cpp.server" echo "Deactivating conda environment..." conda deactivate echo "All processes stopped." } # Set a trap to catch SIGINT and stop the commands trap stop_commands SIGINT # Wait for both commands to finish wait $DOCKER_COMPOSE_PID wait $PYTHON_PID ================================================ FILE: run.sh ================================================ #!/bin/bash # Check if docker compose is installed if ! command -v docker &> /dev/null then echo "Docker could not be found. Please install Docker and try again." exit fi # Parse command line arguments for model value and check for --with-cuda flag with_cuda=0 while [[ "$#" -gt 0 ]]; do case $1 in --model) model="$2"; shift ;; --with-cuda) with_cuda=1 ;; *) echo "Unknown parameter passed: $1"; exit 1 ;; esac shift done # Check if model value is provided if [ -z "$model" ] then echo "No model value provided. Defaulting to 7b. If you want to change the model, exit the script and use --model to provide the model value." echo "Supported models are 7b, 13b, 70b, code-7b, code-13b, code-34b." model="7b" fi model_type="gguf" # Export the model value as an environment variable case $model in 7b) export MODEL_NAME="llama-2-7b-chat.bin" export MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Nous-Hermes-Llama-2-7B-GGML/resolve/main/nous-hermes-llama-2-7b.ggmlv3.q4_0.bin" export WAIT_TIMEOUT=3600 export N_GQA=1 model_type="ggml" ;; 13b) export MODEL_NAME="llama-2-13b-chat.bin" export MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Nous-Hermes-Llama2-GGML/resolve/main/nous-hermes-llama2-13b.ggmlv3.q4_0.bin" export WAIT_TIMEOUT=10800 export N_GQA=1 model_type="ggml" ;; 70b) export MODEL_NAME="llama-2-70b-chat.bin" export MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Nous-Hermes-Llama2-70B-GGML/resolve/main/nous-hermes-llama2-70b.ggmlv3.Q4_0.bin" export WAIT_TIMEOUT=21600 # Llama 2 70B's grouping factor is 8 compared to 7B and 13B's 1. 
Currently, # it's not possible to change this using --n_gqa with llama-cpp-python in # run.sh, so we expose it as an environment variable. # See: https://github.com/abetlen/llama-cpp-python/issues/528 # and: https://github.com/facebookresearch/llama/issues/407 export N_GQA=8 model_type="ggml" ;; code-7b) export MODEL_NAME="code-llama-7b-chat.gguf" export MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_M.gguf" export WAIT_TIMEOUT=3600 export DEFAULT_SYSTEM_PROMPT="You are a helpful coding assistant. Use markdown when responding with code." export N_GQA=1 ;; code-13b) export MODEL_NAME="code-llama-13b-chat.gguf" export MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/CodeLlama-13B-Instruct-GGUF/resolve/main/codellama-13b-instruct.Q4_K_M.gguf" export DEFAULT_SYSTEM_PROMPT="You are a helpful coding assistant. Use markdown when responding with code." export WAIT_TIMEOUT=10800 export N_GQA=1 ;; code-34b) export MODEL_NAME="code-llama-34b-chat.gguf" export MODEL_DOWNLOAD_URL="https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v1-GGUF/resolve/main/phind-codellama-34b-v1.Q4_K_M.gguf" export DEFAULT_SYSTEM_PROMPT="You are a helpful coding assistant. Use markdown when responding with code." export WAIT_TIMEOUT=21600 # Code Llama 34B's grouping factor is 8 compared to 7B and 13B's 1. Currently, # it's not possible to change this using --n_gqa with llama-cpp-python in # run.sh, so we expose it as an environment variable. # See: https://github.com/abetlen/llama-cpp-python/issues/528 export N_GQA=8 ;; *) echo "Invalid model value provided. Supported models are 7b, 13b, 70b, code-7b, code-13b, code-34b." 
exit 1 ;; esac # Run docker compose with docker-compose-ggml.yml or docker-compose-gguf.yml if [ "$with_cuda" -eq 1 ] then if [ "$model_type" = "ggml" ] then docker compose -f docker-compose-cuda-ggml.yml up --build else docker compose -f docker-compose-cuda-gguf.yml up --build fi else if [ "$model_type" = "ggml" ] then docker compose -f docker-compose.yml up --build else docker compose -f docker-compose-gguf.yml up --build fi fi ================================================ FILE: ui/.dockerignore ================================================ .env .env.local node_modules test-results ================================================ FILE: ui/.eslintrc.json ================================================ { "extends": "next/core-web-vitals" } ================================================ FILE: ui/.gitignore ================================================ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. # dependencies /node_modules /.pnp .pnp.js # testing /coverage /test-results # next.js /.next/ /out/ /dist # production /build # misc .DS_Store *.pem # debug npm-debug.log* yarn-debug.log* yarn-error.log* .pnpm-debug.log* # local env files .env*.local # vercel .vercel # typescript *.tsbuildinfo next-env.d.ts .idea pnpm-lock.yaml ================================================ FILE: ui/CONTRIBUTING.md ================================================ # Contributing Guidelines **Welcome to Chatbot UI!** We appreciate your interest in contributing to our project. Before you get started, please read our guidelines for contributing. ## Types of Contributions We welcome the following types of contributions: - Bug fixes - New features - Documentation improvements - Code optimizations - Translations - Tests ## Getting Started To get started, fork the project on GitHub and clone it locally on your machine. Then, create a new branch to work on your changes. 
``` git clone https://github.com/mckaywrigley/chatbot-ui.git cd chatbot-ui git checkout -b my-branch-name ``` Before submitting your pull request, please make sure your changes pass our automated tests and adhere to our code style guidelines. ## Pull Request Process 1. Fork the project on GitHub. 2. Clone your forked repository locally on your machine. 3. Create a new branch from the main branch. 4. Make your changes on the new branch. 5. Ensure that your changes adhere to our code style guidelines and pass our automated tests. 6. Commit your changes and push them to your forked repository. 7. Submit a pull request to the main branch of the main repository. ## Contact If you have any questions or need help getting started, feel free to reach out to me on [Twitter](https://twitter.com/mckaywrigley). ================================================ FILE: ui/Dockerfile ================================================ # ---- Base Node ---- FROM node:19-alpine AS base WORKDIR /app COPY package*.json ./ # ---- Dependencies ---- FROM base AS dependencies RUN npm ci # ---- Build ---- FROM dependencies AS build COPY . . RUN npm run build # ---- Production ---- FROM node:19-alpine AS production WORKDIR /app COPY --from=dependencies /app/node_modules ./node_modules COPY --from=build /app/.next ./.next COPY --from=build /app/public ./public COPY --from=build /app/package*.json ./ COPY --from=build /app/next.config.js ./next.config.js COPY --from=build /app/next-i18next.config.js ./next-i18next.config.js ## Add the wait script to the image COPY --from=ghcr.io/ufoscout/docker-compose-wait:latest /wait /wait # Expose the port the app will run on EXPOSE 3000 # Start the application after the API is ready CMD /wait && npm start ================================================ FILE: ui/Makefile ================================================ include .env .PHONY: all build: docker build -t chatbot-ui . 
run: export $(cat .env | xargs) docker stop chatbot-ui || true && docker rm chatbot-ui || true docker run --name chatbot-ui --rm -e OPENAI_API_KEY=${OPENAI_API_KEY} -p 3000:3000 chatbot-ui logs: docker logs -f chatbot-ui push: docker tag chatbot-ui:latest ${DOCKER_USER}/chatbot-ui:${DOCKER_TAG} docker push ${DOCKER_USER}/chatbot-ui:${DOCKER_TAG} ================================================ FILE: ui/__tests__/utils/app/importExports.test.ts ================================================ import { DEFAULT_SYSTEM_PROMPT, DEFAULT_TEMPERATURE } from '@/utils/app/const'; import { cleanData, isExportFormatV1, isExportFormatV2, isExportFormatV3, isExportFormatV4, isLatestExportFormat, } from '@/utils/app/importExport'; import { ExportFormatV1, ExportFormatV2, ExportFormatV4 } from '@/types/export'; import { OpenAIModelID, OpenAIModels } from '@/types/openai'; import { describe, expect, it } from 'vitest'; describe('Export Format Functions', () => { describe('isExportFormatV1', () => { it('should return true for v1 format', () => { const obj = [{ id: 1 }]; expect(isExportFormatV1(obj)).toBe(true); }); it('should return false for non-v1 formats', () => { const obj = { version: 3, history: [], folders: [] }; expect(isExportFormatV1(obj)).toBe(false); }); }); describe('isExportFormatV2', () => { it('should return true for v2 format', () => { const obj = { history: [], folders: [] }; expect(isExportFormatV2(obj)).toBe(true); }); it('should return false for non-v2 formats', () => { const obj = { version: 3, history: [], folders: [] }; expect(isExportFormatV2(obj)).toBe(false); }); }); describe('isExportFormatV3', () => { it('should return true for v3 format', () => { const obj = { version: 3, history: [], folders: [] }; expect(isExportFormatV3(obj)).toBe(true); }); it('should return false for non-v3 formats', () => { const obj = { version: 4, history: [], folders: [] }; expect(isExportFormatV3(obj)).toBe(false); }); }); describe('isExportFormatV4', () => { it('should return 
true for v4 format', () => { const obj = { version: 4, history: [], folders: [], prompts: [] }; expect(isExportFormatV4(obj)).toBe(true); }); it('should return false for non-v4 formats', () => { const obj = { version: 5, history: [], folders: [], prompts: [] }; expect(isExportFormatV4(obj)).toBe(false); }); }); }); describe('cleanData Functions', () => { describe('cleaning v1 data', () => { it('should return the latest format', () => { const data = [ { id: 1, name: 'conversation 1', messages: [ { role: 'user', content: "what's up ?", }, { role: 'assistant', content: 'Hi', }, ], }, ] as ExportFormatV1; const obj = cleanData(data); expect(isLatestExportFormat(obj)).toBe(true); expect(obj).toEqual({ version: 4, history: [ { id: 1, name: 'conversation 1', messages: [ { role: 'user', content: "what's up ?", }, { role: 'assistant', content: 'Hi', }, ], model: OpenAIModels[OpenAIModelID.GPT_3_5], prompt: DEFAULT_SYSTEM_PROMPT, temperature: DEFAULT_TEMPERATURE, folderId: null, }, ], folders: [], prompts: [], }); }); }); describe('cleaning v2 data', () => { it('should return the latest format', () => { const data = { history: [ { id: '1', name: 'conversation 1', messages: [ { role: 'user', content: "what's up ?", }, { role: 'assistant', content: 'Hi', }, ], }, ], folders: [ { id: 1, name: 'folder 1', }, ], } as ExportFormatV2; const obj = cleanData(data); expect(isLatestExportFormat(obj)).toBe(true); expect(obj).toEqual({ version: 4, history: [ { id: '1', name: 'conversation 1', messages: [ { role: 'user', content: "what's up ?", }, { role: 'assistant', content: 'Hi', }, ], model: OpenAIModels[OpenAIModelID.GPT_3_5], prompt: DEFAULT_SYSTEM_PROMPT, temperature: DEFAULT_TEMPERATURE, folderId: null, }, ], folders: [ { id: '1', name: 'folder 1', type: 'chat', }, ], prompts: [], }); }); }); describe('cleaning v4 data', () => { it('should return the latest format', () => { const data = { version: 4, history: [ { id: '1', name: 'conversation 1', messages: [ { role: 'user', 
content: "what's up ?", }, { role: 'assistant', content: 'Hi', }, ], model: OpenAIModels[OpenAIModelID.GPT_3_5], prompt: DEFAULT_SYSTEM_PROMPT, temperature: DEFAULT_TEMPERATURE, folderId: null, }, ], folders: [ { id: '1', name: 'folder 1', type: 'chat', }, ], prompts: [ { id: '1', name: 'prompt 1', description: '', content: '', model: OpenAIModels[OpenAIModelID.GPT_3_5], folderId: null, }, ], } as ExportFormatV4; const obj = cleanData(data); expect(isLatestExportFormat(obj)).toBe(true); expect(obj).toEqual({ version: 4, history: [ { id: '1', name: 'conversation 1', messages: [ { role: 'user', content: "what's up ?", }, { role: 'assistant', content: 'Hi', }, ], model: OpenAIModels[OpenAIModelID.GPT_3_5], prompt: DEFAULT_SYSTEM_PROMPT, temperature: DEFAULT_TEMPERATURE, folderId: null, }, ], folders: [ { id: '1', name: 'folder 1', type: 'chat', }, ], prompts: [ { id: '1', name: 'prompt 1', description: '', content: '', model: OpenAIModels[OpenAIModelID.GPT_3_5], folderId: null, }, ], }); }); }); }); ================================================ FILE: ui/components/Buttons/SidebarActionButton/SidebarActionButton.tsx ================================================ import { MouseEventHandler, ReactElement } from 'react'; interface Props { handleClick: MouseEventHandler; children: ReactElement; } const SidebarActionButton = ({ handleClick, children }: Props) => ( ); export default SidebarActionButton; ================================================ FILE: ui/components/Buttons/SidebarActionButton/index.ts ================================================ export { default } from './SidebarActionButton'; ================================================ FILE: ui/components/Chat/Chat.tsx ================================================ import { IconClearAll, IconSettings } from '@tabler/icons-react'; import { MutableRefObject, memo, useCallback, useContext, useEffect, useRef, useState, } from 'react'; import toast from 'react-hot-toast'; import { useTranslation } from 
'next-i18next'; import { getEndpoint } from '@/utils/app/api'; import { saveConversation, saveConversations, updateConversation, } from '@/utils/app/conversation'; import { throttle } from '@/utils/data/throttle'; import { ChatBody, Conversation, Message } from '@/types/chat'; import { Plugin } from '@/types/plugin'; import HomeContext from '@/pages/api/home/home.context'; import Spinner from '../Spinner'; import { ChatInput } from './ChatInput'; import { ChatLoader } from './ChatLoader'; import { ErrorMessageDiv } from './ErrorMessageDiv'; import { ModelSelect } from './ModelSelect'; import { SystemPrompt } from './SystemPrompt'; import { TemperatureSlider } from './Temperature'; import { MemoizedChatMessage } from './MemoizedChatMessage'; interface Props { stopConversationRef: MutableRefObject; } export const Chat = memo(({ stopConversationRef }: Props) => { const { t } = useTranslation('chat'); const { state: { selectedConversation, conversations, models, apiKey, pluginKeys, serverSideApiKeyIsSet, messageIsStreaming, modelError, loading, prompts, }, handleUpdateConversation, dispatch: homeDispatch, } = useContext(HomeContext); const [currentMessage, setCurrentMessage] = useState(); const [autoScrollEnabled, setAutoScrollEnabled] = useState(true); const [showSettings, setShowSettings] = useState(false); const [showScrollDownButton, setShowScrollDownButton] = useState(false); const messagesEndRef = useRef(null); const chatContainerRef = useRef(null); const textareaRef = useRef(null); const handleSend = useCallback( async (message: Message, deleteCount = 0, plugin: Plugin | null = null) => { if (selectedConversation) { let updatedConversation: Conversation; if (deleteCount) { const updatedMessages = [...selectedConversation.messages]; for (let i = 0; i < deleteCount; i++) { updatedMessages.pop(); } updatedConversation = { ...selectedConversation, messages: [...updatedMessages, message], }; } else { updatedConversation = { ...selectedConversation, messages: 
[...selectedConversation.messages, message], }; } homeDispatch({ field: 'selectedConversation', value: updatedConversation, }); homeDispatch({ field: 'loading', value: true }); homeDispatch({ field: 'messageIsStreaming', value: true }); const chatBody: ChatBody = { model: updatedConversation.model, messages: updatedConversation.messages, key: apiKey, prompt: updatedConversation.prompt, temperature: updatedConversation.temperature, }; const endpoint = getEndpoint(plugin); let body; if (!plugin) { body = JSON.stringify(chatBody); } else { body = JSON.stringify({ ...chatBody, googleAPIKey: pluginKeys .find((key) => key.pluginId === 'google-search') ?.requiredKeys.find((key) => key.key === 'GOOGLE_API_KEY')?.value, googleCSEId: pluginKeys .find((key) => key.pluginId === 'google-search') ?.requiredKeys.find((key) => key.key === 'GOOGLE_CSE_ID')?.value, }); } const controller = new AbortController(); const response = await fetch(endpoint, { method: 'POST', headers: { 'Content-Type': 'application/json', }, signal: controller.signal, body, }); if (!response.ok) { homeDispatch({ field: 'loading', value: false }); homeDispatch({ field: 'messageIsStreaming', value: false }); toast.error(response.statusText); return; } const data = response.body; if (!data) { homeDispatch({ field: 'loading', value: false }); homeDispatch({ field: 'messageIsStreaming', value: false }); return; } if (!plugin) { if (updatedConversation.messages.length === 1) { const { content } = message; const customName = content.length > 30 ? content.substring(0, 30) + '...' 
: content;
    // The first user message doubles as the conversation title.
    updatedConversation = {
      ...updatedConversation,
      name: customName,
    };
  }
  homeDispatch({ field: 'loading', value: false });
  const reader = data.getReader();
  const decoder = new TextDecoder();
  let done = false;
  let isFirst = true;
  let text = '';
  // Stream the response body chunk by chunk, re-dispatching the conversation
  // after every chunk so the assistant message grows as it arrives.
  while (!done) {
    // The stop button flips this ref; abort the request and stop reading.
    if (stopConversationRef.current === true) {
      controller.abort();
      done = true;
      break;
    }
    const { value, done: doneReading } = await reader.read();
    done = doneReading;
    // Decode in streaming mode so a multi-byte UTF-8 character split across
    // two chunks is buffered instead of being turned into U+FFFD; the final
    // call (stream: false) flushes whatever is still pending.
    const chunkValue = decoder.decode(value, { stream: !doneReading });
    text += chunkValue;
    if (isFirst) {
      // First chunk: append a new assistant message to the conversation.
      isFirst = false;
      const updatedMessages: Message[] = [
        ...updatedConversation.messages,
        { role: 'assistant', content: chunkValue },
      ];
      updatedConversation = {
        ...updatedConversation,
        messages: updatedMessages,
      };
      homeDispatch({
        field: 'selectedConversation',
        value: updatedConversation,
      });
    } else {
      // Later chunks: replace the last (assistant) message with the text
      // accumulated so far.
      const updatedMessages: Message[] =
        updatedConversation.messages.map((message, index) => {
          if (index === updatedConversation.messages.length - 1) {
            return {
              ...message,
              content: text,
            };
          }
          return message;
        });
      updatedConversation = {
        ...updatedConversation,
        messages: updatedMessages,
      };
      homeDispatch({
        field: 'selectedConversation',
        value: updatedConversation,
      });
    }
  }
  // Persist the finished conversation and swap it into the conversation list.
  saveConversation(updatedConversation);
  const updatedConversations: Conversation[] = conversations.map(
    (conversation) => {
      if (conversation.id === selectedConversation.id) {
        return updatedConversation;
      }
      return conversation;
    },
  );
  if (updatedConversations.length === 0) {
    updatedConversations.push(updatedConversation);
  }
  homeDispatch({ field: 'conversations', value: updatedConversations });
  saveConversations(updatedConversations);
  homeDispatch({ field: 'messageIsStreaming', value: false });
} else {
  // Plugin requests return one JSON payload instead of a stream.
  const { answer } = await response.json();
  const updatedMessages: Message[] = [
    ...updatedConversation.messages,
    { role: 'assistant', content: answer },
  ];
  updatedConversation = {
    ...updatedConversation,
    messages: updatedMessages,
  };
  // Dispatch the updated conversation object. The previous code passed
  // `updateConversation`, the helper function imported from
  // '@/utils/app/conversation', which stored a function in state.
  homeDispatch({
    field: 'selectedConversation',
    value: updatedConversation,
  });
saveConversation(updatedConversation); const updatedConversations: Conversation[] = conversations.map( (conversation) => { if (conversation.id === selectedConversation.id) { return updatedConversation; } return conversation; }, ); if (updatedConversations.length === 0) { updatedConversations.push(updatedConversation); } homeDispatch({ field: 'conversations', value: updatedConversations }); saveConversations(updatedConversations); homeDispatch({ field: 'loading', value: false }); homeDispatch({ field: 'messageIsStreaming', value: false }); } } }, [ apiKey, conversations, pluginKeys, selectedConversation, stopConversationRef, ], ); const scrollToBottom = useCallback(() => { if (autoScrollEnabled) { messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); textareaRef.current?.focus(); } }, [autoScrollEnabled]); const handleScroll = () => { if (chatContainerRef.current) { const { scrollTop, scrollHeight, clientHeight } = chatContainerRef.current; const bottomTolerance = 30; if (scrollTop + clientHeight < scrollHeight - bottomTolerance) { setAutoScrollEnabled(false); setShowScrollDownButton(true); } else { setAutoScrollEnabled(true); setShowScrollDownButton(false); } } }; const handleScrollDown = () => { chatContainerRef.current?.scrollTo({ top: chatContainerRef.current.scrollHeight, behavior: 'smooth', }); }; const handleSettings = () => { setShowSettings(!showSettings); }; const onClearAll = () => { if ( confirm(t('Are you sure you want to clear all messages?')) && selectedConversation ) { handleUpdateConversation(selectedConversation, { key: 'messages', value: [], }); } }; const scrollDown = () => { if (autoScrollEnabled) { messagesEndRef.current?.scrollIntoView(true); } }; const throttledScrollDown = throttle(scrollDown, 250); // useEffect(() => { // console.log('currentMessage', currentMessage); // if (currentMessage) { // handleSend(currentMessage); // homeDispatch({ field: 'currentMessage', value: undefined }); // } // }, [currentMessage]); useEffect(() => 
{ throttledScrollDown(); selectedConversation && setCurrentMessage( selectedConversation.messages[selectedConversation.messages.length - 2], ); }, [selectedConversation, throttledScrollDown]); useEffect(() => { const observer = new IntersectionObserver( ([entry]) => { setAutoScrollEnabled(entry.isIntersecting); if (entry.isIntersecting) { textareaRef.current?.focus(); } }, { root: null, threshold: 0.5, }, ); const messagesEndElement = messagesEndRef.current; if (messagesEndElement) { observer.observe(messagesEndElement); } return () => { if (messagesEndElement) { observer.unobserve(messagesEndElement); } }; }, [messagesEndRef]); return (
{!(apiKey || serverSideApiKeyIsSet) ? (
LlamaGPT
LlamaGPT 100% unaffiliated with OpenAI.
LlamaGPT allows you to self-host your own LLM.
) : modelError ? ( ) : ( <>
{selectedConversation?.messages.length === 0 ? ( <>
{models.length === 0 ? (
) : ( 'LlamaGPT' )}
{models.length > 0 && (
handleUpdateConversation(selectedConversation, { key: 'prompt', value: prompt, }) } /> handleUpdateConversation(selectedConversation, { key: 'temperature', value: temperature, }) } />
)}
) : ( <>
{t('Model')}: {selectedConversation?.model.name} | {t('Temp')} : {selectedConversation?.temperature} |
{showSettings && (
)} {selectedConversation?.messages.map((message, index) => ( { setCurrentMessage(editedMessage); // discard edited message and the ones that come after then resend handleSend( editedMessage, selectedConversation?.messages.length - index, ); }} /> ))} {loading && }
)}
{ setCurrentMessage(message); handleSend(message, 0, plugin); }} onScrollDownClick={handleScrollDown} onRegenerate={() => { if (currentMessage) { handleSend(currentMessage, 2, null); } }} showScrollDownButton={showScrollDownButton} /> )}
); }); Chat.displayName = 'Chat'; ================================================ FILE: ui/components/Chat/ChatInput.tsx ================================================ import { IconArrowDown, IconBolt, IconBrandGoogle, IconPlayerStop, IconRepeat, IconSend, } from '@tabler/icons-react'; import { KeyboardEvent, MutableRefObject, useCallback, useContext, useEffect, useRef, useState, } from 'react'; import { useTranslation } from 'next-i18next'; import { Message } from '@/types/chat'; import { Plugin } from '@/types/plugin'; import { Prompt } from '@/types/prompt'; import HomeContext from '@/pages/api/home/home.context'; import { PluginSelect } from './PluginSelect'; import { PromptList } from './PromptList'; import { VariableModal } from './VariableModal'; interface Props { onSend: (message: Message, plugin: Plugin | null) => void; onRegenerate: () => void; onScrollDownClick: () => void; stopConversationRef: MutableRefObject; textareaRef: MutableRefObject; showScrollDownButton: boolean; } export const ChatInput = ({ onSend, onRegenerate, onScrollDownClick, stopConversationRef, textareaRef, showScrollDownButton, }: Props) => { const { t } = useTranslation('chat'); const { state: { selectedConversation, messageIsStreaming, prompts }, dispatch: homeDispatch, } = useContext(HomeContext); const [content, setContent] = useState(); const [isTyping, setIsTyping] = useState(false); const [showPromptList, setShowPromptList] = useState(false); const [activePromptIndex, setActivePromptIndex] = useState(0); const [promptInputValue, setPromptInputValue] = useState(''); const [variables, setVariables] = useState([]); const [isModalVisible, setIsModalVisible] = useState(false); const [showPluginSelect, setShowPluginSelect] = useState(false); const [plugin, setPlugin] = useState(null); const promptListRef = useRef(null); const filteredPrompts = prompts.filter((prompt) => prompt.name.toLowerCase().includes(promptInputValue.toLowerCase()), ); const handleChange = (e: 
React.ChangeEvent) => { const value = e.target.value; const maxLength = selectedConversation?.model.maxLength; if (maxLength && value.length > maxLength) { alert( t( `Message limit is {{maxLength}} characters. You have entered {{valueLength}} characters.`, { maxLength, valueLength: value.length }, ), ); return; } setContent(value); updatePromptListVisibility(value); }; const handleSend = () => { if (messageIsStreaming) { return; } if (!content) { alert(t('Please enter a message')); return; } onSend({ role: 'user', content }, plugin); setContent(''); setPlugin(null); if (window.innerWidth < 640 && textareaRef && textareaRef.current) { textareaRef.current.blur(); } }; const handleStopConversation = () => { stopConversationRef.current = true; setTimeout(() => { stopConversationRef.current = false; }, 1000); }; const isMobile = () => { const userAgent = typeof window.navigator === 'undefined' ? '' : navigator.userAgent; const mobileRegex = /Android|webOS|iPhone|iPad|iPod|BlackBerry|IEMobile|Opera Mini|Mobile|mobile|CriOS/i; return mobileRegex.test(userAgent); }; const handleInitModal = () => { const selectedPrompt = filteredPrompts[activePromptIndex]; if (selectedPrompt) { setContent((prevContent) => { const newContent = prevContent?.replace( /\/\w*$/, selectedPrompt.content, ); return newContent; }); handlePromptSelect(selectedPrompt); } setShowPromptList(false); }; const handleKeyDown = (e: KeyboardEvent) => { if (showPromptList) { if (e.key === 'ArrowDown') { e.preventDefault(); setActivePromptIndex((prevIndex) => prevIndex < prompts.length - 1 ? prevIndex + 1 : prevIndex, ); } else if (e.key === 'ArrowUp') { e.preventDefault(); setActivePromptIndex((prevIndex) => prevIndex > 0 ? prevIndex - 1 : prevIndex, ); } else if (e.key === 'Tab') { e.preventDefault(); setActivePromptIndex((prevIndex) => prevIndex < prompts.length - 1 ? 
prevIndex + 1 : 0, ); } else if (e.key === 'Enter') { e.preventDefault(); handleInitModal(); } else if (e.key === 'Escape') { e.preventDefault(); setShowPromptList(false); } else { setActivePromptIndex(0); } } else if (e.key === 'Enter' && !isTyping && !isMobile() && !e.shiftKey) { e.preventDefault(); handleSend(); } else if (e.key === '/' && e.metaKey) { e.preventDefault(); setShowPluginSelect(!showPluginSelect); } }; const parseVariables = (content: string) => { const regex = /{{(.*?)}}/g; const foundVariables = []; let match; while ((match = regex.exec(content)) !== null) { foundVariables.push(match[1]); } return foundVariables; }; const updatePromptListVisibility = useCallback((text: string) => { const match = text.match(/\/\w*$/); if (match) { setShowPromptList(true); setPromptInputValue(match[0].slice(1)); } else { setShowPromptList(false); setPromptInputValue(''); } }, []); const handlePromptSelect = (prompt: Prompt) => { const parsedVariables = parseVariables(prompt.content); setVariables(parsedVariables); if (parsedVariables.length > 0) { setIsModalVisible(true); } else { setContent((prevContent) => { const updatedContent = prevContent?.replace(/\/\w*$/, prompt.content); return updatedContent; }); updatePromptListVisibility(prompt.content); } }; const handleSubmit = (updatedVariables: string[]) => { const newContent = content?.replace(/{{(.*?)}}/g, (match, variable) => { const index = variables.indexOf(variable); return updatedVariables[index]; }); setContent(newContent); if (textareaRef && textareaRef.current) { textareaRef.current.focus(); } }; useEffect(() => { if (promptListRef.current) { promptListRef.current.scrollTop = activePromptIndex * 30; } }, [activePromptIndex]); useEffect(() => { if (textareaRef && textareaRef.current) { textareaRef.current.style.height = 'inherit'; textareaRef.current.style.height = `${textareaRef.current?.scrollHeight}px`; textareaRef.current.style.overflow = `${ textareaRef?.current?.scrollHeight > 400 ? 
'auto' : 'hidden' }`; } }, [content]); useEffect(() => { const handleOutsideClick = (e: MouseEvent) => { if ( promptListRef.current && !promptListRef.current.contains(e.target as Node) ) { setShowPromptList(false); } }; window.addEventListener('click', handleOutsideClick); return () => { window.removeEventListener('click', handleOutsideClick); }; }, []); return (
{messageIsStreaming && ( )} {!messageIsStreaming && selectedConversation && selectedConversation.messages.length > 0 && ( )}
{showPluginSelect && (
{ if (e.key === 'Escape') { e.preventDefault(); setShowPluginSelect(false); textareaRef.current?.focus(); } }} onPluginChange={(plugin: Plugin) => { setPlugin(plugin); setShowPluginSelect(false); if (textareaRef && textareaRef.current) { textareaRef.current.focus(); } }} />
)}