Repository: kuvaus/LlamaGPTJ-chat
Branch: main
Commit: e022976f0460
Files: 32
Total size: 196.2 KB
Directory structure:
gitextract_vuj2yh60/
├── .github/
│   └── workflows/
│       ├── cmake-release.yml
│       ├── cmake.yml
│       └── cmake_branch.yml
├── .gitignore
├── .gitmodules
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE
├── README.md
├── cmake/
│   └── config.h.in
├── gpt4all-backend/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── gptj/
│   │   └── placeholder
│   ├── gptj.cpp
│   ├── gptj.h
│   ├── llama/
│   │   └── placeholder
│   ├── llamamodel.cpp
│   ├── llamamodel.h
│   ├── llmodel.h
│   ├── llmodel_c.cpp
│   ├── llmodel_c.h
│   ├── mpt.cpp
│   ├── mpt.h
│   ├── scripts/
│   │   └── convert_mpt_hf_to_ggml.py
│   ├── utils.cpp
│   └── utils.h
├── prompt_template_sample.txt
└── src/
    ├── CMakeLists.txt
    ├── chat.cpp
    ├── header.h
    ├── parse_json.h
    └── utils.h
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/cmake-release.yml
================================================
name: CMake-release

on:
  push:
    tags:
      - 'v*'

env:
  BUILD_TYPE: Release

permissions:
  contents: read
  actions: write

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
        instructions:
          - avx
          - avx2
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Setup MinGW
        if: matrix.os == 'windows-latest'
        run: |
          choco install mingw -y -libwinpthread
          echo "C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

      - name: Configure CMake
        run: |
          if ("${{ matrix.os }}" -eq "windows-latest") {
            $env:PATH += ";C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin"
            cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DAVX2=${{ matrix.instructions == 'avx2' && 'ON' || 'OFF' }} -G "MinGW Makefiles"
          } else {
            cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DAVX2=${{ matrix.instructions == 'avx2' && 'ON' || 'OFF' }}
          }
        shell: pwsh

      - name: Build
        run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

      - name: Test
        working-directory: ${{github.workspace}}/build
        run: ctest -C ${{env.BUILD_TYPE}}

      - name: Prepare binary
        run: |
          if ("${{ matrix.instructions }}" -eq "avx") {
            if ("${{ matrix.os }}" -eq "windows-latest") {
              cp ${{github.workspace}}\build\bin\chat.exe chat.exe
              mv chat.exe chat-windows-latest-avx.exe
              shasum -a 256 -b chat-windows-latest-avx.exe > shasum-chat-windows-latest-avx.sha256
            } else {
              cp ${{github.workspace}}/build/bin/chat chat
              mv chat chat-${{ matrix.os }}-${{ matrix.instructions }}
              shasum -a 256 -b chat-${{ matrix.os }}-${{ matrix.instructions }} > shasum-chat-${{ matrix.os }}-${{ matrix.instructions }}.sha256
            }
          } else {
            if ("${{ matrix.os }}" -eq "windows-latest") {
              cp ${{github.workspace}}\build\bin\chat.exe chat.exe
              mv chat.exe chat-windows-latest-avx2.exe
              shasum -a 256 -b chat-windows-latest-avx2.exe > shasum-chat-windows-latest-avx2.sha256
            } else {
              cp ${{github.workspace}}/build/bin/chat chat
              mv chat chat-${{ matrix.os }}-${{ matrix.instructions }}
              shasum -a 256 -b chat-${{ matrix.os }}-${{ matrix.instructions }} > shasum-chat-${{ matrix.os }}-${{ matrix.instructions }}.sha256
            }
          }
        shell: pwsh

      - name: Upload binary
        uses: actions/upload-artifact@v2
        with:
          name: chat-${{ matrix.os }}-${{ matrix.instructions }}
          path: chat-${{ matrix.os }}-${{ matrix.instructions }}*

      - name: Upload shasums
        uses: actions/upload-artifact@v2
        with:
          name: shasum-chat-${{ matrix.os }}-${{ matrix.instructions }}
          path: shasum-chat-${{ matrix.os }}-${{ matrix.instructions }}*

  release:
    needs: build
    runs-on: ubuntu-latest
    steps:
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.DEPLOY_KEY }}
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          draft: false
          prerelease: false

      - name: Download artifacts
        uses: actions/download-artifact@v2
        with:
          path: artifacts

      - name: Upload artifacts
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.DEPLOY_KEY }}
        with:
          tag_name: ${{ github.ref_name }}
          name: Release ${{ github.ref_name }}
          draft: false
          prerelease: false
          files: |
            artifacts/**/*

      #
      # This part filters the CHANGELOG.md using python
      # Then it adds FILTERED_CHANGELOG.md to release notes
      #
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: 3.x

      - name: Filter CHANGELOG.md
        uses: jannekem/run-python-script-action@v1
        with:
          script: |
            filtered_lines = []
            start_processing = False
            with open('CHANGELOG.md', 'r') as file:
                for line in file:
                    if line.startswith("#### [v"):
                        if start_processing:
                            break
                        else:
                            file.readline()
                            file.readline()
                            start_processing = True
                            continue
                    if start_processing:
                        filtered_lines.append(line)
            with open('FILTERED_CHANGELOG.md', 'w') as file:
                file.writelines(filtered_lines)

      - name: Generate release notes
        uses: softprops/action-gh-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.DEPLOY_KEY }}
        with:
          tag_name: ${{ github.ref_name }}
          name: Release ${{ github.ref_name }}
          body_path: FILTERED_CHANGELOG.md
          draft: false
          prerelease: false
================================================
FILE: .github/workflows/cmake.yml
================================================
name: CMake

on:
  push:
    branches: [ "main" ]

env:
  BUILD_TYPE: Release

jobs:
  build:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-latest
          - macos-latest
          - windows-latest
        instructions:
          - avx
          - avx2
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Setup MinGW
        if: matrix.os == 'windows-latest'
        run: |
          choco install mingw -y -libwinpthread
          echo "C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

      - name: Configure CMake
        run: |
          if ("${{ matrix.os }}" -eq "windows-latest") {
            $env:PATH += ";C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin"
            cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DAVX2=${{ matrix.instructions == 'avx2' && 'ON' || 'OFF' }} -G "MinGW Makefiles"
          } else {
            cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DAVX2=${{ matrix.instructions == 'avx2' && 'ON' || 'OFF' }}
          }
        shell: pwsh

      - name: Build
        run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

      - name: Test
        working-directory: ${{github.workspace}}/build
        run: ctest -C ${{env.BUILD_TYPE}}
================================================
FILE: .github/workflows/cmake_branch.yml
================================================
name: CMake

on:
  push:
    branches:
      - '*'
      - '!main'

env:
  BUILD_TYPE: Release

jobs:
  build:
    runs-on: ${{ matrix.config.os }}
    strategy:
      fail-fast: false
      matrix:
        config:
          - { os: 'ubuntu-latest', instructions: 'avx' }
          - { os: 'ubuntu-latest', instructions: 'avx2' }
          - { os: 'macos-latest', instructions: 'avx' }
          - { os: 'macos-latest', instructions: 'avx2' }
          - { os: 'windows-latest', build: 'msvc', instructions: 'avx' }
          - { os: 'windows-latest', build: 'msvc', instructions: 'avx2' }
          - { os: 'windows-latest', build: 'mingw', instructions: 'avx' }
          - { os: 'windows-latest', build: 'mingw', instructions: 'avx2' }
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: recursive

      - name: Configure CMake
        if: matrix.config.build == 'msvc'
        run: cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}

      - name: Build
        if: matrix.config.build == 'msvc'
        run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

      - name: Test
        if: matrix.config.build == 'msvc'
        working-directory: ${{github.workspace}}/build
        run: ctest -C ${{env.BUILD_TYPE}}

      - name: Prepare binary
        if: matrix.config.build == 'msvc'
        run: |
          if ("${{ matrix.config.os }}" -eq "windows-latest") {
            cp ${{github.workspace}}\build\bin\Release\chat.exe chat-msvc.exe
            mv chat-msvc.exe chat-windows-latest-msvc.exe
          }
        shell: pwsh

      - name: Setup MinGW
        if: matrix.config.os == 'windows-latest'
        run: |
          choco install mingw -y -libwinpthread
          echo "C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

      - name: Configure CMake
        run: |
          if ("${{ matrix.config.os }}" -eq "windows-latest") {
            $env:PATH += ";C:\ProgramData\chocolatey\lib\mingw\tools\install\mingw64\bin"
            cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DAVX2=${{ matrix.config.instructions == 'avx2' && 'ON' || 'OFF' }} -G "MinGW Makefiles"
          } elseif ("${{ matrix.config.arch }}" -eq "aarch64") {
          } else {
            cmake -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DAVX2=${{ matrix.config.instructions == 'avx2' && 'ON' || 'OFF' }}
          }
        shell: pwsh

      - name: Build
        run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}

      - name: Test
        working-directory: ${{github.workspace}}/build
        run: ctest -C ${{env.BUILD_TYPE}}
================================================
FILE: .gitignore
================================================
# Folders
build/
tmp/
# Visual Studio Code
.vscode
# MacOS
.DS_Store
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
.cache
================================================
FILE: .gitmodules
================================================
[submodule "llama.cpp"]
    path = gpt4all-backend/llama.cpp
    url = https://github.com/manyoso/llama.cpp
    #url = https://github.com/ggerganov/llama.cpp
================================================
FILE: CHANGELOG.md
================================================
## Changelog
#### [Upcoming](https://github.com/kuvaus/LlamaGPTJ-chat/compare/v0.3.0...HEAD)
#### [v0.3.0](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.3.0)
> 26 June 2023
- Add this [changelog](https://github.com/kuvaus/LlamaGPTJ-chat/blob/main/CHANGELOG.md) :)
- Add sha256 hashes on release so you can verify the binaries
- All binaries are automatically generated with Github actions
- Add signal handling for SIGHUP (macOS, Linux) and CTRL_CLOSE_EVENT (Windows) to fix issue [`#16`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/16)
- This allows you to run chat as a subprocess. The chat subprocess now quits properly if the parent app is closed.
- Version information
- Fix segfault on `/help`
#### [v0.2.9](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.9)
> 22 June 2023
- [Pull request](https://github.com/kuvaus/LlamaGPTJ-chat/pull/18) from [@154pinkchairs](https://github.com/154pinkchairs/) merged. Thanks. :)
- The pull request [`#18`](https://github.com/kuvaus/LlamaGPTJ-chat/pull/18) has the two fixes below:
- Properly handle file paths including tildes [`18e9f36`](https://github.com/kuvaus/LlamaGPTJ-chat/commit/18e9f36)
- Handle buffer allocation errors [`6800dfb`](https://github.com/kuvaus/LlamaGPTJ-chat/commit/6800dfb)
- Better debug mode compilation. May fix issue [`#9`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/9)
#### [v0.2.8](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.8)
> 16 June 2023
- Adds `--save_dir` option so you can change save directory location
- Default location is `./saves` in the same directory as the chat binary
- See issue [`#13`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/13) for more details
#### [v0.2.7](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.7)
> 15 June 2023
- Fixes for old macOS.
- Use `-DOLD_MACOS=ON` option when compiling with CMake.
- Tested to compile on High Sierra and Xcode 10
#### [v0.2.6](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.6)
> 14 June 2023
- You can name saves with `./save NAME` and `./load NAME`
- You can toggle saving and loading off with `--no-saves` flag
#### [v0.2.5](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.5)
> 13 June 2023
- Save/load state with `./save` and `./load`
- Reset context with `./reset`, help with `./help`
- Makes a `./saves` folder
- Note that a single save can take up to 2 GB
- You can wrap the AI response with tokens using `--b_token` and `--e_token`
- See issue [`#12`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/12) for more details
#### [v0.2.4](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.4)
> 5 June 2023
- Fix when using json to specify names for logfiles. Fixes issue [`#11`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/11)
#### [v0.2.3](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.3)
> 4 June 2023
- Fix said ability to reset context... :)
#### [v0.2.2](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.2)
> 3 June 2023
- Ability to reset context
#### [v0.2.1](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.1)
> 30 May 2023
- Save and load chat logs
- Use `--save_log` and `--load_log`
- AVX512 option for compilation `-DAVX512=ON`
#### [v0.2.0](https://github.com/kuvaus/LlamaGPTJ-chat/releases/tag/v0.2.0)
> 17 May 2023
- Update gpt4all backend to v0.1.1 [`61a963a`](https://github.com/kuvaus/LlamaGPTJ-chat/commit/61a963a3d220ef157a8504ddde708f33dc2946eb)
- Full Windows Visual Studio compatibility. Finally fixes issue [`#1`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/1)
- Builds from source on aarch64 Linux. Fixes issue [`#3`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/3)
- Full MPT support. Fixes issue [`#4`](https://github.com/kuvaus/LlamaGPTJ-chat/issues/4)
#### v0.1.9
> 16 May 2023
- Code cleaning and reordering
- `llmodel_create_model()` function
#### v0.1.8
> 13 May 2023
- Add support for MPT models
- Uses [gpt4all-backend](https://github.com/nomic-ai/gpt4all)
#### v0.1.7
> 12 May 2023
- First [pull request](https://github.com/kuvaus/LlamaGPTJ-chat/pull/2) from [@itz-coffee](https://github.com/itz-coffee/) merged. Thanks. :)
- The pull request [`#2`](https://github.com/kuvaus/LlamaGPTJ-chat/pull/2) adds the feature below:
- Add `--no-animation` flag [`fdc2ac3`](https://github.com/kuvaus/LlamaGPTJ-chat/commit/fdc2ac3)
- Support for old macOS
#### v0.1.6
> 4 May 2023
- Parse parameters from json files
- Use `-j FNAME` or `--load_json FNAME`
#### v0.1.5
> 3 May 2023
- MinGW compilation on Windows
#### v0.1.4
> 1 May 2023
- v0.1.4 had no tags
- It was part of `cmake-release.yml` rewrite to enable MinGW [`e7e1ebf`](https://github.com/kuvaus/LlamaGPTJ-chat/commit/e7e1ebf97d696d069bbc0ae7f0ed078739fb6642)
#### v0.1.3
> 1 May 2023
- Add loading of prompt template files
- Use `--load_template` for loading
- See `prompt_template_sample.txt` for a sample
#### v0.1.2
> 30 April 2023
- Automatic memory handling for the model
#### v0.1.1
> 29 April 2023
- Windows compilation fixes
#### v0.1.0
> 29 April 2023
- Before this, progress was in [GPTJ-chat](https://github.com/kuvaus/GPTJ-chat/) and [Llama-chat](https://github.com/kuvaus/Llama-chat/)
- First version
================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.2)

if(APPLE)
    option(OLD_MACOS "Using old macos" OFF)
    option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
    if(BUILD_UNIVERSAL AND NOT OLD_MACOS)
        # Build a Universal binary on macOS
        set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
    else()
        # Build for the host architecture on macOS
        set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
    endif()
    if(OLD_MACOS)
        add_definitions(-DOLD_MACOS)
    endif()
endif()

project(LlamaGPTJ-chat)
set(VERSION_MAJOR 0)
set(VERSION_MINOR 3)
set(VERSION_PATCH 0)
set(VERSION "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}")

set(CMAKE_EXPORT_COMPILE_COMMANDS "on")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")

if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(LLAMA_STANDALONE ON)
else()
    set(LLAMA_STANDALONE OFF)
endif()

# options
option(AVX2 "enable AVX2" ON)
option(AVX512 "enable AVX512" OFF)
option(LLAMA_AVX "llama: enable AVX" ON)
option(LLAMA_AVX2 "llama: enable AVX2" ${AVX2})
option(LLAMA_AVX512 "llama: enable AVX512" ${AVX512})
option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" ${AVX512})
option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" ${AVX512})
option(LLAMA_FMA "llama: enable FMA" ${AVX2})

# sanitizers
#set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)

if(APPLE)
elseif(UNIX)
    if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
        option(AVX2 "enable AVX2" OFF)
        option(LLAMA_AVX "llama: enable AVX" OFF)
        option(LLAMA_AVX2 "llama: enable AVX2" OFF)
        option(LLAMA_AVX512 "llama: enable AVX512" OFF)
        option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
        option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
        set(BUILD_SHARED_LIBS ON FORCE)
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -mno-outline-atomics")
    endif()
endif()

if(GGML_SANITIZE_THREAD)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=thread")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
endif()
if(GGML_SANITIZE_ADDRESS)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
endif()
if(GGML_SANITIZE_UNDEFINED)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
endif()

if(AVX512)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512vl")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512vl")
endif()
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffast-math")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=native")

# dependencies
set(CMAKE_C_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)
find_package(Threads REQUIRED)

# main
# Include static libs for compatibility:
if(APPLE)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-search_paths_first -lSystem")
elseif(UNIX)
    if(NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++ -static")
    endif()
elseif(WIN32)
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++ -static")
endif()

# Generate a header file with the version number
configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/cmake/config.h.in"
    "${CMAKE_CURRENT_BINARY_DIR}/config.h"
)
# Include the binary directory for the generated header file
include_directories("${CMAKE_CURRENT_BINARY_DIR}")

add_subdirectory(gpt4all-backend/llama.cpp)
add_subdirectory(gpt4all-backend)
add_subdirectory(src)
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2023 Jukka Maatta
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
[![CMake](https://github.com/kuvaus/LlamaGPTJ-chat/actions/workflows/cmake.yml/badge.svg)](https://github.com/kuvaus/LlamaGPTJ-chat/actions/workflows/cmake.yml)
# LlamaGPTJ-chat
Simple command line chat program for [GPT-J](https://en.wikipedia.org/wiki/GPT-J), [LLaMA](https://en.wikipedia.org/wiki/LLaMA) and [MPT](https://www.mosaicml.com/blog/mpt-7b) models written in C++. Based on [llama.cpp](https://github.com/ggerganov/llama.cpp) and uses [gpt4all-backend](https://github.com/nomic-ai/gpt4all) for full compatibility.
> **Warning**
> Very early progress, might have bugs
# Table of contents
* [Installation](#installation)
* [Usage](#usage)
* [GPT-J, LLaMA, and MPT models](#gpt-j-llama-and-mpt-models)
* [Detailed command list](#detailed-command-list)
* [Useful features](#useful-features)
* [License](#license)
## Installation
Since the program is written in C++, it should build and run on most Linux, macOS, and Windows systems. The [Releases](https://github.com/kuvaus/LlamaGPTJ-chat/releases) page has ready-made binaries. AVX2 is faster and works on most newer computers. When you run the program, it checks and prints whether your computer has AVX2 support.
### Download
```sh
git clone --recurse-submodules https://github.com/kuvaus/LlamaGPTJ-chat
cd LlamaGPTJ-chat
```
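If you already cloned without `--recurse-submodules`, you can fetch the pinned llama.cpp submodule afterwards with the standard git command:
```sh
git submodule update --init --recursive
```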
You also need to download a model file; see [supported models](#gpt-j-llama-and-mpt-models) for details and links.
### Build
On most systems, you only need this to build:
```sh
mkdir build
cd build
cmake ..
cmake --build . --parallel
```
> **Note**
>
> If you have an old processor, you can turn AVX2 instructions OFF in the build step with the `-DAVX2=OFF` flag.
>
> If you have a new processor, you can turn AVX512 instructions ON in the build step with the `-DAVX512=ON` flag.
>
> On old macOS, set `-DBUILD_UNIVERSAL=OFF` to make the build x86-only instead of a universal Intel/ARM64 binary.
> On really old macOS, set `-DOLD_MACOS=ON`. This disables `/save` and `/load` but compiles on old Xcode.
>
> On Windows you can now use Visual Studio (MSVC) or MinGW. If you want a MinGW build instead, set `-G "MinGW Makefiles"`.
>
> On ARM64 Linux there are no ready-made binaries, but you can now build from source.
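For reference, here is how the flags above fit into the configure step; pick the ones that match your machine:
```sh
# Old processor: build without AVX2
cmake .. -DAVX2=OFF

# New processor: build with AVX512
cmake .. -DAVX512=ON

# Old macOS: x86-only build instead of a universal binary
cmake .. -DBUILD_UNIVERSAL=OFF

# Really old macOS: disables /save and /load, compiles on old Xcode
cmake .. -DOLD_MACOS=ON

# Windows with MinGW instead of MSVC
cmake .. -G "MinGW Makefiles"
```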
## Usage
After compiling, the binary is located at:
```sh
build/bin/chat
```
But you're free to move it anywhere. A simple command to get started, using 4 threads:
```sh
./chat -m "/path/to/modelfile/ggml-vicuna-13b-1.1-q4_2.bin" -t 4
```
or
```sh
./chat -m "/path/to/modelfile/ggml-gpt4all-j-v1.3-groovy.bin" -t 4
```
Happy chatting!
## GPT-J, LLaMA, and MPT models
The current backend supports GPT-J, LLaMA, and MPT models.
### GPT-J model
You need to download a GPT-J model first. Here are direct links to models:
>- The default version is **v1.0**: [ggml-gpt4all-j.bin](https://gpt4all.io/models/ggml-gpt4all-j.bin)
>- At the time of writing the newest is **1.3-groovy**: [ggml-gpt4all-j-v1.3-groovy.bin](https://gpt4all.io/models/ggml-gpt4all-j-v1.3-groovy.bin)
They're around 3.8 GB each. The chat program keeps the model in RAM at runtime, so you need enough memory to run it. You can get more details on GPT-J models from [gpt4all.io](https://gpt4all.io/) or the [nomic-ai/gpt4all](https://github.com/nomic-ai/gpt4all) GitHub repository.
### LLaMA model
Alternatively, you can download a LLaMA model instead. The original weights are for research purposes; you can apply for access [here](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/). Below are direct links to derived models:
>- Vicuna 7b **v1.1**: [ggml-vicuna-7b-1.1-q4_2.bin](https://gpt4all.io/models/ggml-vicuna-7b-1.1-q4_2.bin)
>- Vicuna 13b **v1.1**: [ggml-vicuna-13b-1.1-q4_2.bin](https://gpt4all.io/models/ggml-vicuna-13b-1.1-q4_2.bin)
>- GPT-4-All **l13b-snoozy**: [ggml-gpt4all-l13b-snoozy.bin](https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin)
The LLaMA models are quite large: the 7B-parameter versions are around 4.2 GB and the 13B-parameter versions around 8.2 GB each. The chat program keeps the model in RAM at runtime, so you need enough memory to run it. You can get more details on LLaMA models from the [paper](https://arxiv.org/abs/2302.13971) or the Meta AI [website](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/).
### MPT model
You can also download and use an MPT model instead. Here are direct links to MPT-7B models:
>- MPT-7B base model pre-trained by Mosaic ML: [ggml-mpt-7b-base.bin](https://gpt4all.io/models/ggml-mpt-7b-base.bin)
>- MPT-7B instruct model trained by Mosaic ML: [ggml-mpt-7b-instruct.bin](https://gpt4all.io/models/ggml-mpt-7b-instruct.bin)
>- Non-commercial MPT-7B chat model trained by Mosaic ML: [ggml-mpt-7b-chat.bin](https://gpt4all.io/models/ggml-mpt-7b-chat.bin)
They're around 4.9 GB each. The chat program keeps the model in RAM at runtime, so you need enough memory to run it. You can get more details on MPT models from the MosaicML [website](https://www.mosaicml.com/blog/mpt-7b) or the [mosaicml/llm-foundry](https://github.com/mosaicml/llm-foundry) GitHub repository.
## Detailed command list
You can view the help and the full parameter list with:
```sh
./chat -h
```
```sh
usage: ./bin/chat [options]
A simple chat program for GPT-J, LLaMA, and MPT models.
You can set specific initial prompt with the -p flag.
Runs default in interactive and continuous mode.
Type '/reset' to reset the chat context.
Type '/save','/load' to save network state into a binary file.
Type '/save NAME','/load NAME' to rename saves. Default: --save_name NAME.
Type '/help' to show this help dialog.
Type 'quit', 'exit' or, 'Ctrl+C' to quit.
options:
-h, --help show this help message and exit
-v, --version show version and license information
--run-once disable continuous mode
--no-interactive disable interactive mode altogether (uses given prompt only)
--no-animation disable chat animation
--no-saves disable '/save','/load' functionality
-s SEED, --seed SEED RNG seed for --random-prompt (default: -1)
-t N, --threads N number of threads to use during computation (default: 4)
-p PROMPT, --prompt PROMPT
prompt to start generation with (default: empty)
--random-prompt start with a randomized prompt.
-n N, --n_predict N number of tokens to predict (default: 200)
--top_k N top-k sampling (default: 40)
--top_p N top-p sampling (default: 0.9)
--temp N temperature (default: 0.9)
--n_ctx N number of tokens in context window (default: 0)
-b N, --batch_size N batch size for prompt processing (default: 20)
--repeat_penalty N repeat_penalty (default: 1.1)
--repeat_last_n N last n tokens to penalize (default: 64)
--context_erase N percent of context to erase (default: 0.8)
--b_token optional beginning wrap token for response (default: empty)
--e_token optional end wrap token for response (default: empty)
-j, --load_json FNAME
load options instead from json at FNAME (default: empty/no)
--load_template FNAME
load prompt template from a txt file at FNAME (default: empty/no)
--save_log FNAME
save chat log to a file at FNAME (default: empty/no)
--load_log FNAME
load chat log from a file at FNAME (default: empty/no)
--save_dir DIR
directory for saves (default: ./saves)
--save_name NAME
save/load model state binary at save_dir/NAME.bin (current: model_state)
context is saved to save_dir/NAME.ctx (current: model_state)
-m FNAME, --model FNAME
model path (current: ./models/ggml-vicuna-13b-1.1-q4_2.bin)
```
## Useful features
Here are some handy features and details on how to achieve them using command line options.
### Save/load chat log and read output from other apps
By default, the program prints the chat to standard output (stdout), so if you're embedding the program in your app, the app only needs to read stdout. You can also save the whole chat log to a text file with the `--save_log` option. An elementary way to carry your past conversation into a new session is to load the saved chat log with the `--load_log` option when you start the new session.
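For example, a two-session workflow might look like this (the model and log paths are illustrative):
```sh
# First session: save the whole conversation to a text file
./chat -m ./models/ggml-gpt4all-j-v1.3-groovy.bin --save_log ./chat.log

# Later session: load the old conversation back in as context
./chat -m ./models/ggml-gpt4all-j-v1.3-groovy.bin --load_log ./chat.log
```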
### Run the program once without user interaction
If you only need the program to run once without any user interaction, one way is to set the prompt with `-p "prompt"` and use the `--no-interactive` and `--no-animation` flags. The program will read the prompt, print the answer, and close.
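A minimal sketch of such a one-shot invocation (the model path and prompt are illustrative):
```sh
# Prints the answer to stdout and exits without waiting for input
./chat -m ./models/ggml-gpt4all-j-v1.3-groovy.bin \
       -p "Write a haiku about llamas." \
       --no-interactive --no-animation
```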
### Add AI personalities and characters
If you want a personality for your AI, you can change `prompt_template_sample.txt` and use `--load_template` to load the modified file. The only constant is that your input during chat goes on the `%1` line. Instructions, prompt, response, and everything else can be replaced any way you want. Having different `personality_template.txt` files is an easy way to add different AI characters. With _some_ models, giving both the AI and the user names instead of `Prompt:` and `Response:` can make the conversation flow more naturally, as the AI tries to mimic a conversation between two people.
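As a sketch, a hypothetical `personality_template.txt` (the file name and wording here are made up; only the `%1` placeholder is required) could look like:
```
You are Bob, a cheerful pirate who answers every question in pirate slang.
Bob is talking with User.
User: %1
Bob:
```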
### Ability to reset chat context
You can reset the chat at any time by typing `/reset` in the input field. This clears the AI's memory of the past conversation, logits, and tokens, so you can start the chat from a blank slate without having to reload the whole model again.
### Load all parameters using JSON
You can also fetch parameters from a JSON file with the `--load_json "/path/to/file.json"` flag. Different models may perform better or worse with different input parameters, so JSON files are a handy way to store and load all the settings at once. The JSON loader is deliberately simple to avoid external dependencies, and as a result the JSON file must follow a specific format. Here is a simple example:
```javascript
{"top_p": 1.0, "top_k": 50400, "temp": 0.9, "n_batch": 9}
```
This is useful when you want to store different temperature and sampling settings.
And a more detailed one:
```javascript
{
"top_p": 1.0,
"top_k": 50400,
"temp": 0.9,
"n_batch": 20,
"threads": 12,
"prompt": "Once upon a time",
"load_template": "/path/to/prompt_template_sample.txt",
"model": "/path/to/ggml-gpt4all-j-v1.3-groovy.bin",
"no-interactive": "true"
}
```
This one loads the prompt from the JSON, uses a specific template, and runs the program once in no-interactive mode so the user does not have to provide any input.
## License
This project is licensed under the MIT [License](https://github.com/kuvaus/LlamaGPTJ-chat/blob/main/LICENSE).
================================================
FILE: cmake/config.h.in
================================================
#ifndef CONFIG_H
#define CONFIG_H
#define VERSION "@VERSION_MAJOR@" "." "@VERSION_MINOR@" "." "@VERSION_PATCH@"
#endif // CONFIG_H
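Given the version numbers set in the top-level CMakeLists.txt (0.3.0 at this commit), `configure_file` expands the placeholders so the generated header should look roughly like the sketch below; C string-literal concatenation then yields a single `"0.3.0"` string:
```c
/* build/config.h: illustrative expansion for version 0.3.0 */
#ifndef CONFIG_H
#define CONFIG_H
/* adjacent string literals concatenate to "0.3.0" at compile time */
#define VERSION "0" "." "3" "." "0"
#endif // CONFIG_H
```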
================================================
FILE: gpt4all-backend/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.16)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
#if(APPLE)
# option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
# if(BUILD_UNIVERSAL)
# # Build a Universal binary on macOS
# # This requires that the found Qt library is compiled as Universal binaries.
# set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
# else()
# # Build for the host architecture on macOS
# set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
# endif()
#endif()
# Include the binary directory for the generated header file
#include_directories("${CMAKE_CURRENT_BINARY_DIR}")
#set(LLMODEL_VERSION_MAJOR 0)
#set(LLMODEL_VERSION_MINOR 1)
#set(LLMODEL_VERSION_PATCH 1)
#set(LLMODEL_VERSION "${LLMODEL_VERSION_MAJOR}.${LLMODEL_VERSION_MINOR}.${LLMODEL_VERSION_PATCH}")
#project(llmodel VERSION ${LLMODEL_VERSION} LANGUAGES CXX C)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
#set(LLAMA_BUILD_EXAMPLES ON CACHE BOOL "llama: build examples" FORCE)
#set(BUILD_SHARED_LIBS ON FORCE)
set(CMAKE_VERBOSE_MAKEFILE ON)
if (GPT4ALL_AVX_ONLY)
    set(LLAMA_AVX2 OFF CACHE BOOL "llama: enable AVX2" FORCE)
    set(LLAMA_F16C OFF CACHE BOOL "llama: enable F16C" FORCE)
    set(LLAMA_FMA OFF CACHE BOOL "llama: enable FMA" FORCE)
endif()
#add_subdirectory(llama.cpp)
add_library(llmodel
    gptj.h gptj.cpp
    llamamodel.h llamamodel.cpp
    llama.cpp/examples/common.cpp
    llmodel.h llmodel_c.h llmodel_c.cpp
    mpt.h mpt.cpp
    utils.h utils.cpp
)
target_link_libraries(llmodel
    PRIVATE llama)
#set_target_properties(llmodel PROPERTIES
# VERSION ${PROJECT_VERSION}
# SOVERSION ${PROJECT_VERSION_MAJOR})
#set(COMPONENT_NAME_MAIN ${PROJECT_NAME})
#set(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/install)
================================================
FILE: gpt4all-backend/README.md
================================================
# GPT4ALL Backend
This directory contains the C/C++ model backend used by GPT4All for inference on the CPU. This backend acts as a universal library/wrapper for all models that the GPT4All ecosystem supports. Language bindings are built on top of this universal library. The native GPT4All Chat application uses this library directly for all inference.
# What models are supported by the GPT4All ecosystem?
Currently, there are three different model architectures that are supported:
1. GPTJ - Based on the GPT-J architecture, with examples found [here](https://huggingface.co/EleutherAI/gpt-j-6b)
2. LLAMA - Based on the LLAMA architecture, with examples found [here](https://huggingface.co/models?sort=downloads&search=llama)
3. MPT - Based on Mosaic ML's MPT architecture, with examples found [here](https://huggingface.co/mosaicml/mpt-7b)
# Why so many different architectures? What differentiates them?
One of the major differences is licensing. Currently, the LLAMA-based models are subject to a non-commercial license, whereas the GPTJ and MPT base models allow commercial usage. In the early days of the recent explosion of activity in open-source local models, the LLAMA models were generally seen as performing better, but that is changing quickly. Every week - even every day! - new models are released, with some of the GPTJ and MPT models competitive in performance/quality with LLAMA. What's more, there are some very nice architectural innovations in the MPT models that could lead to new performance/quality gains.
# How does GPT4All make these models available for CPU inference?
By leveraging the ggml library written by Georgi Gerganov and a growing community of developers. There are currently multiple different versions of this library. The original github repo can be found [here](https://github.com/ggerganov/ggml), but the developer of the library has also created a LLAMA based version [here](https://github.com/ggerganov/llama.cpp). Currently, this backend is using the latter as a submodule.
# Does that mean GPT4All is compatible with all llama.cpp models and vice versa?
Unfortunately, no, for three reasons:
1. The upstream [llama.cpp](https://github.com/ggerganov/llama.cpp) project recently introduced [a compatibility-breaking](https://github.com/ggerganov/llama.cpp/commit/b9fd7eee57df101d4a3e3eabc9fd6c2cb13c9ca1) re-quantization method. This change renders all previous models (including the ones that GPT4All uses) inoperative with newer versions of llama.cpp.
2. The GPT4All backend has the llama.cpp submodule specifically pinned to a version prior to this breaking change.
3. The GPT4All backend currently supports MPT-based models as an added feature. Neither llama.cpp nor the original ggml repo supports this architecture as of this writing; however, efforts are underway to make MPT available in the ggml repo, which you can follow [here](https://github.com/ggerganov/ggml/pull/145).
# What is being done to make them more compatible?
A few things. Number one, we are maintaining compatibility with our current model zoo by way of the submodule pinning. However, we are also exploring how we can update to newer versions of llama.cpp without breaking our current models. This might involve an additional magic-header check, or it could involve keeping the currently pinned submodule while adding a new submodule with the later changes and differentiating them with namespaces or in some other manner. Investigations continue.
# What about GPU inference?
In newer versions of llama.cpp, there has been some added support for NVIDIA GPUs for inference. We're investigating how to incorporate this into our downloadable installers.
# Ok, so bottom line... how do I make my model on huggingface compatible with the GPT4All ecosystem right now?
1. Check to make sure your huggingface model uses one of our three supported architectures.
2. If it does, you can use the conversion script inside our pinned llama.cpp submodule for GPTJ and LLAMA based models.
3. Or, if your model is an MPT model, you can use the conversion script located directly in this backend directory, under the `scripts` subdirectory (see the sketch after this list).
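A rough sketch of how the MPT conversion might be invoked; the argument list here is an assumption, so check the script's source for its real CLI before relying on it:
```sh
# Hypothetical invocation: paths and argument order are assumptions,
# see convert_mpt_hf_to_ggml.py itself for the actual interface
python gpt4all-backend/scripts/convert_mpt_hf_to_ggml.py /path/to/mpt-7b-hf /path/to/output
```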
# Check back for updates as we'll try to keep this updated as things change!
================================================
FILE: gpt4all-backend/gptj/placeholder
================================================
================================================
FILE: gpt4all-backend/gptj.cpp
================================================
#include "gptj.h"
#include "llama.cpp/ggml.h"
#include "utils.h"
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <map>