Full Code of mlc-ai/web-llm for AI

main 926ab86e275a cached

235 files

821.6 KB

222.2k tokens

629 symbols

1 requests

Download .txt

Showing preview only (883K chars total). Download the full file or copy to clipboard to get everything.

Repository: mlc-ai/web-llm
Branch: main
Commit: 926ab86e275a
Files: 235
Total size: 821.6 KB

Directory structure:
gitextract_o4yltvbb/

├── .github/
│   └── workflows/
│       ├── build-site.yaml
│       ├── build.yaml
│       ├── linter.yaml
│       ├── security.yaml
│       └── tests.yaml
├── .gitignore
├── .gitmodules
├── .husky/
│   └── pre-commit
├── .lintstagedrc.json
├── .nvmrc
├── .prettierignore
├── .prettierrc
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── cleanup-index-js.sh
├── docs/
│   ├── Makefile
│   ├── README.md
│   ├── conf.py
│   ├── developer/
│   │   ├── add_models.rst
│   │   └── building_from_source.rst
│   ├── index.rst
│   ├── make.bat
│   ├── requirements.txt
│   └── user/
│       ├── advanced_usage.rst
│       ├── api_reference.rst
│       ├── basic_usage.rst
│       └── get_started.rst
├── eslint.config.cjs
├── examples/
│   ├── .gitignore
│   ├── README.md
│   ├── abort-reload/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started.html
│   │       └── get_started.js
│   ├── cache-usage/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── cache_usage.html
│   │       └── cache_usage.ts
│   ├── chrome-extension/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── content.js
│   │       ├── example.html
│   │       ├── manifest.json
│   │       ├── manifest_v2.json
│   │       ├── popup.css
│   │       ├── popup.html
│   │       └── popup.ts
│   ├── chrome-extension-webgpu-service-worker/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── background.ts
│   │       ├── content.js
│   │       ├── example.html
│   │       ├── manifest.json
│   │       ├── popup.css
│   │       ├── popup.html
│   │       └── popup.ts
│   ├── embeddings/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── embeddings.html
│   │       └── embeddings.ts
│   ├── function-calling/
│   │   ├── README.md
│   │   ├── function-calling-manual/
│   │   │   ├── README.md
│   │   │   ├── package.json
│   │   │   └── src/
│   │   │       ├── function_calling_manual.html
│   │   │       └── function_calling_manual.ts
│   │   └── function-calling-openai/
│   │       ├── README.md
│   │       ├── package.json
│   │       └── src/
│   │           ├── function_calling_openai.html
│   │           └── function_calling_openai.ts
│   ├── get-started/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started.html
│   │       └── get_started.ts
│   ├── get-started-latency-breakdown/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started_latency_breakdown.html
│   │       └── get_started_latency_breakdown.ts
│   ├── get-started-web-worker/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started.html
│   │       ├── main.ts
│   │       └── worker.ts
│   ├── json-mode/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── json_mode.html
│   │       └── json_mode.ts
│   ├── json-schema/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── json_schema.html
│   │       └── json_schema.ts
│   ├── logit-processor/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── logit_processor.html
│   │       ├── logit_processor.ts
│   │       ├── my_logit_processor.ts
│   │       └── worker.ts
│   ├── multi-models/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── main.ts
│   │       ├── multi_models.html
│   │       └── worker.ts
│   ├── multi-round-chat/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── multi_round_chat.html
│   │       └── multi_round_chat.ts
│   ├── next-simple-chat/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── next.config.js
│   │   ├── package.json
│   │   ├── postcss.config.js
│   │   ├── src/
│   │   │   ├── pages/
│   │   │   │   ├── _app.tsx
│   │   │   │   ├── _document.tsx
│   │   │   │   ├── api/
│   │   │   │   │   └── hello.ts
│   │   │   │   └── index.tsx
│   │   │   ├── styles/
│   │   │   │   └── globals.css
│   │   │   └── utils/
│   │   │       ├── chat_component.tsx
│   │   │       └── chat_ui.ts
│   │   ├── tailwind.config.js
│   │   └── tsconfig.json
│   ├── qwen3/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── qwen3_example.html
│   │       └── qwen3_example.ts
│   ├── seed-to-reproduce/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── seed.html
│   │       └── seed.ts
│   ├── service-worker/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── index.html
│   │       ├── main.ts
│   │       └── sw.ts
│   ├── simple-chat-js/
│   │   ├── index.css
│   │   ├── index.html
│   │   └── index.js
│   ├── simple-chat-ts/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── gh-config.js
│   │       ├── llm_chat.css
│   │       ├── llm_chat.html
│   │       ├── simple_chat.ts
│   │       └── worker.ts
│   ├── simple-chat-upload/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── gh-config.js
│   │       ├── llm_chat.css
│   │       ├── llm_chat.html
│   │       ├── simple_chat.ts
│   │       └── worker.ts
│   ├── streaming/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── streaming.html
│   │       └── streaming.ts
│   ├── structural-tag-tool-use/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── mcp_structural_tag.html
│   │       └── mcp_structural_tag.ts
│   ├── text-completion/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── text_completion.html
│   │       └── text_completion.ts
│   └── vision-model/
│       ├── README.md
│       ├── package.json
│       └── src/
│           ├── utils.ts
│           ├── vision_model.html
│           ├── vision_model.ts
│           └── worker.ts
├── jest.config.cjs
├── licenses/
│   └── license.openai_node.txt
├── package.json
├── rollup.config.js
├── scripts/
│   ├── gh_deploy_site.sh
│   ├── local_deploy_site.sh
│   ├── prep_deps.sh
│   └── serve_mlc_llm_dist.sh
├── site/
│   ├── .gitignore
│   ├── _config.yml
│   ├── _includes/
│   │   ├── head.html
│   │   └── hero.html
│   ├── assets/
│   │   ├── css/
│   │   │   └── hero.scss
│   │   └── video/
│   │       ├── Code.webm
│   │       └── Pittsburgh.webm
│   └── index.md
├── src/
│   ├── cache_util.ts
│   ├── config.ts
│   ├── conversation.ts
│   ├── embedding.ts
│   ├── engine.ts
│   ├── error.ts
│   ├── extension_service_worker.ts
│   ├── index.ts
│   ├── llm_chat.ts
│   ├── message.ts
│   ├── openai_api_protocols/
│   │   ├── chat_completion.ts
│   │   ├── completion.ts
│   │   ├── embedding.ts
│   │   └── index.ts
│   ├── service_worker.ts
│   ├── support.ts
│   ├── types.ts
│   ├── utils.ts
│   └── web_worker.ts
├── tests/
│   ├── .gitignore
│   ├── cache_util.test.ts
│   ├── constants.ts
│   ├── conversation.test.ts
│   ├── embedding_stats.test.ts
│   ├── engine_integration.test.ts
│   ├── extension_service_worker.test.ts
│   ├── function_calling.test.ts
│   ├── generation_config.test.ts
│   ├── llm_chat_pipeline.test.ts
│   ├── multi_round_chat.test.ts
│   ├── openai_chat_completion.test.ts
│   ├── openai_completion.test.ts
│   ├── openai_embeddings.test.ts
│   ├── scripts/
│   │   └── sanity_checks/
│   │       ├── README.md
│   │       ├── package.json
│   │       ├── sanity_checks.html
│   │       └── sanity_checks.ts
│   ├── service_worker.test.ts
│   ├── util.test.ts
│   └── web_worker_handler.test.ts
├── tsconfig.json
└── utils/
    ├── .gitignore
    └── vram_requirements/
        ├── .gitignore
        ├── README.md
        ├── package.json
        └── src/
            ├── gh-config.js
            ├── vram_requirements.html
            └── vram_requirements.ts

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/workflows/build-site.yaml
================================================
# Builds the project site and runs scripts/gh_deploy_site.sh on every push
# to `main`.
name: Build site and push to gh-pages

on:
  push:
    branches:
      - main

jobs:
  build:
    name: Build site
    runs-on: ubuntu-latest

    steps:
    # v4 matches the checkout version used by the other workflows; the older
    # v2 release targets a Node.js runtime that GitHub has retired.
    - uses: actions/checkout@v4

    - name: Configuring build Environment
      run: |
        sudo apt-get update
        python -m pip install -U pip

    - name: Setup Ruby
      uses: ruby/setup-ruby@v1
      with:
        ruby-version: '3.0'

    # Python packages for the docs build, Jekyll gems for the site build.
    - name: Installing dependencies
      run: |
        python -m pip install -r docs/requirements.txt
        gem install jekyll jekyll-remote-theme jekyll-sass-converter

    # The push trigger already restricts runs to `main`; the `if` is an extra
    # guard. The remote URL is rewritten to embed the MLC_GITHUB_TOKEN secret
    # and git is configured with the bot identity before the deploy script runs.
    - name: Build and deploy site
      if: github.ref == 'refs/heads/main'
      run: |
        git remote set-url origin https://x-access-token:${{ secrets.MLC_GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY
        git config --global user.email "mlc-gh-actions-bot@nomail"
        git config --global user.name "mlc-gh-actions-bot"

        ./scripts/gh_deploy_site.sh

================================================
FILE: .github/workflows/build.yaml
================================================
# CI build check: installs dependencies, builds the package, and dry-runs
# `npm pack`. Runs on pull requests targeting `main`, pushes to `main`, and
# manual dispatch.
name: Build

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  workflow_dispatch:

# Runs are grouped per workflow + ref; a newer run cancels one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build:
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The Node.js version is read from the repository's .nvmrc file.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version-file: ".nvmrc"
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Build package
        run: npm run build

      # --dry-run reports what would be packed without producing a tarball.
      - name: Validate package contents
        run: npm pack --dry-run


================================================
FILE: .github/workflows/linter.yaml
================================================
# Runs `npm run lint` on pushes to `main` and on pull requests targeting `main`.
name: Linter

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      # Action versions, npm caching, and `npm ci` mirror the build, tests, and
      # security workflows so lint runs against the same locked dependency tree.
      - name: Checkout code
        uses: actions/checkout@v4

      # The Node.js version is read from the repository's .nvmrc file.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version-file: ".nvmrc"
          cache: npm

      # `npm ci` installs exactly what the lockfile specifies and fails if
      # package.json and the lockfile disagree, unlike `npm install`.
      - name: Install dependencies
        run: npm ci

      - name: Run lint
        run: npm run lint


================================================
FILE: .github/workflows/security.yaml
================================================
# Security checks: dependency review on pull requests, `npm audit` on every
# trigger, and CodeQL analysis on everything except pull requests.
name: Security

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  # Weekly run: minute 25, hour 5, every Monday (GitHub evaluates cron in UTC).
  schedule:
    - cron: "25 5 * * 1"
  workflow_dispatch:

# Workflow-wide default; individual jobs below widen this only where needed.
permissions:
  contents: read

jobs:
  # Only runs for pull_request events.
  dependency-review:
    if: github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read

    steps:
      - name: Dependency review
        uses: actions/dependency-review-action@v4
        with:
          fail-on-severity: high

  npm-audit:
    runs-on: ubuntu-latest
    timeout-minutes: 20

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The Node.js version is read from the repository's .nvmrc file.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version-file: ".nvmrc"
          cache: npm

      - name: Install dependencies
        run: npm ci

      # Dev dependencies are excluded; only findings of severity high or above fail the step.
      - name: Run npm audit (production dependencies)
        run: npm audit --omit=dev --audit-level=high

  # Skipped for pull_request events; runs on push, schedule, and manual dispatch.
  codeql:
    if: github.event_name != 'pull_request'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    permissions:
      actions: read
      contents: read
      # Needed so the analyze step can upload its results.
      security-events: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: javascript-typescript

      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      - name: Analyze
        uses: github/codeql-action/analyze@v3


================================================
FILE: .github/workflows/tests.yaml
================================================
# Runs the test suite on pull requests targeting `main`, pushes to `main`,
# and manual dispatch, then uploads the `coverage` directory as an artifact.
name: Tests

on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  workflow_dispatch:

# Runs are grouped per workflow + ref; a newer run cancels one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # The Node.js version is read from the repository's .nvmrc file.
      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version-file: ".nvmrc"
          cache: npm

      - name: Install dependencies
        run: npm ci

      # Arguments after `--` are forwarded to the underlying test script.
      - name: Run test suite
        env:
          CI: "true"
        run: npm run test -- --ci

      # `always()` uploads coverage even when the test step failed; a missing
      # `coverage` directory is ignored rather than treated as an error.
      - name: Upload coverage artifact
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: coverage-${{ github.run_id }}
          path: coverage
          if-no-files-found: ignore


================================================
FILE: .gitignore
================================================
scratch/
dist/
params/
*.bak
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

*.S
# C extensions
*.so


*.ll
.npm
# Distribution / packaging
.Python
env/
build/
build-*/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

.conda/
# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Generated by python/gen_requirements.py
python/requirements/*.txt

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/
docs/_staging/

# PyBuilder
target/
/target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject
*~
*.pyc
*~
config.mk
config.cmake
Win32
*.dir
perf
*.wasm
.emscripten

## iOS
DerivedData/

## Java
*.class
jvm/*/target/
jvm/*/*/target/
jvm/native/*/generated
jvm/native/src/main/native/org_apache_tvm_native_c_api.h
*.worksheet
*.idea
*.iml
*.classpath
*.project
*.settings
*/node_modules/

## Various settings
*.pbxuser
!default.pbxuser
*.mode1v3
!default.mode1v3
*.mode2v3
!default.mode2v3
*.perspectivev3
!default.perspectivev3
xcuserdata/
.pkl_memoize_*

.emscripten*
.m2

# Compiled Dynamic libraries
*.so
*.dylib
*.dll

# Compiled Object files
*.slo
*.lo
*.o
*.obj

# Precompiled Headers
*.gch
*.pch

# Compiled Static libraries
*.lai
*.la
*.a
*.lib

# Executables
*.exe
*.out
*.app

## Other
*.moved-aside
*.xccheckout
*.xcscmblueprint
.DS_Store
tags
cscope*
*.lock

# vim temporary files
*.swp
*.swo

# TVM generated code
perf
.bash_history
# *.json
*.params
*.ro
*.onnx
*.h5
synset.txt
cat.jpg
cat.png
docs.tgz
cat.png
*.mlmodel
tvm_u.*
tvm_t.*
# Mac OS X
.DS_Store

# JetBrains
.idea
.ipython
.jupyter
.nv
.pylint.d
.python_history
.pytest_cache
.local
cmake-build-debug

# Visual Studio
.vs

# Visual Studio Code
.vscode

# tmp file
.nfs*

# keys
*.pem
*.p12
*.pfx
*.cer
*.crt
*.der

# patch sentinel
patched.txt

# Python type checking
.mypy_cache/
.pyre/

# pipenv files
Pipfile
Pipfile.lock

# conda package artifacts
conda/Dockerfile.cuda*
conda/pkg
.node_repl_history
# nix files
.envrc
*.nix

# Docker files
.sudo_as_admin_successful

# Downloaded models/datasets
.tvm_test_data
.dgl
.caffe2

# Local docs build
_docs/
jvm/target
.config/configstore/
.ci-py-scripts/

# Generated Hexagon files
src/runtime/hexagon/rpc/hexagon_rpc.h
src/runtime/hexagon/rpc/hexagon_rpc_skel.c
src/runtime/hexagon/rpc/hexagon_rpc_stub.c

# Local tvm-site checkout
tvm-site/

# Generated docs files
gallery/how_to/work_with_microtvm/micro_tvmc.py

# Test sample data files
!tests/python/ci/sample_prs/*.json

# Used in CI to communicate between Python and Jenkins
.docker-image-names/

# Printed TIR code on disk
*.tir

# GDB history file
.gdb_history

3rdparty
dist
tvm_home
node_modules
lib
.parcel-cache

**/.next
coverage

================================================
FILE: .gitmodules
================================================


================================================
FILE: .husky/pre-commit
================================================
npx lint-staged


================================================
FILE: .lintstagedrc.json
================================================
{
  "./**/*.{js,ts,jsx,tsx,json}": ["eslint --fix", "prettier --write"]
}


================================================
FILE: .nvmrc
================================================
v24.11.1

================================================
FILE: .prettierignore
================================================
dist
debug
lib
build
node_modules
3rdparty
.eslintrc.cjs
**/.next

================================================
FILE: .prettierrc
================================================
{
  "trailingComma": "all"
}


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to WebLLM

Thank you for your interest in contributing to WebLLM. This guide helps contributors get set up quickly and make high-impact changes that are easy to review and merge.

## Ways To Contribute

We welcome contributions across the project, including:

- Bug reports with clear reproduction steps
- Bug fixes and reliability improvements
- New features and API improvements
- Performance and memory optimizations
- Tests and test coverage improvements
- Documentation updates and tutorials
- New or improved examples in `examples/`
- Model integration and configuration improvements
- Code review and issue triage support

If you are unsure where to start, look for open issues in the repository and propose a plan in the issue thread before implementation.

## Community Principles

WebLLM is part of a broader open-source ecosystem and follows collaborative, public-first development norms.

- Keep technical discussion in public, archivable channels (issues and pull requests)
- Use clear technical reasoning and seek consensus on non-trivial changes
- For major design changes, start with an issue or RFC-style proposal before coding
- Review other contributors' PRs when possible

Additional reference: Apache TVM community guidelines

- https://tvm.apache.org/docs/contribute/community.html

## Development Setup

### Prerequisites

- Node.js (see `.nvmrc` for the required version)
- npm
- Git

Optional:

- Python 3 (for docs build)
- Emscripten/toolchain setup

### Local Setup

```bash
git clone https://github.com/mlc-ai/web-llm.git
cd web-llm
npm install
```

### Build, Lint, and Test

```bash
npm run build
npm run lint
npm test
```

Notes:

- `npm test` runs Jest with coverage thresholds.
- For quick iteration on a single test file, you can run:

```bash
npx jest --coverage=false tests/<file>.test.ts
```

### Auto-formatting

If lint or style checks fail, run:

```bash
npm run format
```

Pre-commit hooks (Husky + lint-staged) are configured in this repo.

## Testing Changes In Examples

To test local package changes inside an example app:

1. Edit `examples/<example>/package.json` and set `"@mlc-ai/web-llm"` to `"../.."` (or `"file:../.."` if needed).
2. Install and run the example.

```bash
cd examples/<example>
npm install
npm run start
```

## Documentation Contributions

Docs are in `docs/` and built with Sphinx.

```bash
cd docs
pip3 install -r requirements.txt
make html
```

Open the built docs from `docs/_build/html`.

## Pull Request Guidelines

Before opening a PR:

1. Keep the change scoped to one problem or feature.
2. Add or update tests for behavior changes.
3. Update docs/examples for user-facing changes.
4. Run `npm run lint` and `npm test` locally.
5. Include a clear PR description with:
   - Problem statement
   - Proposed solution
   - Validation steps and results
   - Backward-compatibility considerations

During review:

- Respond to comments with concrete follow-ups
- Prefer additional tests over assumptions
- Keep commit history understandable (small, logical commits)

## Reporting Bugs and Requesting Features

- Use GitHub Issues for bug reports and feature requests.
- Include environment details, expected vs. actual behavior, and minimal reproduction steps.
- For substantial feature additions, open an issue first to align on design and scope.

## Security Reporting

Please do not report security vulnerabilities in public issues. Report vulnerabilities via email to `mlc-llm-private@googlegroups.com`.

Reference:

- https://github.com/mlc-ai/web-llm/blob/main/SECURITY.md

## License

By contributing, you agree that your contributions are provided under the repository's Apache-2.0 license.


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

------------------------------------------------------------------------------------
This product bundles various third-party components under other open source licenses.
This section summarizes those components and their licenses. See licenses/
for text of these licenses.

Apache Software Foundation License 2.0
--------------------------------------

src/openai_api_protocols


================================================
FILE: README.md
================================================
<div align="center" id="top">

# WebLLM
[![NPM Package](https://img.shields.io/badge/NPM_Package-Published-cc3534)](https://www.npmjs.com/package/@mlc-ai/web-llm)
[!["WebLLM Chat Deployed"](https://img.shields.io/badge/WebLLM_Chat-Deployed-%2332a852)](https://chat.webllm.ai/)
[![Join Discord](https://img.shields.io/badge/Join-Discord-7289DA?logo=discord&logoColor=white)](https://discord.gg/9Xpy2HGBuD)
[![Related Repository: WebLLM Chat](https://img.shields.io/badge/Related_Repo-WebLLM_Chat-fafbfc?logo=github)](https://github.com/mlc-ai/web-llm-chat/)
[![Related Repository: MLC LLM](https://img.shields.io/badge/Related_Repo-MLC_LLM-fafbfc?logo=github)](https://github.com/mlc-ai/mlc-llm/)

**High-Performance In-Browser LLM Inference Engine.**


[Documentation](https://webllm.mlc.ai/docs/) | [Blogpost](https://blog.mlc.ai/2024/06/13/webllm-a-high-performance-in-browser-llm-inference-engine) | [Paper](https://arxiv.org/abs/2412.15803) | [Examples](examples)

</div>

## Overview
WebLLM is a high-performance in-browser LLM inference engine that brings language model inference directly onto web browsers with hardware acceleration.
Everything runs inside the browser with no server support and is accelerated with WebGPU.

WebLLM is **fully compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/chat).**
That is, you can use the same OpenAI API on **any open source models** locally, with functionalities
including streaming, JSON-mode, function-calling (WIP), etc.

We can bring a lot of fun opportunities to build AI assistants for everyone and enable privacy while enjoying GPU acceleration.

You can use WebLLM as a base [npm package](https://www.npmjs.com/package/@mlc-ai/web-llm) and build your own web application on top of it by following the examples below. This project is a companion project of [MLC LLM](https://github.com/mlc-ai/mlc-llm), which enables universal deployment of LLM across hardware environments.

<div align="center">

**[Check out WebLLM Chat to try it out!](https://chat.webllm.ai/)**

</div>

## Key Features
- **In-Browser Inference**: WebLLM is a high-performance, in-browser language model inference engine that leverages WebGPU for hardware acceleration, enabling powerful LLM operations directly within web browsers without server-side processing.

- [**Full OpenAI API Compatibility**](#full-openai-compatibility): Seamlessly integrate your app with WebLLM using OpenAI API with functionalities such as streaming, JSON-mode, logit-level control, seeding, and more.

- **Structured JSON Generation**: WebLLM supports state-of-the-art JSON mode structured generation, implemented in the WebAssembly portion of the model library for optimal performance. Check [WebLLM JSON Playground](https://huggingface.co/spaces/mlc-ai/WebLLM-JSON-Playground) on HuggingFace to try generating JSON output with custom JSON schema.

- [**Extensive Model Support**](#built-in-models): WebLLM natively supports a range of models including Llama 3, Phi 3, Gemma, Mistral, Qwen(通义千问), and many others, making it versatile for various AI tasks. For the complete supported model list, check [MLC Models](https://mlc.ai/models).

- [**Custom Model Integration**](#custom-models): Easily integrate and deploy custom models in MLC format, allowing you to adapt WebLLM to specific needs and scenarios, enhancing flexibility in model deployment.

- **Plug-and-Play Integration**: Easily integrate WebLLM into your projects using package managers like NPM and Yarn, or directly via CDN, complete with comprehensive [examples](./examples/) and a modular design for connecting with UI components.

- **Streaming & Real-Time Interactions**: Supports streaming chat completions, allowing real-time output generation which enhances interactive applications like chatbots and virtual assistants.

- **Web Worker & Service Worker Support**: Optimize UI performance and manage the lifecycle of models efficiently by offloading computations to separate worker threads or service workers.

- **Chrome Extension Support**: Extend the functionality of web browsers through custom Chrome extensions using WebLLM, with examples available for building both basic and advanced extensions.

## Built-in Models

Check the complete list of available models on [MLC Models](https://mlc.ai/models). WebLLM supports a subset of these available models and the list can be accessed at [`prebuiltAppConfig.model_list`](https://github.com/mlc-ai/web-llm/blob/main/src/config.ts#L293).

Here are the primary families of models currently supported:

- **Llama**: Llama 3, Llama 2, Hermes-2-Pro-Llama-3
- **Phi**: Phi 3, Phi 2, Phi 1.5
- **Gemma**: Gemma-2B
- **Mistral**: Mistral-7B-v0.3, Hermes-2-Pro-Mistral-7B, NeuralHermes-2.5-Mistral-7B, OpenHermes-2.5-Mistral-7B
- **Qwen (通义千问)**: Qwen2 0.5B, 1.5B, 7B

If you need more models, [request a new model via opening an issue](https://github.com/mlc-ai/web-llm/issues/new/choose) or check [Custom Models](#custom-models) for how to compile and use your own models with WebLLM.

## Jumpstart with Examples

Learn how to use WebLLM to integrate large language models into your application and generate chat completions through this simple Chatbot example: 

[![Example Chatbot on JSFiddle](https://img.shields.io/badge/Example-JSFiddle-blue?logo=jsfiddle&logoColor=white)](https://jsfiddle.net/neetnestor/4nmgvsa2/)
[![Example Chatbot on Codepen](https://img.shields.io/badge/Example-Codepen-gainsboro?logo=codepen)](https://codepen.io/neetnestor/pen/vYwgZaG)

For an advanced example of a larger, more complicated project, check [WebLLM Chat](https://github.com/mlc-ai/web-llm-chat/blob/main/app/client/webllm.ts).

More examples for different use cases are available in the [examples](./examples/) folder.

## Get Started

WebLLM offers a minimalist and modular interface to access the chatbot in the browser.
The package is designed in a modular way to hook to any of the UI components.

### Installation

#### Package Manager

```sh
# npm
npm install @mlc-ai/web-llm
# yarn
yarn add @mlc-ai/web-llm
# or pnpm
pnpm install @mlc-ai/web-llm
```

Then import the module in your code.

```typescript
// Import everything
import * as webllm from "@mlc-ai/web-llm";
// Or only import what you need
import { CreateMLCEngine } from "@mlc-ai/web-llm";
```

#### CDN Delivery

Thanks to [jsdelivr.com](https://www.jsdelivr.com/package/npm/@mlc-ai/web-llm), WebLLM can be imported directly through URL and work out-of-the-box on cloud development platforms like [jsfiddle.net](https://jsfiddle.net/), [Codepen.io](https://codepen.io/), and [Scribbler](https://scribbler.live):

```javascript
import * as webllm from "https://esm.run/@mlc-ai/web-llm";
```
It can also be dynamically imported as:
```javascript
const webllm = await import ("https://esm.run/@mlc-ai/web-llm");
```

### Create MLCEngine

Most operations in WebLLM are invoked through the `MLCEngine` interface. You can create an `MLCEngine` instance and load the model by calling the `CreateMLCEngine()` factory function.

(Note that loading a model requires downloading it, which can take a significant amount of time on the very first run, before anything has been cached. You should properly handle this asynchronous call.)

```typescript
import { CreateMLCEngine } from "@mlc-ai/web-llm";

// Callback function to update model loading progress
const initProgressCallback = (initProgress) => {
  console.log(initProgress);
}
const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

const engine = await CreateMLCEngine(
  selectedModel,
  { initProgressCallback: initProgressCallback }, // engineConfig
);
```

Under the hood, this factory function performs two steps: it first creates an engine instance (synchronous) and then loads the model (asynchronous). You can also do them separately in your application.

```typescript
import { MLCEngine } from "@mlc-ai/web-llm";

// This is a synchronous call that returns immediately
const engine = new MLCEngine({
  initProgressCallback: initProgressCallback
});

// This is an asynchronous call and can take a long time to finish
await engine.reload(selectedModel);
```

### Chat Completion
After successfully initializing the engine, you can now invoke chat completions using OpenAI style chat APIs through the `engine.chat.completions` interface. For the full list of parameters and their descriptions, check [section below](#full-openai-compatibility) and [OpenAI API reference](https://platform.openai.com/docs/api-reference/chat/create).

(Note: The `model` parameter is not supported and will be ignored here. Instead, call `CreateMLCEngine(model)` or `engine.reload(model)` as shown in the [Create MLCEngine](#create-mlcengine) section above.)


```typescript
const messages = [
  { role: "system", content: "You are a helpful AI assistant." },
  { role: "user", content: "Hello!" },
]

const reply = await engine.chat.completions.create({
  messages,
});
console.log(reply.choices[0].message);
console.log(reply.usage);
```

### Streaming

WebLLM also supports streaming chat completion generation. To use it, simply pass `stream: true` to the `engine.chat.completions.create` call.

```typescript
const messages = [
  { role: "system", content: "You are a helpful AI assistant." },
  { role: "user", content: "Hello!" },
]

// Chunks is an AsyncGenerator object
const chunks = await engine.chat.completions.create({
  messages,
  temperature: 1,
  stream: true, // <-- Enable streaming
  stream_options: { include_usage: true },
});

let reply = "";
for await (const chunk of chunks) {
  reply += chunk.choices[0]?.delta.content || "";
  console.log(reply);
  if (chunk.usage) {
    console.log(chunk.usage); // only last chunk has usage
  }
}

const fullReply = await engine.getMessage();
console.log(fullReply);
```

## Advanced Usage

### Using Workers

You can put the heavy computation in a worker script to optimize your application performance. To do so, you need to:

1. Create a handler in the worker thread that communicates with the frontend while handling the requests.
2. Create a Worker Engine in your main application, which under the hood sends messages to the handler in the worker thread.

For detailed implementations of different kinds of Workers, check the following sections.

#### Dedicated Web Worker

WebLLM comes with API support for WebWorker so you can hook
the generation process into a separate worker thread so that
the computing in the worker thread won't disrupt the UI.

We create a handler in the worker thread that communicates with the frontend while handling the requests.

```typescript
// worker.ts
import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";

// A handler that resides in the worker thread
const handler = new WebWorkerMLCEngineHandler();
self.onmessage = (msg: MessageEvent) => {
  handler.onmessage(msg);
};
```

In the main logic, we create a `WebWorkerMLCEngine` that
implements the same `MLCEngineInterface`. The rest of the logic remains the same.

```typescript
// main.ts
import { CreateWebWorkerMLCEngine } from "@mlc-ai/web-llm";

async function main() {
  // Use a WebWorkerMLCEngine instead of MLCEngine here
  const engine = await CreateWebWorkerMLCEngine(
    new Worker(
      new URL("./worker.ts", import.meta.url), 
      {
        type: "module",
      }
    ),
    selectedModel,
    { initProgressCallback }, // engineConfig
  );

  // everything else remains the same
}
```

### Use Service Worker

WebLLM comes with API support for ServiceWorker so you can hook the generation process
into a service worker to avoid reloading the model in every page visit and optimize
your application's offline experience.

(Note, Service Worker's life cycle is managed by the browser and can be killed any time without notifying the webapp. `ServiceWorkerMLCEngine` will try to keep the service worker thread alive by periodically sending heartbeat events, but your application should also include proper error handling. Check `keepAliveMs` and `missedHeatbeat` in [`ServiceWorkerMLCEngine`](https://github.com/mlc-ai/web-llm/blob/main/src/service_worker.ts#L234) for more details.)

We create a handler in the worker thread that communicates with the frontend while handling the requests.


```typescript
// sw.ts
import { ServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm";

let handler: ServiceWorkerMLCEngineHandler;

self.addEventListener("activate", function (event) {
  handler = new ServiceWorkerMLCEngineHandler();
  console.log("Service Worker is ready");
});
```

Then in the main logic, we register the service worker and create the engine using
`CreateServiceWorkerMLCEngine` function. The rest of the logic remains the same.

```typescript
// main.ts
import { MLCEngineInterface, CreateServiceWorkerMLCEngine } from "@mlc-ai/web-llm";

if ("serviceWorker" in navigator) {
  navigator.serviceWorker.register(
    new URL("sw.ts", import.meta.url),  // worker script
    { type: "module" },
  );
}

const engine: MLCEngineInterface =
  await CreateServiceWorkerMLCEngine(
    selectedModel,
    { initProgressCallback }, // engineConfig
  );
```

You can find a complete example on how to run WebLLM in service worker in [examples/service-worker](examples/service-worker/).

### Chrome Extension
You can also find examples of building Chrome extension with WebLLM in [examples/chrome-extension](examples/chrome-extension/) and [examples/chrome-extension-webgpu-service-worker](examples/chrome-extension-webgpu-service-worker/). The latter one leverages service worker, so the extension is persistent in the background. Additionally, you can explore another full project of a Chrome extension, WebLLM Assistant, which leverages WebLLM [here](https://github.com/mlc-ai/web-llm-assistant).

## Full OpenAI Compatibility
WebLLM is designed to be fully compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/chat). Thus, besides building a simple chatbot, you can also have the following functionalities with WebLLM:

- [streaming](examples/streaming): return output as chunks in real-time in the form of an AsyncGenerator
- [json-mode](examples/json-mode): efficiently ensure output is in JSON format, see [OpenAI Reference](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) for more.
- [seed-to-reproduce](examples/seed-to-reproduce): use seeding to ensure a reproducible output with fields `seed`.
- [function-calling](examples/function-calling) (WIP): function calling with fields `tools` and `tool_choice` (with preliminary support); or manual function calling without `tools` or `tool_choice` (keeps the most flexibility).

## Custom Models

WebLLM works as a companion project of [MLC LLM](https://github.com/mlc-ai/mlc-llm) and it supports custom models in MLC format. 
It reuses the model artifacts and build flow of MLC LLM. To compile and use your own models with WebLLM, please check out
[MLC LLM document](https://llm.mlc.ai/docs/deploy/webllm.html)
on how to compile and deploy new model weights and libraries to WebLLM. 

Here, we go over the high-level idea. There are two elements of the WebLLM package that enable new models and weight variants.

- `model`: Contains a URL to model artifacts, such as weights and meta-data.
- `model_lib`: A URL to the web assembly library (i.e. wasm file) that contains the executables to accelerate the model computations.

Both are customizable in WebLLM.

```typescript
import { CreateMLCEngine } from "@mlc-ai/web-llm";

async function main() {
  const appConfig = {
    "model_list": [
      {
        "model": "/url/to/my/llama",
        "model_id": "MyLlama-3b-v1-q4f32_0",
        "model_lib": "/url/to/myllama3b.wasm",
      }
    ],
  };
  // override default
  const chatOpts = {
    "repetition_penalty": 1.01
  };

  // load the custom model registered in appConfig above,
  // with a chat option override
  // under the hood, it will load the model from "/url/to/my/llama"
  // and cache it in the browser cache
  // The chat will also load the model library from "/url/to/myllama3b.wasm",
  // assuming that it is compatible with the model in "/url/to/my/llama".
  const engine = await CreateMLCEngine(
    "MyLlama-3b-v1-q4f32_0",
    { appConfig }, // engineConfig
    chatOpts,
  );
}
```

In many cases, we only want to supply the model weight variant, but
not necessarily a new model (e.g. `NeuralHermes-Mistral` can reuse `Mistral`'s
model library). For examples of how a model library can be shared by different model variants,
see `webllm.prebuiltAppConfig`.

## Build WebLLM Package From Source

NOTE: you don't need to build from source unless you would like to modify the WebLLM package.
To use the npm package, simply follow [Get Started](#get-started) or any of the [examples](examples) instead.

To build from source, simply run:

```bash
npm install
npm run build
```

Then, to test the effects of your code change in an example, inside `examples/get-started/package.json`, change from `"@mlc-ai/web-llm": "^0.2.82"` to `"@mlc-ai/web-llm": "../.."`.

Then run:

```bash
cd examples/get-started
npm install
npm start
```

Note that sometimes you would need to switch between `file:../..` and `../..` to trigger npm to recognize new changes. In the worst case, you can run:

```bash
cd examples/get-started
rm -rf node_modules dist package-lock.json .parcel-cache
npm install
npm start
```

### In case you need to build TVMjs from source

WebLLM's runtime largely depends on TVMjs: https://github.com/apache/tvm/tree/main/web

While it is also available as an npm package: https://www.npmjs.com/package/@mlc-ai/web-runtime, you can build it from source if needed by following the steps below.

1. Install [emscripten](https://emscripten.org). It is an LLVM-based compiler that compiles C/C++ source code to WebAssembly.
    - Follow the [installation instruction](https://emscripten.org/docs/getting_started/downloads.html#installation-instructions-using-the-emsdk-recommended) to install the latest emsdk.
    - Source `emsdk_env.sh` by `source path/to/emsdk_env.sh`, so that `emcc` is reachable from PATH and the command `emcc` works.

    We can verify the successful installation by running `emcc` in the terminal.

    Note: We recently found that using the latest `emcc` version may run into issues during runtime. Use `./emsdk install 3.1.56` instead of `./emsdk install latest` for now as a workaround. The error may look like
    ```
    Init error, LinkError: WebAssembly.instantiate(): Import #6 module="wasi_snapshot_preview1"
    function="proc_exit": function import requires a callable
    ```

2. In `./package.json`, change from `"@mlc-ai/web-runtime": "0.18.0-dev2",` to `"@mlc-ai/web-runtime": "file:./tvm_home/web",`.

3. Setup necessary environment

   Prepare all the necessary dependencies for web build:

   ```shell
   ./scripts/prep_deps.sh
   ```

   In this step, if `$TVM_SOURCE_DIR` is not defined in the environment, we will execute the following line to build `tvmjs` dependency:
   ```shell
   git clone https://github.com/mlc-ai/relax 3rdparty/tvm-unity --recursive
   ```

   This clones the current HEAD of `mlc-ai/relax`. However, it may not always be the correct branch or commit to clone. To build a specific npm version from source, refer to the version bump PR, which states which branch (i.e. `mlc-ai/relax` or `apache/tvm`) and which commit the current WebLLM version depends on. For instance, version 0.2.52, according to its version bump PR https://github.com/mlc-ai/web-llm/pull/521, is built by checking out the following commit https://github.com/apache/tvm/commit/e6476847753c80e054719ac47bc2091c888418b6 in `apache/tvm`, rather than the HEAD of `mlc-ai/relax`.

   Besides, `--recursive` is necessary and important. Otherwise, you may encounter errors like `fatal error: 'dlpack/dlpack.h' file not found`.

4. Build WebLLM Package

   ```shell
   npm run build
   ```

5. Validate some of the sub-packages

   You can then go to the subfolders in [examples](examples) to validate some of the sub-packages.
   We use Parcel v2 for bundling, although Parcel is sometimes not very good at tracking changes
   in parent directories. When you make a change in the WebLLM package, try to edit the `package.json`
   of the subfolder and save it, which will trigger Parcel to rebuild.

## Links

- [Demo App: WebLLM Chat](https://chat.webllm.ai/)
- If you want to run LLM on native runtime, check out [MLC-LLM](https://github.com/mlc-ai/mlc-llm)
- You might also be interested in [Web Stable Diffusion](https://github.com/mlc-ai/web-stable-diffusion/).

## Acknowledgement

This project is initiated by members from CMU Catalyst, UW SAMPL, SJTU, OctoML, and the MLC community. We would love to continue developing and supporting the open-source ML community.

This project is only possible thanks to the shoulders of the open-source ecosystems that we stand on. We want to thank the Apache TVM community and developers of the TVM Unity effort. The open-source ML community members made these models publicly available. PyTorch and Hugging Face communities make these models accessible. We would like to thank the teams behind Vicuna, SentencePiece, LLaMA, and Alpaca. We also would like to thank the WebAssembly, Emscripten, and WebGPU communities. Finally, thanks to Dawn and WebGPU developers.

## Citation
If you find this project to be useful, please cite:

```
@misc{ruan2024webllmhighperformanceinbrowserllm,
      title={WebLLM: A High-Performance In-Browser LLM Inference Engine}, 
      author={Charlie F. Ruan and Yucheng Qin and Xun Zhou and Ruihang Lai and Hongyi Jin and Yixin Dong and Bohan Hou and Meng-Shiun Yu and Yiyan Zhai and Sudeep Agarwal and Hangrui Cao and Siyuan Feng and Tianqi Chen},
      year={2024},
      eprint={2412.15803},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2412.15803}, 
}
```

## Contributors

<a href="https://github.com/mlc-ai/web-llm/graphs/contributors">
  <img alt="contributors" src="https://contrib.rocks/image?repo=mlc-ai/web-llm"/>
</a>

<p align="right">
  <a href="#top">⬆ Back to Top ⬆</a>
</p>


================================================
FILE: SECURITY.md
================================================
# Security Policy

## Reporting a Vulnerability

For security concerns or vulnerability reports, please send an email to `mlc-llm-private@googlegroups.com`.


================================================
FILE: cleanup-index-js.sh
================================================
# Post-build cleanup for the bundled package.
# Patches lib/index.js and its source map lib/index.js.map in place with sed so that
# Node-only constructs left in the bundle do not break browser/bundler consumers.
# Every sed call uses `-i.backup`, which leaves a `.backup` copy next to the edited file;
# those copies are deleted at the end of the script.

# Remove instances of string "const{createRequire:createRequire}=await import('module');"
# This is required to allow background workers packaged with Parcel for the chrome extension
# to run the `ChatModule`.
sed -e s/"const{createRequire:createRequire}=await import('module');"//g -i.backup lib/index.js
sed -e s/"const{createRequire:createRequire}=await import('module');"//g -i.backup lib/index.js.map

# Replace scriptDirectory init that Parcel cannot resolve ("new URL('./', import.meta.url)") with a plain relative string
# NOTE(review): the lib/index.js replacement is "./" while the source-map replacement below is "." —
# confirm whether the difference is intentional.
sed -e s~"require(\\\"url\\\").fileURLToPath(new URL(\\\"\\.\\/\\\",import.meta.url))"~"\\\"./\\\""~g -i.backup lib/index.js
sed -e s~"require(\\\"url\\\").fileURLToPath(new URL(\\\"\\.\\/\\\",import.meta.url))"~'\\\".\\\"'~g -i.backup lib/index.js.map

# Replace string "new (require('u' + 'rl').URL)('file:' + __filename).href" with "MLC_DUMMY_PATH"
# This is required for building nextJS projects -- its compile time would complain about `require()`
# See https://github.com/mlc-ai/web-llm/issues/383 and the fixing PR's description for more.
sed -e s/"new (require('u' + 'rl').URL)('file:' + __filename).href"/"\"MLC_DUMMY_PATH\""/g -i.backup lib/index.js
# Same replacement in the source map, where the inserted quotes are backslash-escaped: \"MLC_DUMMY_PATH\"
sed -e s/"new (require('u' + 'rl').URL)('file:' + __filename).href"/'\\\"MLC_DUMMY_PATH\\\"'/g -i.backup lib/index.js.map

# Replace "import require$$3 from 'perf_hooks';" with a string "const require$$3 = "MLC_DUMMY_REQUIRE_VAR""
# This is to prevent `perf_hooks` not found error
# For more see https://github.com/mlc-ai/web-llm/issues/258 and https://github.com/mlc-ai/web-llm/issues/127
sed -e s/"import require\$\$3 from 'perf_hooks';"/"const require\$\$3 = \"MLC_DUMMY_REQUIRE_VAR\""/g -i.backup lib/index.js
# Similarly replace `const performanceNode = require(\"perf_hooks\")` with `const performanceNode = \"MLC_DUMMY_REQUIRE_VAR\"`
sed -e s/'require(\\\"perf_hooks\\\")'/'\\\"MLC_DUMMY_REQUIRE_VAR\\\"'/g -i.backup lib/index.js.map

# Below is added when we include dependency @mlc-ai/web-runtime, rather than using local tvm_home
# Replace "import require$$4 from 'ws'" with a string "const require$$4 = "MLC_DUMMY_REQUIRE_VAR""
# This is to prevent error `Cannot find module 'ws'`
sed -e s/"import require\$\$4 from 'ws';"/"const require\$\$4 = \"MLC_DUMMY_REQUIRE_VAR\""/g -i.backup lib/index.js
# Similarly replace `const WebSocket = require(\"ws\")` with `const WebSocket = \"MLC_DUMMY_REQUIRE_VAR\"`
sed -e s/'require(\\\"ws\\\")'/'\\\"MLC_DUMMY_REQUIRE_VAR\\\"'/g -i.backup lib/index.js.map

# Cleanup backup files
rm -f lib/index.js.backup
rm -f lib/index.js.map.backup


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
# SPHINXBUILD defaults to running Sphinx as a module of the `python` found on PATH.
SPHINXOPTS    ?=
SPHINXBUILD   ?= python -m sphinx
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# `Makefile` is listed as phony because the catch-all rule below names it as a
# prerequisite; phony targets are skipped by implicit-rule search, so the
# catch-all is never applied to the Makefile itself.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# For example, `make html` runs `$(SPHINXBUILD) -M html ...`.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/README.md
================================================
# WebLLM Documentation

The documentation was built upon [Sphinx](https://www.sphinx-doc.org/en/master/).

## Dependencies

Run the following command in this directory to install dependencies first:

```bash
pip3 install -r requirements.txt
```

## Build the Documentation

Then you can build the documentation by running:

```bash
make html
```

## View the Documentation

Run the following command to start a simple HTTP server:

```bash
cd _build/html
python3 -m http.server
```

Then you can view the documentation in your browser at `http://localhost:8000` (the port can be customized by appending the port number to the python command above, e.g. `python3 -m http.server 8080`).


================================================
FILE: docs/conf.py
================================================
# -*- coding: utf-8 -*-
# Sphinx configuration for the WebLLM documentation.
import os
import sys

# Supplies the extra template and static paths appended at the end of this file.
import tlcpack_sphinx_addon

# -- General configuration ------------------------------------------------

# Put ../python and the parent directory at the front of the import path.
# NOTE(review): presumably so sphinx.ext.autodoc can import project modules — confirm.
sys.path.insert(0, os.path.abspath("../python"))
sys.path.insert(0, os.path.abspath("../"))
# Modules that autodoc mocks instead of importing.
autodoc_mock_imports = ["torch"]

# General information about the project.
project = "web-llm"
author = "WebLLM Contributors"
copyright = "2023, %s" % author

# Version information.
# `version` and `release` are kept at the same value.

version = "0.2.82"
release = "0.2.82"

# Sphinx extensions loaded for this build.
extensions = [
    "sphinx_tabs.tabs",
    "sphinx_toolbox.collapse",
    "sphinxcontrib.httpdomain",
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx_reredirects",
]

# sphinx_reredirects mapping: source document path -> redirect target.
redirects = {"get_started/try_out": "../index.html#getting-started"}

# Only .rst files are treated as documentation sources.
source_suffix = [".rst"]

language = "en"

# Paths ignored when looking for source files.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False

# -- Options for HTML output ----------------------------------------------

# Use the Read the Docs theme.
import sphinx_rtd_theme

html_theme = "sphinx_rtd_theme"
# NOTE(review): get_html_theme_path() is reported as deprecated by recent
# sphinx_rtd_theme releases — confirm against the version in requirements.txt.
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]

# Start empty; the tlcpack_sphinx_addon paths are appended at the end of this file.
templates_path = []

html_static_path = []

# Footer text, passed to the templates through html_context below.
footer_copyright = "© 2023 MLC LLM"
footer_note = " "

html_logo = "_static/img/mlc-logo-with-text-landscape.svg"

html_theme_options = {
    "logo_only": True,
}

# (label, URL) pairs, passed to the templates through html_context below.
header_links = [
    ("Home", "https://webllm.mlc.ai/"),
    ("GitHub", "https://github.com/mlc-ai/web-llm"),
    ("Discord", "https://discord.gg/9Xpy2HGBuD"),
]

# Dropdown name plus its (label, URL) items, also passed through html_context.
header_dropdown = {
    "name": "Other Resources",
    "items": [
        ("WebLLM Chat", "https://chat.webllm.ai/"),
        ("MLC Course", "https://mlc.ai/"),
        ("MLC Blog", "https://blog.mlc.ai/"),
        ("MLC LLM", "https://llm.mlc.ai/"),
    ],
}

# Values made available to the HTML templates on every page.
html_context = {
    "footer_copyright": footer_copyright,
    "footer_note": footer_note,
    "header_links": header_links,
    "header_dropdown": header_dropdown,
    "display_github": True,
    "github_user": "mlc-ai",
    "github_repo": "web-llm",
    "github_version": "main/docs/",
    "theme_vcs_pageview_mode": "edit",
    # "header_logo": "/path/to/logo",
    # "header_logo_link": "",
    # "version_selecter": "",
}


# add additional overrides
templates_path += [tlcpack_sphinx_addon.get_templates_path()]
html_static_path += [tlcpack_sphinx_addon.get_static_path()]


================================================
FILE: docs/developer/add_models.rst
================================================
Adding Models
=============

WebLLM allows you to compile custom language models using `MLC-LLM <https://llm.mlc.ai/>`_ and then serve the compiled model through WebLLM.

For instructions on how to compile and add custom models to WebLLM, please refer to the `MLC-LLM documentation <https://llm.mlc.ai/docs/deploy/webllm.html>`_. 

================================================
FILE: docs/developer/building_from_source.rst
================================================
Building From Source
====================

Clone the Repository
---------------------
.. code-block:: bash

   git clone https://github.com/mlc-ai/web-llm.git
   cd web-llm

Install Dependencies
---------------------
.. code-block:: bash

   npm install

Build the Project
-----------------
.. code-block:: bash

   npm run build

Test Changes
------------

To test your changes, you can reuse an existing example or create a new example that specifically tests the new functionality you wish to provide.

To test the effects of your code change in an example, inside ``examples/<example>/package.json``, change ``"@mlc-ai/web-llm": "^0.2.xx"`` to ``"@mlc-ai/web-llm": "../.."`` to let it reference your local code. Note that sometimes you may need to switch between ``"file:../.."`` and ``"../.."`` to trigger npm to recognize new changes.

.. code-block:: bash

   cd examples/<example>
   # Modify package.json as described
   npm install
   npm start


================================================
FILE: docs/index.rst
================================================
👋 Welcome to WebLLM
====================

`GitHub <https://github.com/mlc-ai/web-llm>`_ | `WebLLM Chat <https://chat.webllm.ai/>`_ | `NPM <https://www.npmjs.com/package/@mlc-ai/web-llm>`_ | `Discord <https://discord.gg/9Xpy2HGBuD>`_

WebLLM is a high-performance in-browser language model inference engine that brings large language models (LLMs) to web browsers with hardware acceleration. With WebGPU support, it allows developers to build AI-powered applications directly within the browser environment, removing the need for server-side processing and ensuring privacy.

It provides a specialized runtime for the web backend of MLCEngine, leverages
`WebGPU <https://www.w3.org/TR/webgpu/>`_ for local acceleration, offers an OpenAI-compatible API,
and provides built-in support for web workers to separate heavy computation from the UI flow.

Key Features
------------
- 🌐 In-Browser Inference: Run LLMs directly in the browser
- 🚀 WebGPU Acceleration: Leverage hardware acceleration for optimal performance
- 🔄 OpenAI API Compatibility: Seamless integration with standard AI workflows
- 📦 Multiple Model Support: Works with Llama, Phi, Gemma, Mistral, and more

Start exploring WebLLM by `chatting with WebLLM Chat <https://chat.webllm.ai/>`_, and start building webapps with high-performance local LLM inference with the following guides and tutorials.

.. toctree::
   :maxdepth: 2
   :caption: User Guide

   user/get_started.rst
   user/basic_usage.rst
   user/advanced_usage.rst
   user/api_reference.rst

.. toctree::
   :maxdepth: 2
   :caption: Developer Guide

   developer/building_from_source.rst
   developer/add_models.rst


================================================
FILE: docs/make.bat
================================================
@ECHO OFF

REM Run from the directory that contains this script; restored by popd at :end.
pushd %~dp0

REM Command file for Sphinx documentation

REM Allow the caller to override the Sphinx executable via SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM Probe the executable with all output discarded; the errorlevel check
REM below reports when the command could not be found.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.https://www.sphinx-doc.org/
	exit /b 1
)

REM With no argument, fall through to the help output.
if "%1" == "" goto help

REM Forward the first argument to sphinx-build's -M option, together with
REM any extra options supplied through SPHINXOPTS and O.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd


================================================
FILE: docs/requirements.txt
================================================
sphinx-tabs == 3.4.1
sphinx-rtd-theme
sphinx == 5.2.3
sphinx-toolbox == 3.4.0
tlcpack-sphinx-addon==0.2.2
sphinxcontrib_httpdomain==1.8.1
sphinxcontrib-napoleon==0.7
sphinx-reredirects==0.1.2


================================================
FILE: docs/user/advanced_usage.rst
================================================
Advanced Use Cases
==================

Using Workers
-------------

You can put the heavy computation in a worker script to optimize your application performance. To do so, you need to:

1. Create a handler in the worker thread that communicates with the frontend while handling the requests.
2. Create a worker engine in your main application that sends messages to the handler in the worker thread under the hood.

For detailed implementations of different kinds of workers, look at the following sections.

Using Web Workers
^^^^^^^^^^^^^^^^^
WebLLM comes with API support for `Web Workers <https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers>`_ so you can offload the computation-heavy generation work into a separate worker thread. WebLLM has implemented cross-thread communication through messages under the hood, so manual implementation is not required.

In the worker script, import and instantiate a ``WebWorkerMLCEngineHandler``, which handles communication with other scripts and processes incoming requests.

.. code-block:: typescript

   // worker.ts
   import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";

   const handler = new WebWorkerMLCEngineHandler();
   self.onmessage = (msg: MessageEvent) => {
       handler.onmessage(msg);
   };

In the main script, import and instantiate a ``WebWorkerMLCEngine`` that implements the same ``MLCEngineInterface`` and exposes the same APIs. Then, simply use it as you would a normal ``MLCEngine``.

.. code-block:: typescript

   import { CreateWebWorkerMLCEngine } from "@mlc-ai/web-llm";

   async function runWorker() {
       const engine = await CreateWebWorkerMLCEngine(
           new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
           "Llama-3.1-8B-Instruct"
       );

       const messages = [{ role: "user", content: "How does WebLLM use workers?" }];
       const reply = await engine.chat.completions.create({ messages });
       console.log(reply.choices[0].message.content);
   }

   runWorker();


Under the hood, ``WebWorkerMLCEngine`` does **not** perform any computation. It translates all calls into messages and sends them to the ``WebWorkerMLCEngineHandler`` for processing. The worker thread receives these messages and processes the actual computation using a hidden engine, and returns the result to the main thread using messages.

Service Workers
^^^^^^^^^^^^^^^
WebLLM also supports offloading computation using `Service Workers <https://developer.mozilla.org/en-US/docs/Web/API/Service_Worker_API>`_. This allows you to avoid reloading the model between page refreshes and optimize your application's offline experience.

(Note: the lifecycle of a Service Worker is managed by the browser, and the worker can be killed at any time without notifying the web application. WebLLM's ``ServiceWorkerMLCEngine`` attempts to keep the service worker thread alive by periodically sending heartbeat events. However, the script could still be killed at any time by Chrome, and your application should include proper error handling. Check ``keepAliveMs`` and ``missedHeartbeat`` in `ServiceWorkerMLCEngine <https://github.com/mlc-ai/web-llm/blob/main/src/service_worker.ts#L218>`_ for more details.)

In the worker script, import and instantiate ``ServiceWorkerMLCEngineHandler``, which handles communication with page scripts and processes incoming requests.

.. code-block:: typescript

   // sw.ts
   import { ServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm";

   self.addEventListener("activate", () => {
       const handler = new ServiceWorkerMLCEngineHandler();
       console.log("Service Worker activated!");
   });


Then, in the main page script, register the service worker and instantiate the engine using the ``CreateServiceWorkerMLCEngine`` factory function that implements the same ``MLCEngineInterface`` and exposes the same APIs. Then, simply use it as you would a normal ``MLCEngine``.

.. code-block:: typescript

    // main.ts
    import { MLCEngineInterface, CreateServiceWorkerMLCEngine } from "@mlc-ai/web-llm";

    if ("serviceWorker" in navigator) {
        navigator.serviceWorker.register(
            new URL("sw.ts", import.meta.url),  // worker script
            { type: "module" },
        );
    }

    const engine: MLCEngineInterface =
        await CreateServiceWorkerMLCEngine(
            selectedModel,
            { initProgressCallback }, // engineConfig
        );

Similar to the ``WebWorkerMLCEngine`` above, the ``ServiceWorkerMLCEngine`` is also a proxy and does not perform any actual computation. Instead, it forwards all calls to the service worker thread and receives the result through messages.

Chrome Extension
----------------

WebLLM can be used in Chrome extensions to empower local LLM inference. You can find examples of building Chrome extension using WebLLM in `examples/chrome-extension <https://github.com/mlc-ai/web-llm/blob/main/examples/chrome-extension>`_ and `examples/chrome-extension-webgpu-service-worker <https://github.com/mlc-ai/web-llm/blob/main/examples/chrome-extension-webgpu-service-worker>`_. The latter leverages Service Worker, so the extension is persistent in the background.

Additionally, we have a full Chrome extension project, `WebLLM Assistant <https://github.com/mlc-ai/web-llm-assistant>`_, which leverages WebLLM to provide a personal web browsing copilot assistant experience. Feel free to check it out and contribute if you are interested.


Additional Customization
------------------------

Using IndexedDB Cache
^^^^^^^^^^^^^^^^^^^^^

By default, WebLLM caches model artifacts using the `Cache API <https://developer.mozilla.org/en-US/docs/Web/API/Cache>`_ for faster subsequent model loads. You can alternatively use `IndexedDB caching <https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API>`_ by setting the ``useIndexedDBCache`` field in ``appConfig`` of ``MLCEngineConfig`` to ``true``.

.. code-block:: typescript

   const engine = await CreateMLCEngine("Llama-3.1-8B-Instruct", {
       appConfig: {
           useIndexedDBCache: true,
           models: [
               { model_id: "Llama-3.1-8B", model_path: "/models/llama3" },
           ],
       },
   });

Customizing Token Behavior
^^^^^^^^^^^^^^^^^^^^^^^^^^

You can modify ``logit_bias`` in ``GenerationConfig`` to control token likelihood. Setting a token's bias to a positive value increases its likelihood of being generated, while a negative value decreases it. A large negative value (e.g., -100) can effectively prevent the token from being generated.

.. code-block:: typescript

   const messages = [
       { role: "user", content: "Describe WebLLM in detail." },
   ];

   const response = await engine.chatCompletion({
       messages,
       logit_bias: { "50256": -100 }, // Example: Prevent specific token generation
   });


================================================
FILE: docs/user/api_reference.rst
================================================
.. _api-reference:

WebLLM API Reference
====================

The ``MLCEngine`` class is the core interface of WebLLM. It enables model loading, chat completions, embeddings, and other operations. Below, we document its methods, along with the associated configuration interfaces.

Interfaces
----------

The following interfaces are used as parameters or configurations within ``MLCEngine`` methods. They are linked to their respective methods for reference.

MLCEngineConfig
^^^^^^^^^^^^^^^

Optional configurations for ``CreateMLCEngine()`` and ``CreateWebWorkerMLCEngine()``.


- **Fields**:
    - ``appConfig``: Configure the app, including the list of models and whether to use IndexedDB cache.
    - ``initProgressCallback``: A callback for showing model loading progress.
    - ``logitProcessorRegistry``: A registry for stateful logit processors (see ``webllm.LogitProcessor``).


- **Usage**:
    - ``appConfig``: Contains application-specific settings, including:
        - Model configurations.
        - IndexedDB caching preferences.
    - ``initProgressCallback``: Allows developers to visualize model loading progress by implementing a callback.
    - ``logitProcessorRegistry``: A ``Map`` object for registering custom logit processors. Only applies to ``MLCEngine``.


.. note:: All fields are optional, and ``logitProcessorRegistry`` is only used in ``MLCEngine``.


Example:

.. code-block:: typescript

   const engine = await CreateMLCEngine("Llama-3.1-8B-Instruct", {
       appConfig: { /* app-specific config */ },
       initProgressCallback: (progress) => console.log(progress),
   });


GenerationConfig
^^^^^^^^^^^^^^^^

Configurations for a single generation task, primarily used in chat completions.

- **Fields**:
    - ``repetition_penalty``, ``ignore_eos``: Parameters specific to MLC models.
    - ``top_p``, ``temperature``, ``max_tokens``, ``stop``: Common parameters shared with OpenAI APIs.
    - ``frequency_penalty``, ``presence_penalty``: Tune repetition behavior following OpenAI semantics.
    - ``logit_bias``, ``n``, ``logprobs``, ``top_logprobs``: Advanced sampling controls.
    - ``response_format``, ``enable_thinking``, ``enable_latency_breakdown``: Additional OpenAI-style request features.

- **Usage**:
    - Fields like ``repetition_penalty`` and ``ignore_eos`` give explicit control over repetition handling and whether the model stops at the EOS token, respectively.
    - Common parameters shared with OpenAI APIs (e.g., ``temperature``, ``top_p``) ensure compatibility while still falling back to the values configured during ``MLCEngine.reload()`` when omitted.
    - ``frequency_penalty`` and ``presence_penalty`` mirror OpenAI's bounds ``[-2, 2]``; providing only one will default the other to ``0``.
    - ``response_format`` (for JSON or other schema outputs), ``enable_thinking``, and ``enable_latency_breakdown`` pass through directly to the engine and surface enhanced telemetry or structured responses when the underlying model supports them.


Example:

.. code-block:: typescript

   const messages = [
       { role: "system", content: "You are a helpful assistant." },
       { role: "user", content: "Explain WebLLM." },
   ];

   const response = await engine.chatCompletion({
       messages,
       top_p: 0.9,
       temperature: 0.8,
       max_tokens: 150,
   });

ChatConfig
^^^^^^^^^^

Model's baseline configuration loaded from ``mlc-chat-config.json`` when ``MLCEngine.reload()`` runs. ``ChatOptions`` (and therefore the ``chatOpts`` argument to ``reload``) can override any subset of these fields.

- **Fields** (subset):
    - ``tokenizer_files``, ``tokenizer_info``: Files and parameters required to initialize the tokenizer.
    - ``conv_template``, ``conv_config``: Conversation templates that define prompts, separators, and role formatting.
    - ``context_window_size``, ``sliding_window_size``, ``attention_sink_size``: KV-cache and memory settings.
    - Default generation knobs such as ``repetition_penalty``, ``frequency_penalty``, ``presence_penalty``, ``top_p``, and ``temperature``.

- **Usage**:
    - Loaded automatically for each model; provides defaults that ``GenerationConfig`` falls back to when fields are omitted.
    - Override selected values per model load by supplying ``chatOpts`` (``Partial<ChatConfig>``) to ``MLCEngine.reload()``.


Example:

.. code-block:: typescript

   await engine.reload("Llama-3.1-8B-Instruct", {
       temperature: 0.7,
       repetition_penalty: 1.1,
       context_window_size: 4096,
   });

ChatCompletionRequest
^^^^^^^^^^^^^^^^^^^^^

Defines the structure for chat completion requests.

- **Base Interface**: ``ChatCompletionRequestBase``
    - Contains parameters such as ``messages``, ``stream``, ``frequency_penalty``, and ``presence_penalty``.
- **Sub-interfaces**:
    - ``ChatCompletionRequestNonStreaming``: For non-streaming completions.
    - ``ChatCompletionRequestStreaming``: For streaming completions.

- **Usage**:
    - Combines settings from ``GenerationConfig`` and ``ChatCompletionRequestBase`` to provide complete control over chat behavior.
    - The ``stream`` parameter enables streaming responses, improving interactivity in conversational agents.
    - The ``logit_bias`` feature allows controlling token generation probabilities, providing a mechanism to restrict or encourage specific outputs.


Example:

.. code-block:: typescript

   const response = await engine.chatCompletion({
       messages: [
           { role: "user", content: "Tell me about WebLLM." },
       ],
       stream: true,
   });

Model Loading
-------------

``MLCEngine.reload(modelId: string | string[], chatOpts?: ChatOptions | ChatOptions[]): Promise<void>``

Loads the specified model(s) into the engine. Uses ``MLCEngineConfig`` during initialization.

- Parameters:
    - ``modelId``: Identifier(s) for the model(s) to load.
    - ``chatOpts``: Configuration for generation (see ``ChatConfig``).

Example:

.. code-block:: typescript

   await engine.reload(["Llama-3.1-8B", "Gemma-2B"], [
       { temperature: 0.7 },
       { top_p: 0.9 },
   ]);

``MLCEngine.unload(): Promise<void>``

Unloads all loaded models and clears their associated configurations.

Example:

.. code-block:: typescript

   await engine.unload();

---

Chat Completions
----------------

``MLCEngine.chat.completions.create(request: ChatCompletionRequest): Promise<ChatCompletion | AsyncIterable<ChatCompletionChunk>>``

Generates chat-based completions using a specified request configuration.

- Parameters:
  - ``request``: A ``ChatCompletionRequest`` instance.

Example:

.. code-block:: typescript

   const response = await engine.chat.completions.create({
       messages: [
           { role: "system", content: "You are a helpful AI assistant." },
           { role: "user", content: "What is WebLLM?" },
       ],
       temperature: 0.8,
       stream: false,
   });

---

Utility Methods
^^^^^^^^^^^^^^^

``MLCEngine.getMessage(modelId?: string): Promise<string>``

Retrieves the current output message from the specified model.

- Parameters:
    - ``modelId``: (Optional) Identifier of model to query. Omitting modelId only works when the engine currently has a single model loaded.

``MLCEngine.resetChat(keepStats?: boolean, modelId?: string): Promise<void>``

Resets the chat history and optionally retains usage statistics.

- Parameters:
    - ``keepStats``: (Optional) If true, retains usage statistics.
    - ``modelId``: (Optional) Identifier of the model to reset. Omitting modelId only works when the engine currently has a single model loaded.

GPU Information
----------------

The following methods provide detailed information about the GPU used for WebLLM computations.

``MLCEngine.getGPUVendor(): Promise<string>``

Retrieves the vendor name of the GPU used for computations. This is useful for understanding hardware capabilities during inference.

- **Returns**: A string indicating the GPU vendor (e.g., "Intel", "NVIDIA").

Example:

.. code-block:: typescript

   const gpuVendor = await engine.getGPUVendor();
   console.log(`GPU Vendor: ${gpuVendor}`);

``MLCEngine.getMaxStorageBufferBindingSize(): Promise<number>``

Returns the maximum storage buffer size supported by the GPU. This is important when working with larger models that require significant memory for processing.

- **Returns**: A number representing the maximum size in bytes.

Example:

.. code-block:: typescript

   const maxBufferSize = await engine.getMaxStorageBufferBindingSize();
   console.log(`Max Storage Buffer Binding Size: ${maxBufferSize}`);


================================================
FILE: docs/user/basic_usage.rst
================================================
Basic Usage
================

Model Records in WebLLM
-----------------------

Each of the models available in WebLLM is registered as an instance of
``ModelRecord`` and can be accessed at
`webllm.prebuiltAppConfig.model_list <https://github.com/mlc-ai/web-llm/blob/main/src/config.ts#L313>`__.

Creating an MLCEngine
---------------------

WebLLM APIs are exposed through the ``MLCEngine`` interface. You can create an ``MLCEngine`` instance and load the model by calling the CreateMLCEngine() factory function.

(Note that loading models requires downloading and it can take a significant amount of time for the very first run without previous caching. You should properly handle this asynchronous call.)

``MLCEngine`` can be instantiated in two ways:

1. Using the factory function ``CreateMLCEngine``.
2. Instantiating the ``MLCEngine`` class directly and using ``reload()`` to load models.

.. code-block:: typescript

   import { CreateMLCEngine, MLCEngine } from "@mlc-ai/web-llm";

   // Initialize with a progress callback
   const initProgressCallback = (progress) => {
       console.log("Model loading progress:", progress);
   };

   // Using CreateMLCEngine
   const engine = await CreateMLCEngine("Llama-3.1-8B-Instruct", { initProgressCallback });

   // Direct instantiation
   const engineInstance = new MLCEngine({ initProgressCallback });
   await engineInstance.reload("Llama-3.1-8B-Instruct");

Under the hood, the ``CreateMLCEngine`` factory function first creates an engine instance (synchronous) and then loads the model (asynchronous). You can also perform these two steps separately in your application.

.. code-block:: typescript

    import { MLCEngine } from "@mlc-ai/web-llm";

    // This is a synchronous call that returns immediately
    const engine = new MLCEngine({
        initProgressCallback: initProgressCallback
    });

    // This is an asynchronous call and can take a long time to finish
    await engine.reload(selectedModel);


Chat Completion
---------------

Chat completions can be invoked using OpenAI style chat APIs through the ``engine.chat.completions`` interface of an initialized ``MLCEngine``. For the full list of parameters and their descriptions, check :ref:`api-reference`.

(Note: Since the model is determined during ``MLCEngine`` instantiation, the ``model`` parameter is not supported and will be **ignored**. Instead, call ``CreateMLCEngine(model)`` or ``engine.reload(model)`` to reinitialize the engine to use a specific model.)

.. code-block:: typescript

    const messages = [
        { role: "system", content: "You are a helpful AI assistant." },
        { role: "user", content: "Hello!" }
    ];

    const reply = await engine.chat.completions.create({
        messages,
    });

    console.log(reply.choices[0].message);
    console.log(reply.usage);


Streaming Chat Completion
-------------------------

Streaming chat completion can be enabled by passing the ``stream: true`` parameter to the ``engine.chat.completions.create`` call configuration. Check :ref:`api-reference` for the full list of parameters.

.. code-block:: typescript

    const messages = [
        { role: "system", content: "You are a helpful AI assistant." },
        { role: "user", content: "Hello!" },
    ]

    // chunks is an AsyncGenerator object
    const chunks = await engine.chat.completions.create({
        messages,
        temperature: 1,
        stream: true, // <-- Enable streaming
        stream_options: { include_usage: true },
    });

    let reply = "";
    for await (const chunk of chunks) {
        reply += chunk.choices[0]?.delta.content || "";
        console.log(reply);
        if (chunk.usage) {
            console.log(chunk.usage); // only last chunk has usage
        }
    }

    const fullReply = await engine.getMessage();
    console.log(fullReply);


Chatbot Examples
----------------

Learn how to use WebLLM to integrate large language models into your applications and generate chat completions through this simple Chatbot example:

- `Example in JSFiddle <https://jsfiddle.net/neetnestor/4nmgvsa2/>`_
- `Example in CodePen <https://codepen.io/neetnestor/pen/vYwgZaG>`_

For an advanced example of a larger, more complicated project, look at `WebLLM Chat <https://github.com/mlc-ai/web-llm-chat/blob/main/app/client/webllm.ts>`_.

More examples for different use cases are available in the `WebLLM examples folder <https://github.com/mlc-ai/web-llm/tree/main/examples>`_.




================================================
FILE: docs/user/get_started.rst
================================================
Getting Started with WebLLM
===========================

This guide will help you set up WebLLM in your project, install necessary dependencies, and verify your setup.


WebLLM Chat
-----------

If you want to experience AI Chat supported by local LLM inference and understand how WebLLM works, try out `WebLLM Chat <https://chat.webllm.ai/>`__, which provides a great example
of integrating WebLLM into a full web application.

A WebGPU-compatible browser is needed to run WebLLM-powered web applications.
You can download the latest Google Chrome and use `WebGPU Report <https://webgpureport.org/>`__
to verify the functionality of WebGPU on your browser.

Installation
------------

WebLLM offers a minimalist and modular interface to access the chatbot in the browser. The package is designed in a modular way to hook to any of the UI components.

WebLLM is available as an `npm package <https://www.npmjs.com/package/@mlc-ai/web-llm>`_ and is also CDN-delivered. Therefore, you can install WebLLM using Node.js package managers like npm, yarn, or pnpm, or directly import the package via CDN.

Using Package Managers
^^^^^^^^^^^^^^^^^^^^^^
Install WebLLM via your preferred package manager:

.. code-block:: bash

   # npm
   npm install @mlc-ai/web-llm
   # yarn
   yarn add @mlc-ai/web-llm
   # pnpm
   pnpm install @mlc-ai/web-llm

Import WebLLM into your project:

.. code-block:: javascript

   // Import everything
   import * as webllm from "@mlc-ai/web-llm";

   // Or only import what you need
   import { CreateMLCEngine } from "@mlc-ai/web-llm";

Using CDN
^^^^^^^^^
Thanks to `jsdelivr.com <https://www.jsdelivr.com/package/npm/@mlc-ai/web-llm>`_, WebLLM can be imported directly through URL and work out-of-the-box on cloud development platforms like `jsfiddle.net <https://jsfiddle.net/>`_, `Codepen.io <https://codepen.io/>`_, and `Scribbler <https://scribbler.live/>`_:

.. code-block:: javascript

   import * as webllm from "https://esm.run/@mlc-ai/web-llm";

This method is especially useful for online environments like CodePen, JSFiddle, or local experiments.

Verifying Installation
^^^^^^^^^^^^^^^^^^^^^^
Run the following script to verify the installation:

.. code-block:: javascript

   import { CreateMLCEngine } from "@mlc-ai/web-llm";
   console.log("WebLLM loaded successfully!");


Online IDE Sandbox
------------------

Instead of setting up WebLLM locally, you can also try it on online JavaScript IDE sandboxes like:

- `Example in JSFiddle <https://jsfiddle.net/neetnestor/4nmgvsa2/>`_
- `Example in CodePen <https://codepen.io/neetnestor/pen/vYwgZaG>`_




================================================
FILE: eslint.config.cjs
================================================
// Flat ESLint configuration for the whole repository (CommonJS).
const { defineConfig, globalIgnores } = require("eslint/config");

const tsParser = require("@typescript-eslint/parser");
const typescriptEslint = require("@typescript-eslint/eslint-plugin");
const js = require("@eslint/js");

const { FlatCompat } = require("@eslint/eslintrc");

// Bridges eslintrc-style "extends" strings into flat-config objects.
const compat = new FlatCompat({
    baseDirectory: __dirname,
    recommendedConfig: js.configs.recommended,
    allConfig: js.configs.all,
});

// Settings applied to every linted file.
const baseConfig = {
    extends: compat.extends(
        "eslint:recommended",
        "plugin:@typescript-eslint/recommended",
        "plugin:prettier/recommended",
    ),
    languageOptions: { parser: tsParser },
    plugins: { "@typescript-eslint": typescriptEslint },
    rules: {
        "@typescript-eslint/no-explicit-any": "off",
        "@typescript-eslint/no-empty-function": "off",
        "@typescript-eslint/no-non-null-assertion": "off",
    },
};

// Extra rule overrides that apply only to the example projects.
const examplesConfig = {
    files: ["examples/**/*.js", "examples/**/*.ts"],
    rules: {
        "no-undef": "off",
        "@typescript-eslint/no-unused-vars": "off",
    },
};

// Paths that are never linted.
const ignoredPaths = globalIgnores([
    "**/dist",
    "**/debug",
    "**/lib",
    "**/build",
    "**/node_modules",
    "**/3rdparty",
    "**/.eslintrc.cjs",
    "**/.next",
]);

module.exports = defineConfig([baseConfig, examplesConfig, ignoredPaths]);


================================================
FILE: examples/.gitignore
================================================
package-lock.json


================================================
FILE: examples/README.md
================================================
# Awesome WebLLM

This page contains a curated list of examples, tutorials, and blogs about WebLLM use cases.
Please send a pull request if you find things that belong here.

## Example Projects

Note that all examples below run in-browser and use WebGPU as a backend.

#### Project List

- [get-started](get-started): minimum get started example with chat completion.

  [![Open in JSFiddle](https://img.shields.io/badge/open-JSFiddle-blue?logo=jsfiddle&logoColor=white)](https://jsfiddle.net/neetnestor/yac9gbwf/)
  [![Open in Codepen](https://img.shields.io/badge/open-codepen-gainsboro?logo=codepen)](https://codepen.io/neetnestor/pen/NWVdgey)

- [simple-chat-js](simple-chat-js): a minimal and complete chat bot app in vanilla JavaScript.

  [![Open in JSFiddle](https://img.shields.io/badge/open-JSFiddle-blue?logo=jsfiddle&logoColor=white)](https://jsfiddle.net/neetnestor/4nmgvsa2/)
  [![Open in Codepen](https://img.shields.io/badge/open-codepen-gainsboro?logo=codepen)](https://codepen.io/neetnestor/pen/vYwgZaG)

- [simple-chat-ts](simple-chat-ts): a minimal and complete chat bot app in TypeScript.
- [get-started-web-worker](get-started-web-worker): same as get-started, but using web worker.
- [next-simple-chat](next-simple-chat): a minimal and complete chat bot app with [Next.js](https://nextjs.org/).
- [multi-round-chat](multi-round-chat): while APIs are functional, we internally optimize so that multi round chat usage can reuse KV cache
- [text-completion](text-completion): demonstrates API `engine.completions.create()`, which is pure text completion with no conversation, as opposed to `engine.chat.completions.create()`
- [embeddings](embeddings): demonstrates API `engine.embeddings.create()`, integration with `EmbeddingsInterface` and `MemoryVectorStore` of [Langchain.js](https://js.langchain.com), and RAG with Langchain.js using WebLLM for both LLM and Embedding in a single engine
- [multi-models](multi-models): demonstrates loading multiple models in a single engine concurrently

#### Advanced OpenAI API Capabilities

These examples demonstrate various capabilities via WebLLM's OpenAI-like API.

- [streaming](streaming): return output as chunks in real-time in the form of an AsyncGenerator
- [json-mode](json-mode): efficiently ensure output is in json format, see [OpenAI Reference](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) for more.
- [json-schema](json-schema): besides guaranteeing output to be in JSON, ensure output adheres to a specific JSON schema specified by the user
- [seed-to-reproduce](seed-to-reproduce): use seeding to ensure reproducible output with fields `seed`.
- [function-calling](function-calling) (WIP): function calling with fields `tools` and `tool_choice` (with preliminary support).
- [vision-model](vision-model): process request with image input using Vision Language Model (e.g. Phi3.5-vision)

#### Chrome Extension

- [chrome-extension](chrome-extension): chrome extension that does not have a persistent background
- [chrome-extension-webgpu-service-worker](chrome-extension-webgpu-service-worker): chrome extension using service worker, hence having a persistent background

#### Others

- [logit-processor](logit-processor): while `logit_bias` is supported, we additionally support stateful logit processing where users can specify their own rules. We also expose low-level API `forwardTokensAndSample()`.
- [cache-usage](cache-usage): demonstrates how WebLLM supports both the [Cache API](https://developer.mozilla.org/en-US/docs/Web/API/Cache) and [IndexedDB cache](https://developer.mozilla.org/en-US/docs/Web/API/IndexedDB_API), and
  users can pick with `appConfig.useIndexedDBCache`. Also demonstrates various cache utils such as checking
  whether a model is cached, deleting a model's weights from cache, deleting a model library wasm from cache, etc.
- [simple-chat-upload](simple-chat-upload): demonstrates how to upload local models to WebLLM instead of downloading via a URL link

## Demo Spaces

- [web-llm-embed](https://huggingface.co/spaces/matthoffner/web-llm-embed): document chat prototype using react-llm with transformers.js embeddings
- [DeVinci](https://x6occ-biaaa-aaaai-acqzq-cai.icp0.io/): AI chat app based on WebLLM and hosted on decentralized cloud platform


================================================
FILE: examples/abort-reload/README.md
================================================
# WebLLM Get Started App

This folder provides a demo for cancelling model fetching after calling `engine.reload()`.

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package, you can change the
`web-llm` dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you intend to modify the WebLLM core package.


================================================
FILE: examples/abort-reload/package.json
================================================
{
  "name": "get-started",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/get_started.html  --port 8887",
    "build": "parcel build src/get_started.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/abort-reload/src/get_started.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <label id="stats-label"> </label>

    <script type="module" src="./get_started.js"></script>
  </body>
</html>


================================================
FILE: examples/abort-reload/src/get_started.js
================================================
import * as webllm from "@mlc-ai/web-llm";
import { error } from "loglevel";

let engine;

/**
 * Writes `text` into the element with the given id.
 * Throws an Error when no element with that id exists.
 */
function setLabel(id, text) {
  const target = document.getElementById(id);
  if (target != null) {
    target.innerText = text;
    return;
  }
  throw Error("Cannot find label " + id);
}

/**
 * Creates the engine and kicks off loading of the selected model.
 *
 * Progress reports are echoed to the console and to the "init-label" element.
 * The result of `engine.reload()` is not awaited, so this function returns
 * while the load is still in flight.
 */
async function main() {
  // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
  const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

  const initProgressCallback = (report) => {
    const message = report.text;
    console.log(message);
    setLabel("init-label", message);
  };

  // Assigned to the module-level `engine` so code outside main() can reach it.
  engine = new webllm.MLCEngine({ initProgressCallback });
  engine.reload(selectedModel);
}
// Start loading the model, then unload the engine five seconds later.
main();
setTimeout(() => {
  console.log("calling unload");
  // Any error raised by unload() is logged instead of being left unhandled.
  engine.unload().catch((err) => console.log(err));
}, 5000);


================================================
FILE: examples/cache-usage/README.md
================================================
# WebLLM Cache Usage

WebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedDBCache`.
This folder provides an example on how Cache and IndexedDB Cache are used in WebLLM. We also
demonstrate the utility cache functions such as deleting models, checking if models are in cache, etc.

For more information about the two caches, see: https://developer.mozilla.org/en-US/docs/Web/API/Storage_API/Storage_quotas_and_eviction_criteria#what_technologies_store_data_in_the_browser.

To inspect the downloaded artifacts in your browser, open up developer console, go to application,
and you will find the artifacts under either `IndexedDB` or `Cache storage`.

To run the example, you can do the following steps under this folder:

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package, you can change the
`web-llm` dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you intend to modify the WebLLM core package.


================================================
FILE: examples/cache-usage/package.json
================================================
{
  "name": "cache-usage",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/cache_usage.html  --port 8888",
    "build": "parcel build src/cache_usage.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/cache-usage/src/cache_usage.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>

  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <label id="stats-label"> </label>

    <script type="module" src="./cache_usage.ts"></script>
  </body>
</html>


================================================
FILE: examples/cache-usage/src/cache_usage.ts
================================================
import * as webllm from "@mlc-ai/web-llm";

/**
 * Writes `text` into the element with the given id.
 *
 * @param id - DOM id of the element to update.
 * @param text - Text assigned to the element's `innerText`.
 * @throws Error when no element with that id exists.
 */
function setLabel(id: string, text: string) {
  const target = document.getElementById(id);
  if (target != null) {
    target.innerText = text;
    return;
  }
  throw Error("Cannot find label " + id);
}

/** Mirrors the text of each init progress report into the "init-label" element. */
const initProgressCallback = (report: webllm.InitProgressReport): void =>
  setLabel("init-label", report.text);

/**
 * Walks through the cache utilities end to end:
 *   1. load a model (downloads and caches it) and run one chat completion,
 *   2. check that the model is reported as cached,
 *   3. reload the model and run the completion again,
 *   4. delete everything about the model from the cache and verify it is gone,
 *   5. reload once more and run the completion again.
 *
 * @throws Error when `hasModelInCache()` does not report the expected state
 *   after loading (step 2) or after deletion (step 4).
 */
async function main() {
  // Build our own config from a shallow copy so that the `prebuiltAppConfig`
  // object exported by the library is not modified for other users of it.
  // CHANGE `useIndexedDBCache` TO SEE EFFECTS OF BOTH, CODE BELOW DO NOT NEED TO CHANGE
  const appConfig: webllm.AppConfig = {
    ...webllm.prebuiltAppConfig,
    useIndexedDBCache: true,
  };

  if (appConfig.useIndexedDBCache) {
    console.log("Using IndexedDB Cache");
  } else {
    console.log("Using Cache API");
  }

  // 1. This triggers downloading and caching the model with either Cache or IndexedDB Cache
  const selectedModel = "phi-2-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback, appConfig: appConfig },
  );

  // The same request is reused after every (re)load below.
  const request: webllm.ChatCompletionRequest = {
    stream: false,
    messages: [
      {
        role: "user",
        content: "Write an analogy between mathematics and a lighthouse.",
      },
    ],
    n: 1,
  };
  let reply = await engine.chat.completions.create(request);
  console.log(reply);

  // 2. Check whether model weights are cached
  let modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
  console.log("hasModelInCache: ", modelCached);
  if (!modelCached) {
    throw Error("Expect hasModelInCache() to be true, but got: " + modelCached);
  }

  // 3. We reload, and we should see this time it is much faster because the weights are cached.
  console.log("Reload model start");
  await engine.reload(selectedModel);
  console.log("Reload model end");
  reply = await engine.chat.completions.create(request);
  console.log(reply);

  // 4. Delete every thing about this model from cache
  // You can also delete only the model library wasm, only the model weights, or only the config file
  await webllm.deleteModelAllInfoInCache(selectedModel, appConfig);
  modelCached = await webllm.hasModelInCache(selectedModel, appConfig);
  console.log("After deletion, hasModelInCache: ", modelCached);
  if (modelCached) {
    throw Error(
      "Expect hasModelInCache() to be false, but got: " + modelCached,
    );
  }

  // 5. If we reload, we should expect the model to start downloading again
  console.log("Reload model start");
  await engine.reload(selectedModel);
  console.log("Reload model end");
  reply = await engine.chat.completions.create(request);
  console.log(reply);
}

main();


================================================
FILE: examples/chrome-extension/README.md
================================================
# WebLLM Chrome Extension

![Chrome Extension](https://github.com/mlc-ai/mlc-llm/assets/11940172/0d94cc73-eff1-4128-a6e4-70dc879f04e0)

To run the extension, do the following steps under this folder

```bash
npm install
npm run build
```

This will create a new directory at `chrome-extension/dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `chrome-extension/dist/` directory. You can now pin the extension to your toolbar and use the drop-down menu to chat with your favorite model!


================================================
FILE: examples/chrome-extension/package.json
================================================
{
  "name": "chrome-extension",
  "version": "1.0.1",
  "description": "",
  "private": true,
  "scripts": {
    "build": "parcel build src/manifest.json --config @parcel/config-webextension"
  },
  "author": "",
  "license": "ISC",
  "devDependencies": {
    "@parcel/config-webextension": "^2.9.3",
    "@types/chrome": "^0.0.242",
    "buffer": "^6.0.3",
    "parcel": "^2.9.3",
    "process": "^0.11.10",
    "url": "^0.11.1"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82",
    "progressbar.js": "^1.1.0"
  }
}


================================================
FILE: examples/chrome-extension/src/content.js
================================================
// Only the content script is able to access the DOM
chrome.runtime.onConnect.addListener((port) => {
  // Every message received on the port is answered with the page's visible
  // text; the content of the incoming message itself is not inspected.
  port.onMessage.addListener(() => {
    port.postMessage({ contents: document.body.innerText });
  });
});


================================================
FILE: examples/chrome-extension/src/example.html
================================================
In the year 2154, humanity had colonized several planets in the distant reaches
of the galaxy. The planet of Xylophia-IV was one of the most remote and
inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite
these harsh conditions, a team of scientists had established a research station
on the planet to study the unique geological formations and exotic flora and
fauna. One day, while conducting a routine survey of the planet's surface, the
team discovered a strange object buried deep in the ice. As they examined it
closer, they realized it was a small, metallic capsule with a glowing blue
symbol etched onto its surface. The team's leader, a brilliant scientist named
Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious
origins. She ordered her team to bring it back to the research station for
further analysis. After weeks of studying the capsule, the team finally cracked
the code to the symbol etched onto its surface. It was a message from an alien
race, warning Earth of an impending attack from an unknown threat. The team was
shocked and dismayed by the news, but they knew they had to act quickly to warn
the rest of humanity. They transmitted the message to the nearest space station,
which relayed it to Earth's government. As the threat of attack loomed near, the
team remained on high alert, ready to face whatever dangers lay ahead. They had
uncovered a secret of the universe, and now they were determined to protect
their planet and its inhabitants at all costs.


================================================
FILE: examples/chrome-extension/src/manifest.json
================================================
{
  "manifest_version": 3,
  "name": "MLCBot",
  "version": "0.1.1",
  "description": "Chat with your browser",
  "icons": {
    "16": "icons/icon-16.png",
    "32": "icons/icon-32.png",
    "64": "icons/icon-64.png",
    "128": "icons/icon-128.png"
  },
  "content_security_policy": {
    "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co https://cas-bridge.xethub.hf.co"
  },
  "action": {
    "default_title": "MLCBot",
    "default_popup": "popup.html"
  },
  "content_scripts": [
    {
      "matches": ["<all_urls>"],
      "js": ["content.js"]
    }
  ],
  "permissions": ["storage", "tabs", "webNavigation", "activeTab", "scripting"],
  "host_permissions": ["http://*/", "https://*/"]
}


================================================
FILE: examples/chrome-extension/src/manifest_v2.json
================================================
{
  "manifest_version": 2,
  "name": "MLCBot",
  "version": "0.1.0",
  "description": "Chat with your browser",
  "icons": {
    "16": "icons/icon-16.png",
    "32": "icons/icon-32.png",
    "64": "icons/icon-64.png",
    "128": "icons/icon-128.png"
  },
  "content_security_policy": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'unsafe-eval' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co https://cas-bridge.xethub.hf.co",
  "browser_action": {
    "default_popup": "popup.html"
  },
  "content_scripts": [
    {
      "matches": ["<all_urls>"],
      "js": ["content.js"]
    }
  ],
  "permissions": ["storage", "tabs", "webNavigation", "activeTab"]
}


================================================
FILE: examples/chrome-extension/src/popup.css
================================================
*,
*::before,
*::after {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

html {
  font-family:
    -apple-system,
    BlinkMacSystemFont,
    Segoe UI,
    Helvetica,
    Arial,
    sans-serif;
  color: #222;
}

body {
  margin: 0;
  padding: 0.5rem;
  background-color: #778da9;
  width: 335px;
  font-size: small;
}

p {
  margin: 0;
}

/* LOADING BAR */
#loadingContainer {
  margin-bottom: 15px;
  width: 315px;
  height: 8px;
}

/* INPUT AREA */
#query-input {
  border: 1px solid #ccc;
  border-radius: 4px;
}

.input-container {
  display: flex;
  flex-direction: row;
  align-items: center;
}

.input-container input {
  width: 100%;
  outline: none;
  padding: 0.5rem;
  margin-right: 0.5rem;
}

/* BUTTON */
.btn {
  background-color: #1b263b;
  color: white;
  font-size: small;
  cursor: pointer;
  border-radius: 4px;
  border: none;
  padding: 0.5rem;
}

.btn:hover {
  background-color: #d0d0d0;
}

.btn:disabled {
  background-color: #a7a7a7;
  color: rgb(255, 255, 255);
  cursor: default;
}

.btn img {
  width: 1rem;
  height: 1rem;
}

/* LOADING */

.stage {
  display: flex;
  justify-content: center;
  align-items: center;
  position: relative;
  margin: 0 -5%;
  overflow: hidden;
}

#loading-indicator {
  display: none;
  color: white;
  margin-top: 0.5rem;
}

.dot-flashing {
  position: relative;
  width: 10px;
  height: 10px;
  border-radius: 5px;
  background-color: #1b263b;
  color: #1b263b;
  animation: dot-flashing 0.4s infinite linear alternate;
  animation-delay: 0.2s;
}

.dot-flashing::before,
.dot-flashing::after {
  content: "";
  display: inline-block;
  position: absolute;
  top: 0;
}

.dot-flashing::before {
  left: -15px;
  width: 10px;
  height: 10px;
  border-radius: 5px;
  background-color: #1b263b;
  color: #1b263b;
  animation: dot-flashing 0.4s infinite alternate;
  animation-delay: 0s;
}

.dot-flashing::after {
  left: 15px;
  width: 10px;
  height: 10px;
  border-radius: 5px;
  background-color: #1b263b;
  color: #1b263b;
  animation: dot-flashing 0.4s infinite alternate;
  animation-delay: 0.4s;
}

@keyframes dot-flashing {
  0% {
    background-color: #1b263b;
  }

  50%,
  100% {
    background-color: #415a77;
  }
}

/* ANSWERS */
#queriesAnswersContainer {
  display: block;
  color: white;
  margin-top: 0.5rem;
}

#answer {
  color: #333333;
}

#answerWrapper {
  display: none;
  background-color: #ffd166;
  border-radius: 8px;
  padding: 0.5rem;
  margin-top: 0.5rem;
}

.queriesAnswers {
  border-radius: 8px;
  background-color: #ffd166;
  padding: 0.5rem;
  color: #333333;
}

#lastQuery {
  color: rgb(188, 188, 188);
}

#lastAnswer {
  color: white;
  margin-top: 0.5rem;
}

#lastRequest {
  padding: 0.5rem;
  margin-top: 0.5rem;
  background-color: #333333;
  border-radius: 4px;
}

/* ANSWER OPTIONS */
.timeStamp {
  color: #9a8c98;
}

.copyRow {
  display: flex;
  flex-direction: row;
  align-items: end;
  justify-content: space-between;
  color: #a7a7a7;
  margin-top: 0.5rem;
}

.copyText {
  display: none;
  color: #a7a7a7;
  margin-right: 0.5rem;
}

.copyButton {
  color: #415a77;
  background-color: transparent;
  border: none;
  cursor: pointer;
  padding: 0;
  margin-left: 0.5rem;
}

.copyButton:hover {
  color: #5e80a7;
  background-color: transparent;
}

.removeButton {
  color: #415a77;
  background-color: transparent;
  border: none;
  cursor: pointer;
  padding: 0;
}

.removeButton:hover {
  color: #5e80a7;
  background-color: transparent;
}


================================================
FILE: examples/chrome-extension/src/popup.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <title>Chatbot</title>
    <link rel="stylesheet" href="popup.css" />
    <link
      rel="stylesheet"
      href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
    />
  </head>
  <body>
    <select id="model-selection"></select>
    <div id="loadingBox">
      <p id="init-label">Initializing model...</p>
      <div id="loadingContainer"></div>
    </div>
    <p id="model-name"></p>
    <div class="input-container form-group">
      <input
        type="search"
        id="query-input"
        placeholder="What's on your mind?"
      />
      <button id="submit-button" class="btn">
        <i class="fa fa-comments"></i>
      </button>
    </div>

    <div class="stage">
      <div id="loading-indicator" class="dot-flashing"></div>
    </div>

    <div id="answerWrapper">
      <div id="answer"></div>
      <div class="copyRow">
        <span id="timestamp"></span>
        <button
          id="copyAnswer"
          class="btn copyButton"
          title="Copy the Answer to the Clipboard"
        >
          <i class="fa-solid fa-copy fa-lg"></i>
        </button>
      </div>
    </div>

    <script type="module" src="./popup.ts"></script>
  </body>
</html>


================================================
FILE: examples/chrome-extension/src/popup.ts
================================================
"use strict";

// This code is partially adapted from the openai-chatgpt-chrome-extension repo:
// https://github.com/jessedi0n/openai-chatgpt-chrome-extension

import "./popup.css";

import {
  MLCEngineInterface,
  InitProgressReport,
  CreateMLCEngine,
  ChatCompletionMessageParam,
  prebuiltAppConfig,
} from "@mlc-ai/web-llm";
import { ProgressBar, Line } from "progressbar.js";

/**
 * Writes `text` into the element with the given id.
 * Unlike a throwing variant, a missing element is silently ignored.
 *
 * @param id - DOM id of the element to update.
 * @param text - Text assigned to the element's `innerText`.
 */
function setLabel(id: string, text: string) {
  const target = document.getElementById(id);
  if (target == null) {
    return;
  }
  target.innerText = text;
}

/**
 * Looks up an element by id and guarantees a non-null result.
 *
 * @param id - DOM id to look up.
 * @returns The element with that id.
 * @throws Error when no element with that id exists.
 */
function getElementAndCheck(id: string): HTMLElement {
  const found = document.getElementById(id);
  if (found != null) {
    return found;
  }
  throw Error("Cannot find element " + id);
}

/** Returns a promise that resolves (with no value) after roughly `ms` milliseconds. */
const sleep = (ms: number) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

// Elements the popup script works with; getElementAndCheck throws if one is missing.
const queryInput = getElementAndCheck("query-input");
const submitButton = getElementAndCheck("submit-button");
const modelName = getElementAndCheck("model-name");

// `context` receives the page text from fetchPageContents();
// `modelDisplayName` is derived from the selected model id in enableInputs().
let context = "";
let modelDisplayName = "";

// throws runtime.lastError if you refresh extension AND try to access a webpage that is already open
fetchPageContents();

// The submit button starts out disabled.
(submitButton as HTMLButtonElement).disabled = true;

// Progress line rendered inside #loadingContainer while the model loads.
let progressBar: ProgressBar = new Line("#loadingContainer", {
  strokeWidth: 4,
  easing: "easeInOut",
  duration: 1400,
  color: "#ffd166",
  trailColor: "#eee",
  trailWidth: 1,
  svgStyle: { width: "100%", height: "100%" },
});

// True while a model is being (re)loaded.
let isLoadingParams = true;

// Shows each progress report and switches the UI to "ready" once progress reaches 1.
let initProgressCallback = (report: InitProgressReport) => {
  const { text, progress } = report;
  setLabel("init-label", text);
  progressBar.animate(progress, { duration: 50 });
  if (progress == 1.0) {
    enableInputs();
  }
};

// Model that is loaded when the popup opens.
let selectedModel = "Qwen2-0.5B-Instruct-q4f16_1-MLC";

// Fill the <select> with one option per prebuilt model and preselect
// the option that matches `selectedModel`.
const modelSelector = getElementAndCheck(
  "model-selection",
) as HTMLSelectElement;
for (const model of prebuiltAppConfig.model_list) {
  const opt = document.createElement("option");
  opt.value = model.model_id;
  opt.innerHTML = model.model_id;
  opt.selected = model.model_id == selectedModel;
  modelSelector.appendChild(opt);
}

// Create the engine for the initial model; progress is reported through
// initProgressCallback while this awaits.
modelName.innerText = "Loading initial model...";
const engine: MLCEngineInterface = await CreateMLCEngine(selectedModel, {
  initProgressCallback,
});
modelName.innerText = "Now chatting with " + modelDisplayName;

// Conversation so far, sent with every completion request.
let chatHistory: ChatCompletionMessageParam[] = [];

/**
 * Switches the popup from the "loading" state to the "ready" state.
 *
 * Clears the loading flag, removes the loading label and progress bar
 * container (if present), focuses the query input, and recomputes
 * `modelDisplayName` from `selectedModel`.
 *
 * This function is deliberately synchronous: callers read
 * `modelDisplayName` right after model loading completes, so no delay is
 * introduced here.
 */
function enableInputs() {
  isLoadingParams = false;

  // remove loading bar and loading bar descriptors, if exists
  document.getElementById("init-label")?.remove();
  document.getElementById("loadingContainer")?.remove();
  queryInput.focus();

  // The display name is made of the leading "-"-separated segments of the
  // model id, up to (not including) the first later segment that starts with
  // "q", e.g. "Qwen2-0.5B-Instruct-q4f16_1-MLC" -> "Qwen2-0.5B-Instruct".
  const segments = selectedModel.split("-");
  const cutIndex = segments.findIndex(
    (segment, index) => index > 0 && segment.startsWith("q"),
  );
  modelDisplayName = (
    cutIndex === -1 ? segments : segments.slice(0, cutIndex)
  ).join("-");
}

// True while a completion request or a model reload is running.
let requestInProgress = false;

// Disable submit button if input field is empty
// (or while a request or a model load is in progress).
queryInput.addEventListener("keyup", () => {
  const inputIsEmpty = (queryInput as HTMLInputElement).value === "";
  (submitButton as HTMLButtonElement).disabled =
    inputIsEmpty || requestInProgress || isLoadingParams;
});

// If user presses enter, click submit button
queryInput.addEventListener("keyup", (event) => {
  if (event.code !== "Enter") {
    return;
  }
  event.preventDefault();
  submitButton.click();
});

// Listen for clicks on submit button
/**
 * Sends the current query to the engine and streams the answer into the popup.
 *
 * When page context has been collected, the query is wrapped in a prompt that
 * asks the model to answer only from that context. The user turn and the
 * final assistant reply are appended to `chatHistory`.
 *
 * The busy state (request flag, submit button, loading indicator) is always
 * restored, even when the engine throws; in that case the unanswered user
 * turn is removed from the history and the error is rethrown.
 */
async function handleClick() {
  requestInProgress = true;
  (<HTMLButtonElement>submitButton).disabled = true;

  // Get the message from the input field
  const message = (<HTMLInputElement>queryInput).value;
  console.log("message", message);
  // Clear the answer
  document.getElementById("answer")!.innerHTML = "";
  // Hide the answer
  document.getElementById("answerWrapper")!.style.display = "none";
  // Show the loading indicator
  document.getElementById("loading-indicator")!.style.display = "block";

  // Generate response
  let inp = message;
  if (context.length > 0) {
    inp =
      "Use only the following context when answering the question at the end. Don't use any other knowledge.\n" +
      context +
      "\n\nQuestion: " +
      message +
      "\n\nHelpful Answer: ";
  }
  console.log("Input:", inp);
  chatHistory.push({ role: "user", content: inp });

  let answered = false;
  try {
    let curMessage = "";
    const completion = await engine.chat.completions.create({
      stream: true,
      messages: chatHistory,
    });
    // Re-render the accumulated answer after every streamed chunk.
    for await (const chunk of completion) {
      const curDelta = chunk.choices[0].delta.content;
      if (curDelta) {
        curMessage += curDelta;
      }
      updateAnswer(curMessage);
    }
    // Fetch the final message once and reuse it for both history and logging.
    const response = await engine.getMessage();
    chatHistory.push({ role: "assistant", content: response });
    answered = true;
    console.log("response", response);
  } finally {
    if (!answered) {
      // Drop the user turn that never received a reply so the history does
      // not end up with two consecutive user messages on the next request.
      chatHistory.pop();
    }
    // Make sure the UI never stays stuck in the busy state.
    document.getElementById("loading-indicator")!.style.display = "none";
    requestInProgress = false;
    (<HTMLButtonElement>submitButton).disabled = false;
  }
}
submitButton.addEventListener("click", handleClick);

// listen for changes in modelSelector
/**
 * Handles a change of the model <select>: rebuilds the loading UI, stops and
 * unloads the current model, and reloads the engine with the newly selected
 * model. Does nothing while another model load is still in progress.
 */
async function handleSelectChange() {
  // Ignore the change while a model is still loading.
  if (isLoadingParams) {
    return;
  }

  modelName.innerText = "";

  // Recreate the loading label and progress bar container under #loadingBox;
  // enableInputs() removes the elements with these ids once a load finishes.
  const initLabel = document.createElement("p");
  initLabel.id = "init-label";
  initLabel.innerText = "Initializing model...";
  const loadingContainer = document.createElement("div");
  loadingContainer.id = "loadingContainer";

  const loadingBox = getElementAndCheck("loadingBox");
  loadingBox.appendChild(initLabel);
  loadingBox.appendChild(loadingContainer);

  isLoadingParams = true;
  (<HTMLButtonElement>submitButton).disabled = true;

  // Stop any running generation, then clear the conversation and unload the model.
  if (requestInProgress) {
    engine.interruptGenerate();
  }
  // NOTE(review): the result of resetChat() is not awaited here — confirm
  // whether it returns a promise that should complete before unload().
  engine.resetChat();
  chatHistory = [];
  await engine.unload();

  selectedModel = modelSelector.value;

  // A new progress bar is needed because the container element was recreated above.
  progressBar = new Line("#loadingContainer", {
    strokeWidth: 4,
    easing: "easeInOut",
    duration: 1400,
    color: "#ffd166",
    trailColor: "#eee",
    trailWidth: 1,
    svgStyle: { width: "100%", height: "100%" },
  });

  // Same behavior as the initial callback: show the report and enable the
  // inputs once progress reaches 1.
  initProgressCallback = (report: InitProgressReport) => {
    setLabel("init-label", report.text);
    progressBar.animate(report.progress, {
      duration: 50,
    });
    if (report.progress == 1.0) {
      enableInputs();
    }
  };

  engine.setInitProgressCallback(initProgressCallback);

  // NOTE(review): if reload() throws, requestInProgress and isLoadingParams
  // are never reset by this function — confirm whether that is acceptable.
  requestInProgress = true;
  modelName.innerText = "Reloading with new model...";
  await engine.reload(selectedModel);
  requestInProgress = false;
  modelName.innerText = "Now chatting with " + modelDisplayName;
}
modelSelector.addEventListener("change", handleSelectChange);

// Listen for messages from the background script
// and display any answer they carry; other fields are ignored.
chrome.runtime.onMessage.addListener((message) => {
  const received = message.answer;
  if (received) {
    updateAnswer(received);
  }
});

/**
 * Renders the (possibly partial) answer in the popup.
 *
 * Shows the answer wrapper, displays `answer` as plain text, points the copy
 * button at the latest answer, refreshes the timestamp and hides the loading
 * indicator. Safe to call repeatedly, e.g. once per streamed chunk.
 *
 * @param answer - Answer text accumulated so far.
 */
function updateAnswer(answer: string) {
  // Show answer
  document.getElementById("answerWrapper")!.style.display = "block";
  // Assign through innerText so the model output is treated as plain text
  // rather than parsed as HTML; the innerText setter renders "\n" as line
  // breaks, so multi-line answers keep their layout.
  document.getElementById("answer")!.innerText = answer;
  // Point the copy button at the latest answer. Assigning `onclick` replaces
  // the previous handler, so repeated calls do not accumulate listeners.
  document.getElementById("copyAnswer")!.onclick = () => {
    // Copy the answer text to the clipboard
    navigator.clipboard
      .writeText(answer)
      .then(() => console.log("Answer text copied to clipboard"))
      .catch((err) => console.error("Could not copy text: ", err));
  };
  const options: Intl.DateTimeFormatOptions = {
    month: "short",
    day: "2-digit",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
  };
  const time = new Date().toLocaleString("en-US", options);
  // Update timestamp
  document.getElementById("timestamp")!.innerText = time;
  // Hide loading indicator
  document.getElementById("loading-indicator")!.style.display = "none";
}

/**
 * Ask the content script of the active tab for the page contents and store
 * them in `context`.
 *
 * Opens a port to the active tab of the current window, posts an empty
 * message, and records the `contents` field of whatever comes back.
 */
function fetchPageContents() {
  chrome.tabs.query({ currentWindow: true, active: true }, function (tabs) {
    // The query can return no tab, or a tab without an id; there is nothing
    // to connect to in that case.
    const tabId = tabs[0]?.id;
    if (tabId === undefined) {
      return;
    }
    const port = chrome.tabs.connect(tabId, { name: "channelName" });
    port.postMessage({});
    port.onMessage.addListener(function (msg) {
      console.log("Page contents:", msg.contents);
      context = msg.contents;
    });
  });
}


================================================
FILE: examples/chrome-extension-webgpu-service-worker/README.md
================================================
# WebLLM Chrome Extension using WebGPU Running on Service Worker

![Chrome Extension](https://github.com/mlc-ai/mlc-llm/assets/11940172/0d94cc73-eff1-4128-a6e4-70dc879f04e0)

> [!WARNING]  
> WebGPU support in service workers is enabled by default in [Chrome 124](https://chromiumdash.appspot.com/commit/8d78510e4aca5ac3cd8ee4a33e96b404eaa43246).
> If you are using Chrome 123, go to `chrome://flags/#enable-experimental-web-platform-features`, enable the `#enable-experimental-web-platform-features` flag, and **relaunch the browser**.

This example shows how we can create a Chrome extension using WebGPU and service worker.

- The project structure is as follows:
  - `manifest.json`: A required file that lists important information about the structure and behavior of that extension. Here we are using manifest V3.
  - `popup.ts`: Script of the extension pop-up window.
  - `background.ts`: Script of the service worker. An extension service worker is loaded when it is needed, and unloaded when it goes dormant.
  - `content.js`: Content script that interacts with DOM.
- Run

  ```bash
  npm install
  npm run build
  ```

  This will create a new directory at `./dist/`. To load the extension into Chrome, go to Extensions > Manage Extensions and select Load Unpacked. Add the `./dist/` directory. You can now pin the extension to your toolbar and use it to chat with your favorite model!

**Note**: This example disables chatting using the contents of the active tab by default.
To enable it, set `useContext` in `popup.ts` to `true`. More info about this feature can be found
[here](https://github.com/mlc-ai/web-llm/pull/190).
However, if the web content is too large, it might run into issues. We recommend using `example.html` to
test this feature.


================================================
FILE: examples/chrome-extension-webgpu-service-worker/package.json
================================================
{
  "name": "chrome-extension",
  "version": "1.0.0",
  "description": "",
  "private": true,
  "scripts": {
    "build": "parcel build src/manifest.json --config @parcel/config-webextension"
  },
  "author": "",
  "license": "ISC",
  "devDependencies": {
    "@parcel/config-webextension": "^2.9.3",
    "@types/chrome": "^0.0.242",
    "buffer": "^6.0.3",
    "parcel": "^2.9.3",
    "process": "^0.11.10",
    "url": "^0.11.1"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82",
    "progressbar.js": "^1.1.0"
  }
}


================================================
FILE: examples/chrome-extension-webgpu-service-worker/src/background.ts
================================================
import { ExtensionServiceWorkerMLCEngineHandler } from "@mlc-ai/web-llm";

// Hookup an engine to a service worker handler.
// The handler is created on the first connection and reused afterwards; later
// connections only swap in their port.
let handler: ExtensionServiceWorkerMLCEngineHandler | undefined;

chrome.runtime.onConnect.addListener((port) => {
  console.assert(port.name === "web_llm_service_worker");
  if (handler !== undefined) {
    handler.setPort(port);
  } else {
    handler = new ExtensionServiceWorkerMLCEngineHandler(port);
  }
  // Route every message arriving on this port to the handler.
  const onPortMessage = handler.onmessage.bind(handler);
  port.onMessage.addListener(onPortMessage);
});


================================================
FILE: examples/chrome-extension-webgpu-service-worker/src/content.js
================================================
// Only the content script is able to access the DOM
chrome.runtime.onConnect.addListener((port) => {
  // Every incoming message, whatever its payload, is answered with the
  // HTML of the page body.
  port.onMessage.addListener(() => {
    const contents = document.body.innerHTML;
    port.postMessage({ contents });
  });
});


================================================
FILE: examples/chrome-extension-webgpu-service-worker/src/example.html
================================================
In the year 2154, humanity had colonized several planets in the distant reaches
of the galaxy. The planet of Xylophia-IV was one of the most remote and
inhospitable, with temperatures often dropping to -200 degrees Celsius. Despite
these harsh conditions, a team of scientists had established a research station
on the planet to study the unique geological formations and exotic flora and
fauna. One day, while conducting a routine survey of the planet's surface, the
team discovered a strange object buried deep in the ice. As they examined it
closer, they realized it was a small, metallic capsule with a glowing blue
symbol etched onto its surface. The team's leader, a brilliant scientist named
Dr. Maria Rodriguez, was immediately intrigued by the capsule's mysterious
origins. She ordered her team to bring it back to the research station for
further analysis. After weeks of studying the capsule, the team finally cracked
the code to the symbol etched onto its surface. It was a message from an alien
race, warning Earth of an impending attack from an unknown threat. The team was
shocked and dismayed by the news, but they knew they had to act quickly to warn
the rest of humanity. They transmitted the message to the nearest space station,
which relayed it to Earth's government. As the threat of attack loomed near, the
team remained on high alert, ready to face whatever dangers lay ahead. They had
uncovered a secret of the universe, and now they were determined to protect
their planet and its inhabitants at all costs.


================================================
FILE: examples/chrome-extension-webgpu-service-worker/src/manifest.json
================================================
{
  "manifest_version": 3,
  "name": "MLCBot",
  "version": "0.1.0",
  "description": "Chat with your browser",
  "icons": {
    "16": "icons/icon-16.png",
    "32": "icons/icon-32.png",
    "64": "icons/icon-64.png",
    "128": "icons/icon-128.png"
  },
  "content_security_policy": {
    "extension_pages": "style-src-elem 'self' https://cdnjs.cloudflare.com; font-src 'self' https://cdnjs.cloudflare.com; script-src 'self' 'wasm-unsafe-eval'; default-src 'self' data:; connect-src 'self' data: http://localhost:8000 https://huggingface.co https://cdn-lfs.huggingface.co https://cdn-lfs-us-1.huggingface.co https://raw.githubusercontent.com https://cdn-lfs-us-1.hf.co https://cas-bridge.xethub.hf.co"
  },
  "action": {
    "default_title": "MLCBot",
    "default_popup": "popup.html"
  },
  "content_scripts": [
    {
      "matches": ["<all_urls>"],
      "js": ["content.js"]
    }
  ],
  "background": {
    "service_worker": "background.ts",
    "type": "module"
  },
  "permissions": ["storage", "tabs", "webNavigation"]
}


================================================
FILE: examples/chrome-extension-webgpu-service-worker/src/popup.css
================================================
/* RESET: remove default margins/padding and use border-box sizing for every
   element, including generated ::before/::after boxes. */
*,
*::before,
*::after {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* System UI font stack and default text colour for the popup. */
html {
  font-family:
    -apple-system,
    BlinkMacSystemFont,
    Segoe UI,
    Helvetica,
    Arial,
    sans-serif;
  color: #222;
}

/* The popup body has a fixed 320px width. */
body {
  margin: 0;
  padding: 0.5rem;
  background-color: #778da9;
  width: 320px;
  font-size: small;
}

p {
  margin: 0;
}

/* LOADING BAR: fixed-size box for the element with id "loadingContainer". */
#loadingContainer {
  margin-bottom: 15px;
  width: 300px;
  height: 8px;
}

/* INPUT AREA */
#query-input {
  border: 1px solid #ccc;
  border-radius: 4px;
}

/* Lays out the input and the submit button side by side. */
.input-container {
  display: flex;
  flex-direction: row;
  align-items: center;
}

/* The input takes the available width; the margin separates it from the button. */
.input-container input {
  width: 100%;
  outline: none;
  padding: 0.5rem;
  margin-right: 0.5rem;
}

/* SUBMIT BUTTON */
.btn {
  background-color: #1b263b;
  color: white;
  font-size: small;
  cursor: pointer;
  border-radius: 4px;
  border: none;
  padding: 0.5rem;
}

.btn:hover {
  background-color: #d0d0d0;
}

/* Declared after .btn:hover with equal specificity, so a disabled button
   keeps this grey background even while hovered. */
.btn:disabled {
  background-color: #a7a7a7;
  color: rgb(255, 255, 255);
  cursor: default;
}

.btn img {
  width: 1rem;
  height: 1rem;
}

/* LOADING */

/* Centres the loading indicator; overflow is clipped and the negative
   horizontal margin widens the box by 5% on each side. */
.stage {
  display: flex;
  justify-content: center;
  align-items: center;
  position: relative;
  margin: 0 -5%;
  overflow: hidden;
}

/* Hidden by default (display: none). */
#loading-indicator {
  display: none;
  color: white;
  margin-top: 0.5rem;
}

/* Three-dot indicator: the element itself is the middle dot, and its
   ::before / ::after pseudo-elements draw the dots 15px to the left and right.
   All three run the same dot-flashing animation with delays of 0s (left),
   0.2s (middle) and 0.4s (right), so they change colour one after another. */
.dot-flashing {
  position: relative;
  width: 10px;
  height: 10px;
  border-radius: 5px;
  background-color: #1b263b;
  color: #1b263b;
  animation: dot-flashing 0.4s infinite linear alternate;
  animation-delay: 0.2s;
}

/* Shared setup for the two outer dots. */
.dot-flashing::before,
.dot-flashing::after {
  content: "";
  display: inline-block;
  position: absolute;
  top: 0;
}

/* Left dot. */
.dot-flashing::before {
  left: -15px;
  width: 10px;
  height: 10px;
  border-radius: 5px;
  background-color: #1b263b;
  color: #1b263b;
  animation: dot-flashing 0.4s infinite alternate;
  animation-delay: 0s;
}

/* Right dot. */
.dot-flashing::after {
  left: 15px;
  width: 10px;
  height: 10px;
  border-radius: 5px;
  background-color: #1b263b;
  color: #1b263b;
  animation: dot-flashing 0.4s infinite alternate;
  animation-delay: 0.4s;
}

/* Moves from the dark colour to the lighter one by the halfway point and
   holds it for the rest of the cycle. */
@keyframes dot-flashing {
  0% {
    background-color: #1b263b;
  }

  50%,
  100% {
    background-color: #415a77;
  }
}

/* ANSWERS */
#queriesAnswersContainer {
  display: block;
  color: white;
  margin-top: 0.5rem;
}

#answer {
  color: #333333;
}

/* Yellow card that holds the answer; hidden by default (display: none). */
#answerWrapper {
  display: none;
  background-color: #ffd166;
  border-radius: 8px;
  padding: 0.5rem;
  margin-top: 0.5rem;
}

/* Same yellow card look as #answerWrapper, as a reusable class. */
.queriesAnswers {
  border-radius: 8px;
  background-color: #ffd166;
  padding: 0.5rem;
  color: #333333;
}

#lastQuery {
  color: rgb(188, 188, 188);
}

#lastAnswer {
  color: white;
  margin-top: 0.5rem;
}

/* Dark rounded box for the element with id "lastRequest". */
#lastRequest {
  padding: 0.5rem;
  margin-top: 0.5rem;
  background-color: #333333;
  border-radius: 4px;
}

/* ANSWER OPTIONS */
.timeStamp {
  color: #9a8c98;
}

/* Row under the answer: children are spread to both ends and bottom-aligned. */
.copyRow {
  display: flex;
  flex-direction: row;
  align-items: end;
  justify-content: space-between;
  color: #a7a7a7;
  margin-top: 0.5rem;
}

/* Hidden by default (display: none). */
.copyText {
  display: none;
  color: #a7a7a7;
  margin-right: 0.5rem;
}

/* Borderless icon-style button with a transparent background. */
.copyButton {
  color: #415a77;
  background-color: transparent;
  border: none;
  cursor: pointer;
  padding: 0;
  margin-left: 0.5rem;
}

/* Keeps the background transparent on hover and only lightens the colour. */
.copyButton:hover {
  color: #5e80a7;
  background-color: transparent;
}

/* Same look as .copyButton, without the left margin. */
.removeButton {
  color: #415a77;
  background-color: transparent;
  border: none;
  cursor: pointer;
  padding: 0;
}

.removeButton:hover {
  color: #5e80a7;
  background-color: transparent;
}


================================================
FILE: examples/chrome-extension-webgpu-service-worker/src/popup.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <title>Chatbot</title>
    <link rel="stylesheet" href="popup.css" />
    <!-- Font Awesome stylesheet, needed for the fa-* icon classes used below. -->
    <link
      rel="stylesheet"
      href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css"
    />
  </head>
  <body>
    <!-- Empty container for the loading bar. -->
    <div id="loadingContainer"></div>

    <!-- Prompt input and submit button. -->
    <div class="input-container form-group">
      <input
        type="search"
        id="query-input"
        placeholder="What's on your mind?"
      />
      <button id="submit-button" class="btn">
        <i class="fa fa-comments"></i>
      </button>
    </div>

    <!-- Wrapper around the flashing-dots loading indicator. -->
    <div class="stage">
      <div id="loading-indicator" class="dot-flashing"></div>
    </div>

    <!-- Answer area: answer text, a timestamp and a copy-to-clipboard button. -->
    <div id="answerWrapper">
      <div id="answer"></div>
      <div class="copyRow">
        <span id="timestamp"></span>
        <button
          id="copyAnswer"
          class="btn copyButton"
          title="Copy the Answer to the Clipboard"
        >
          <i class="fa-solid fa-copy fa-lg"></i>
        </button>
      </div>
    </div>

    <!-- Popup logic, loaded as an ES module. -->
    <script type="module" src="./popup.ts"></script>
  </body>
</html>


================================================
FILE: examples/chrome-extension-webgpu-service-worker/src/popup.ts
================================================
"use strict";

// This code is partially adapted from the openai-chatgpt-chrome-extension repo:
// https://github.com/jessedi0n/openai-chatgpt-chrome-extension

import "./popup.css";

import {
  ChatCompletionMessageParam,
  CreateExtensionServiceWorkerMLCEngine,
  MLCEngineInterface,
  InitProgressReport,
} from "@mlc-ai/web-llm";
import { ProgressBar, Line } from "progressbar.js";

/***************** UI elements *****************/
// Whether or not to use the content from the active tab as the context
const useContext = false;
// Promise-based delay helper: the returned promise resolves after `ms` milliseconds.
const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));

// The prompt input field and the button that submits it.
const queryInput = document.getElementById("query-input")!;
const submitButton = document.getElementById("submit-button")!;

// Gate checked by enableInputs(): its one-time UI setup only runs while this is true.
let isLoadingParams = false;

// The submit button starts out disabled.
(<HTMLButtonElement>submitButton).disabled = true;

// Line-shaped progress bar (progressbar.js) rendered inside #loadingContainer;
// initProgressCallback animates it as loading progresses.
const progressBar: ProgressBar = new Line("#loadingContainer", {
  strokeWidth: 4,
  easing: "easeInOut",
  duration: 1400,
  color: "#ffd166",
  trailColor: "#eee",
  trailWidth: 1,
  svgStyle: { width: "100%", height: "100%" },
});

/***************** Web-LLM MLCEngine Configuration *****************/
/**
 * Progress hook passed to the engine: moves the progress bar to the reported
 * fraction and, once the report says 1.0, calls enableInputs().
 */
const initProgressCallback = (report: InitProgressReport) => {
  const { progress } = report;
  progressBar.animate(progress, { duration: 50 });
  if (progress != 1.0) {
    return;
  }
  enableInputs();
};

// Raise the loading flag BEFORE the engine starts loading. enableInputs() is
// invoked from initProgressCallback while the `await` below is still pending,
// and it does nothing unless the flag is already true; setting the flag only
// after the await (as before) meant the loading bar was never removed.
isLoadingParams = true;

// Create the engine backed by the extension's service worker. The promise
// resolves once the model has been loaded; progress is reported through
// initProgressCallback in the meantime.
const engine: MLCEngineInterface = await CreateExtensionServiceWorkerMLCEngine(
  "Qwen2-0.5B-Instruct-q4f16_1-MLC",
  { initProgressCallback: initProgressCallback },
);
// Conversation so far, sent in full with every completion request.
const chatHistory: ChatCompletionMessageParam[] = [];

// Fallback in case no progress report ever reached 1.0. enableInputs() is
// gated by isLoadingParams, so this is a no-op if it already ran.
enableInputs();

/**
 * One-time switch of the popup from "loading" to "ready": after a short
 * pause, enables the submit button, removes the loading bar container and
 * focuses the query input.
 *
 * Does nothing unless `isLoadingParams` is true, and clears that flag, so
 * repeated calls are harmless.
 */
function enableInputs() {
  if (!isLoadingParams) {
    return;
  }
  // Clear the flag before pausing so that a second call arriving during the
  // pause cannot run the setup (and try to remove the container) again.
  isLoadingParams = false;
  // The original called sleep(500) without waiting for it, which had no
  // effect; chain on the promise so the pause actually happens.
  void sleep(500).then(() => {
    (<HTMLButtonElement>submitButton).disabled = false;
    // Optional chaining: tolerate the container being gone already.
    document.getElementById("loadingContainer")?.remove();
    queryInput.focus();
  });
}

/***************** Event Listeners *****************/

// Keep the submit button disabled exactly while the input field is empty
queryInput.addEventListener("keyup", () => {
  const inputIsEmpty = (queryInput as HTMLInputElement).value === "";
  (submitButton as HTMLButtonElement).disabled = inputIsEmpty;
});

// Pressing Enter in the input field acts like clicking the submit button
queryInput.addEventListener("keyup", (event) => {
  if (event.code !== "Enter") {
    return;
  }
  event.preventDefault();
  submitButton.click();
});

/**
 * Click handler for the submit button: sends the text in the query input to
 * the engine as a new user turn and streams the reply into the answer area.
 *
 * Empty (whitespace-only) input is ignored. If the request fails, the error
 * is logged, the loading indicator is hidden, and the unanswered user turn is
 * removed from `chatHistory` so a retry does not resend it.
 */
async function handleClick() {
  // Get the message from the input field
  const message = (<HTMLInputElement>queryInput).value;
  if (message.trim() === "") {
    // Nothing to ask; the button can be enabled while the field is empty.
    return;
  }
  console.log("message", message);
  chatHistory.push({ role: "user", content: message });

  // Clear the answer
  document.getElementById("answer")!.innerHTML = "";
  // Hide the answer
  document.getElementById("answerWrapper")!.style.display = "none";
  // Show the loading indicator
  document.getElementById("loading-indicator")!.style.display = "block";

  try {
    // Send the chat completion message to the engine
    let curMessage = "";
    const completion = await engine.chat.completions.create({
      stream: true,
      messages: chatHistory,
    });

    // Update the answer as the model generates more text
    for await (const chunk of completion) {
      // `?.` tolerates a chunk that carries no choices.
      const curDelta = chunk.choices[0]?.delta.content;
      if (curDelta) {
        curMessage += curDelta;
      }
      updateAnswer(curMessage);
    }
    chatHistory.push({ role: "assistant", content: await engine.getMessage() });
  } catch (err) {
    console.error("Chat completion failed: ", err);
    // The assistant push is the last statement of the try block, so on any
    // failure the last history entry is still this request's user turn.
    chatHistory.pop();
    // Without this the flashing dots would stay visible forever.
    document.getElementById("loading-indicator")!.style.display = "none";
  }
}

submitButton.addEventListener("click", handleClick);

/**
 * Render `answer` in the popup and wire up the copy button.
 *
 * Called once per streamed chunk with the text accumulated so far. Shows the
 * answer wrapper, writes the answer into `#answer` (newlines become `<br>`),
 * makes the copy button copy this answer, stamps the current time into
 * `#timestamp`, and hides the loading indicator.
 *
 * @param answer - Plain-text answer (possibly partial) to display.
 */
function updateAnswer(answer: string) {
  // Show answer
  document.getElementById("answerWrapper")!.style.display = "block";
  // The answer is model output, not trusted markup: escape HTML
  // metacharacters before it goes through innerHTML so it cannot inject tags.
  const escapedAnswer = answer
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
  const answerWithBreaks = escapedAnswer.replace(/\n/g, "<br>");
  document.getElementById("answer")!.innerHTML = answerWithBreaks;
  // Assign `onclick` instead of calling addEventListener: this function runs
  // for every chunk, and addEventListener would stack one more handler per
  // chunk, so a single click would trigger one clipboard write per chunk.
  document.getElementById("copyAnswer")!.onclick = () => {
    // Copy the answer text (the raw, unescaped string) to the clipboard
    navigator.clipboard
      .writeText(answer)
      .then(() => console.log("Answer text copied to clipboard"))
      .catch((err) => console.error("Could not copy text: ", err));
  };
  const options: Intl.DateTimeFormatOptions = {
    month: "short",
    day: "2-digit",
    hour: "2-digit",
    minute: "2-digit",
    second: "2-digit",
  };
  const time = new Date().toLocaleString("en-US", options);
  // Update timestamp
  document.getElementById("timestamp")!.innerText = time;
  // Hide loading indicator
  document.getElementById("loading-indicator")!.style.display = "none";
}

/**
 * Ask the content script of the active tab for the page contents and forward
 * them through chrome.runtime.sendMessage as `{ context }`.
 * Does nothing when the active tab has no usable id.
 */
function fetchPageContents() {
  chrome.tabs.query({ currentWindow: true, active: true }, (tabs) => {
    const activeTabId = tabs[0]?.id;
    if (!activeTabId) {
      return;
    }
    const port = chrome.tabs.connect(activeTabId, { name: "channelName" });
    port.postMessage({});
    port.onMessage.addListener((msg) => {
      console.log("Page contents:", msg.contents);
      chrome.runtime.sendMessage({ context: msg.contents });
    });
  });
}

// Grab the page contents when the popup is opened.
// This statement only runs after the top-level `await` that creates the
// engine, by which time the window's `load` event may already have fired; a
// handler assigned to `window.onload` at that point would never be called.
// So fetch immediately if the document has finished loading, and otherwise
// wait for `load`.
if (useContext) {
  if (document.readyState === "complete") {
    fetchPageContents();
  } else {
    window.addEventListener("load", () => fetchPageContents(), { once: true });
  }
}


================================================
FILE: examples/embeddings/README.md
================================================
# WebLLM Get Started App

This folder provides a minimum demo to show WebLLM API in a webapp setting.
To try it out, you can do the following steps under this folder

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/embeddings/package.json
================================================
{
  "name": "embeddings-example",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/embeddings.html  --port 8885",
    "build": "parcel build src/embeddings.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82",
    "langchain": "0.2.15"
  }
}


================================================
FILE: examples/embeddings/src/embeddings.html
================================================
<!doctype html>
<html>
  <script>
    // Creates an implicit global object. Nothing in this page's markup reads
    // it. NOTE(review): presumably consumed elsewhere; confirm it is needed.
    webLLMGlobal = {};
  </script>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <!-- Status line; embeddings.ts writes loading progress text here. -->
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <label id="stats-label"> </label>

    <!-- Example logic, loaded as an ES module. -->
    <script type="module" src="./embeddings.ts"></script>
  </body>
</html>


================================================
FILE: examples/embeddings/src/embeddings.ts
================================================
import * as webllm from "@mlc-ai/web-llm";
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import type { EmbeddingsInterface } from "@langchain/core/embeddings";
import type { Document } from "@langchain/core/documents";
import { formatDocumentsAsString } from "langchain/util/document";
import { PromptTemplate } from "@langchain/core/prompts";
import {
  RunnableSequence,
  RunnablePassthrough,
} from "@langchain/core/runnables";

/**
 * Write `text` into the element with the given id.
 *
 * @param id - DOM id of the target element.
 * @param text - Text to show.
 * @throws Error when no element with that id exists.
 */
function setLabel(id: string, text: string) {
  const target = document.getElementById(id);
  if (target != null) {
    target.innerText = text;
    return;
  }
  throw Error("Cannot find label " + id);
}

// Progress hook handed to the engine: mirrors each report's text into the
// "init-label" element.
const initProgressCallback = (report: webllm.InitProgressReport) => {
  const { text } = report;
  setLabel("init-label", text);
};

// For integration with Langchain
/**
 * Adapter that lets a WebLLM engine serve as a Langchain embeddings provider.
 * Every call is forwarded to `engine.embeddings.create` with `modelId`.
 */
class WebLLMEmbeddings implements EmbeddingsInterface {
  engine: webllm.MLCEngineInterface;
  modelId: string;

  constructor(engine: webllm.MLCEngineInterface, modelId: string) {
    this.engine = engine;
    this.modelId = modelId;
  }

  /** Embed `texts` in one request; result `i` is the embedding of `texts[i]`. */
  async _embed(texts: string[]): Promise<number[][]> {
    const request = { input: texts, model: this.modelId };
    const reply = await this.engine.embeddings.create(request);
    // One output row per input position, read from the reply by index.
    return Array.from(
      { length: texts.length },
      (_unused, position) => reply.data[position].embedding,
    );
  }

  /** Embed a single query string. */
  async embedQuery(document: string): Promise<number[]> {
    const embeddings = await this._embed([document]);
    return embeddings[0];
  }

  /** Embed a list of documents. */
  async embedDocuments(documents: string[]): Promise<number[][]> {
    return this._embed(documents);
  }
}

// Prepare inputs
const documents_og = ["The Data Cloud!", "Mexico City of Course!"];
const queries_og = ["what is snowflake?", "Where can I get the best tacos?"];
const query_prefix =
  "Represent this sentence for searching relevant passages: ";
// Process according to Snowflake model: wrap every text in [CLS] ... [SEP],
// and additionally put the retrieval prefix in front of each query.
const documents: string[] = documents_og.map((item) => `[CLS] ${item} [SEP]`);
const queries: string[] = queries_og.map(
  (item) => `[CLS] ${query_prefix}${item} [SEP]`,
);
console.log("Formatted documents: ", documents);
console.log("Formatted queries: ", queries);

// Using webllm's API
/**
 * Embed the prepared documents and queries directly through the engine and
 * log the similarity of every query/document pair.
 */
async function webllmAPI() {
  // b4 means the max batch size is compiled as 4. That is, the model can process 4 inputs in a
  // batch. If given more than 4, the model will forward multiple times. The larger the max batch
  // size, the more memory it consumes.
  // const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b32";
  const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b4";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    {
      initProgressCallback,
      logLevel: "INFO", // specify the log level
    },
  );

  // Embed the documents first, then the queries, logging each reply and its usage.
  const docReply = await engine.embeddings.create({ input: documents });
  console.log(docReply);
  console.log(docReply.usage);

  const queryReply = await engine.embeddings.create({ input: queries });
  console.log(queryReply);
  console.log(queryReply.usage);

  // Calculate similarity (we use langchain here, but any method works)
  const embedder = new WebLLMEmbeddings(engine, selectedModel);
  const vectorStore = await MemoryVectorStore.fromExistingIndex(embedder);
  // See score
  for (const [i, query] of queries_og.entries()) {
    console.log(`Similarity with: ${query}`);
    for (const [j, doc] of documents_og.entries()) {
      const similarity = vectorStore.similarity(
        queryReply.data[i].embedding,
        docReply.data[j].embedding,
      );
      console.log(`${doc}: ${similarity}`);
    }
  }
}

// Alternatively, integrating with Langchain's API
/**
 * Store the two prepared documents in a Langchain in-memory vector store
 * backed by WebLLM embeddings, then log the best match for each query.
 */
async function langchainAPI() {
  // b4 means the max batch size is compiled as 4. That is, the model can process 4 inputs in a
  // batch. If given more than 4, the model will forward multiple times. The larger the max batch
  // size, the more memory it consumes.
  // const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b32";
  const selectedModel = "snowflake-arctic-embed-m-q0f32-MLC-b4";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    {
      initProgressCallback,
      logLevel: "INFO", // specify the log level
    },
  );

  const embedder = new WebLLMEmbeddings(engine, selectedModel);
  const vectorStore = await MemoryVectorStore.fromExistingIndex(embedder);

  // Wrap the first two formatted documents as Langchain documents.
  const docsToAdd: Document[] = [documents[0], documents[1]].map(
    (pageContent) => ({ pageContent, metadata: {} }),
  );
  await vectorStore.addDocuments(docsToAdd);

  // Search for the single closest document to each of the first two queries,
  // one query after the other.
  for (const query of [queries[0], queries[1]]) {
    const similaritySearchResults = await vectorStore.similaritySearch(query, 1);
    for (const doc of similaritySearchResults) {
      console.log(`* ${doc.pageContent}`);
    }
  }
}

// RAG with Langchain.js using WebLLM for both LLM and Embedding in a single engine
// Followed https://js.langchain.com/v0.1/docs/expression_language/cookbook/retrieval/
// There are many possible ways to achieve RAG (e.g. degree of integration with Langchain,
// using WebWorker, etc.). We provide a minimal example here.
/**
 * Retrieve context for a fixed question from a one-document vector store,
 * format it into a prompt, and log the LLM's answer.
 */
async function simpleRAG() {
  // 0. Load both embedding model and LLM to a single WebLLM Engine
  const embeddingModelId = "snowflake-arctic-embed-m-q0f32-MLC-b4";
  const llmModelId = "gemma-2-2b-it-q4f32_1-MLC-1k";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    [embeddingModelId, llmModelId],
    {
      initProgressCallback,
      logLevel: "INFO", // specify the log level
    },
  );

  // 1. Index the knowledge base (a single sentence) with the embedding model
  const embedder = new WebLLMEmbeddings(engine, embeddingModelId);
  const vectorStore = await MemoryVectorStore.fromTexts(
    ["mitochondria is the powerhouse of the cell"],
    [{ id: 1 }],
    embedder,
  );
  const retriever = vectorStore.asRetriever();

  // 2. Chain: retrieve documents for the question, then fill the prompt template
  const prompt =
    PromptTemplate.fromTemplate(`Answer the question based only on the following context:
  {context}
  
  Question: {question}`);

  const chain = RunnableSequence.from([
    {
      context: retriever.pipe(formatDocumentsAsString),
      question: new RunnablePassthrough(),
    },
    prompt,
  ]);

  // 3. Render the prompt and send it to the LLM loaded in the same engine
  const promptValue = await chain.invoke("What is the powerhouse of the cell?");
  const formattedPrompt = promptValue.toString();
  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: formattedPrompt }],
    model: llmModelId,
  });

  const answer = reply.choices[0].message.content;
  console.log(answer);

  /*
    "The powerhouse of the cell is the mitochondria."
  */
}

// Select one to run
// webllmAPI();
// langchainAPI();
// The example runs as a fire-and-forget promise; attach a rejection handler
// so a failure is reported with context instead of as an unhandled rejection.
simpleRAG().catch((err: unknown) => {
  console.error("Embeddings example failed:", err);
});


================================================
FILE: examples/function-calling/README.md
================================================
### OpenAI API Demos - Function calling

This folder contains two main ways of using function calling with WebLLM.

`function-calling-manual` demonstrates how you can use function calling with Llama3.1 and Hermes2
without using the `tools`, `tool_choice`, and `tool_call` fields. This is the most flexible approach: you can follow
the instructions given by the model's publisher and iterate on top of them. However, you need to do the parsing on your own, and the format differs for each model. For instance, Hermes2 models wrap a tool call in `<tool_call>` and `</tool_call>`, which may be very different from other models' formats.

`function-calling-openai` conforms to the OpenAI function calling usage, leveraging the `tools`, `tool_choice`, and `tool_call`
fields. This is easier to use, but sacrifices flexibility since it relies on a pre-defined system prompt
for this.


================================================
FILE: examples/function-calling/function-calling-manual/README.md
================================================
### Demos - Function calling

Run `npm install` first, followed by `npm start`.

Note if you would like to hack WebLLM core package,
you can change web-llm dependencies as `"file:../../.."`, and follow the build from source
instruction in the project to build webllm locally. This option is only recommended
if you would like to hack WebLLM core package.


================================================
FILE: examples/function-calling/function-calling-manual/package.json
================================================
{
  "name": "openai-api",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/function_calling_manual.html  --port 8888",
    "build": "parcel build src/function_calling_manual.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/function-calling/function-calling-manual/src/function_calling_manual.html
================================================
<!doctype html>
<html>
  <script>
    // Creates an implicit global object. Nothing in this page's markup reads
    // it. NOTE(review): presumably consumed elsewhere; confirm it is needed.
    webLLMGlobal = {};
  </script>

  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <!-- Status line; function_calling_manual.ts writes loading progress text here. -->
    <label id="init-label"> </label>
    <label id="generate-label"> </label>

    <!-- Example logic, loaded as an ES module. -->
    <script type="module" src="./function_calling_manual.ts"></script>
  </body>
</html>


================================================
FILE: examples/function-calling/function-calling-manual/src/function_calling_manual.ts
================================================
/* eslint-disable no-useless-escape */
import * as webllm from "@mlc-ai/web-llm";

// Common helper methods
/**
 * Set the visible text of the element whose id is `id`.
 *
 * @param id - DOM id of the element to update.
 * @param text - New text content.
 * @throws Error when the element cannot be found.
 */
function setLabel(id: string, text: string) {
  const element = document.getElementById(id);
  const missing = element == null;
  if (missing) {
    throw Error("Cannot find label " + id);
  }
  element.innerText = text;
}

// Progress hook handed to the engine: shows each report's text in the
// "init-label" element.
const initProgressCallback = (report: webllm.InitProgressReport) => {
  const statusText = report.text;
  setLabel("init-label", statusText);
};

// Same example as https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B#prompt-format-for-function-calling
/**
 * Manual (prompt-based) function calling with a Hermes-2-Pro model.
 *
 * The tool signatures are embedded in the system prompt rather than passed through a `tools`
 * field. The conversation runs in four steps: the model is expected to emit a
 * `<tool_call>`, a hand-written `<tool_response>` is appended, the model is asked for a
 * natural-language answer, and finally a second user request is sent that should trigger
 * another tool call. Replies and usage are logged to the console.
 */
async function hermes2_example() {
  // 0. Setups
  // Most manual function calling models specify the tools inside the system prompt.
  // The prompt is kept on a single source line: a raw line break inside the template literal
  // would become part of the prompt text.
  const system_prompt = `You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> {"type": "function", "function": {"name": "get_stock_fundamentals", "description": "get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\\n\\n    Args:\\n        symbol (str): The stock symbol.\\n\\n    Returns:\\n        dict: A dictionary containing fundamental data.\\n            Keys:\\n                - \'symbol\': The stock symbol.\\n                - \'company_name\': The long name of the company.\\n                - \'sector\': The sector to which the company belongs.\\n                - \'industry\': The industry to which the company belongs.\\n                - \'market_cap\': The market capitalization of the company.\\n                - \'pe_ratio\': The forward price-to-earnings ratio.\\n                - \'pb_ratio\': The price-to-book ratio.\\n                - \'dividend_yield\': The dividend yield.\\n                - \'eps\': The trailing earnings per share.\\n                - \'beta\': The beta value of the stock.\\n                - \'52_week_high\': The 52-week high price of the stock.\\n                - \'52_week_low\': The 52-week low price of the stock.", "parameters": {"type": "object", "properties": {"symbol": {"type": "string"}}, "required": ["symbol"]}}}  </tools> Use the following pydantic model json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n<tool_call>\n{"arguments": <args-dict>, "name": <function-name>}\n</tool_call>`;
  // Same formatting for Hermes-2-Pro-Llama-3, Hermes-2-Theta-Llama-3
  // const selectedModel = "Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC";
  const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback, logLevel: "INFO" },
  );
  // The same seed is passed with every request in this example.
  const seed = 0;

  // 1. First request, expect to generate tool call
  const messages: webllm.ChatCompletionMessageParam[] = [
    { role: "system", content: system_prompt },
    {
      role: "user",
      content: "Fetch the stock fundamentals data for Tesla (TSLA)",
    },
  ];
  const request1: webllm.ChatCompletionRequest = {
    stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
    messages: messages,
    seed: seed,
  };
  const reply1 = await engine.chat.completions.create(request1);
  const response1 = reply1.choices[0].message.content;
  console.log(reply1.usage);
  console.log("Response 1: " + response1);
  // Keep the assistant turn in the history so later requests see the tool call.
  messages.push({ role: "assistant", content: response1 });
  // <tool_call>\n{"arguments": {"symbol": "TSLA"}, "name": "get_stock_fundamentals"}\n</tool_call>

  // 2. Call function on your own to get tool response
  // (hard-coded here; no real function is invoked by this example)
  const tool_response = `<tool_response>\n{"name": "get_stock_fundamentals", "content": {'symbol': 'TSLA', 'company_name': 'Tesla, Inc.', 'sector': 'Consumer Cyclical', 'industry': 'Auto Manufacturers', 'market_cap': 611384164352, 'pe_ratio': 49.604652, 'pb_ratio': 9.762013, 'dividend_yield': None, 'eps': 4.3, 'beta': 2.427, '52_week_high': 299.29, '52_week_low': 152.37}}\n</tool_response>`;
  messages.push({ role: "tool", content: tool_response, tool_call_id: "0" });

  // 3. Get natural language response
  const request2: webllm.ChatCompletionRequest = {
    stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
    messages: messages,
    seed: seed,
  };
  const reply2 = await engine.chat.completions.create(request2);
  const response2 = reply2.choices[0].message.content;
  messages.push({ role: "assistant", content: response2 });
  console.log(reply2.usage);
  console.log("Response 2: " + response2);

  // 4. Another function call
  messages.push({
    role: "user",
    content: "Now do another one with NVIDIA, symbol being NVDA.",
  });
  const request3: webllm.ChatCompletionRequest = {
    stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
    messages: messages,
    seed: seed,
  };
  const reply3 = await engine.chat.completions.create(request3);
  const response3 = reply3.choices[0].message.content;
  messages.push({ role: "assistant", content: response3 });
  console.log(reply3.usage);
  console.log("Response 3: " + response3);
  // <tool_call>\n{"arguments": {"symbol": "NVDA"}, "name": "get_stock_fundamentals"}\n</tool_call>
}

// Similar example to https://llama.meta.com/docs/model-cards-and-prompt-formats/llama3_1/#user-defined-custom-tool-calling
/**
 * Manual (prompt-based) function calling with Llama-3.1-8B-Instruct.
 *
 * Two tools (`get_current_temperature`, `send_message`) are described as JSON in the system
 * prompt, and the model is instructed to answer with `<function>{...}</function>`. The
 * conversation alternates between model replies and hard-coded tool responses; every reply
 * and its usage are logged to the console.
 */
async function llama3_1_example() {
  // Follows example, but tweaks the formatting with <function>
  // The tool definitions below are meant to be valid JSON once the template literal is
  // evaluated: properties are comma-separated and inner quotes are written as `\\"` so the
  // resulting prompt contains `\"`.
  const system_prompt = `Cutting Knowledge Date: December 2023
Today Date: 23 Jul 2024
# Tool Instructions
- When looking for real time information use relevant functions if available
You have access to the following functions:

{
    "type": "function",
    "function": {
        "name": "get_current_temperature",
        "description": "Get the current temperature at a location.",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The location to get the temperature for, in the format \\"City, Country\\""
                }
            },
            "required": [
                "location"
            ]
        },
        "return": {
            "type": "number",
            "description": "The current temperature at the specified location in the specified units, as a float."
        }
    }
}
{
    "type": "function",
    "function": {
        "name": "send_message",
        "description": "Send a message to a recipient.",
        "parameters": {
            "type": "object",
            "properties": {
                "recipient": {
                    "type": "string",
                    "description": "Name of the recipient of the message"
                },
                "content": {
                    "type": "string",
                    "description": "Content of the message"
                }
            },
            "required": [
                "recipient",
                "content"
            ]
        },
        "return": {
            "type": "None"
        }
    }
}
If a you choose to call a function ONLY reply in the following format:
    <function>{"name": function name, "parameters": dictionary of argument name and its value}</function>
Here is an example,
    <function>{"name": "example_function_name", "parameters": {"example_name": "example_value"}}</function>
Reminder:
- Function calls MUST follow the specified format and use BOTH <function> and </function>
- Required parameters MUST be specified
- Only call one function at a time
- When calling a function, do NOT add any other words, ONLY the function calling
- Put the entire function call reply on one line
- Always add your sources when using search results to answer the user query
You are a helpful Assistant.`;

  const selectedModel = "Llama-3.1-8B-Instruct-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback, logLevel: "INFO" },
  );
  // The same seed is passed with every request in this example.
  const seed = 0;

  // 1. First request, expect to generate tool call to get temperature of Paris
  const messages: webllm.ChatCompletionMessageParam[] = [
    { role: "system", content: system_prompt },
    {
      role: "user",
      content: "Hey, what's the temperature in Paris right now?",
    },
  ];
  const request1: webllm.ChatCompletionRequest = {
    stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
    messages: messages,
    seed: seed,
  };
  const reply1 = await engine.chat.completions.create(request1);
  const response1 = reply1.choices[0].message.content;
  console.log(reply1.usage);
  console.log("Response 1: " + response1);
  // Keep the assistant turn in the history so later requests see the tool call.
  messages.push({ role: "assistant", content: response1 });
  // <function>{"name": "get_current_temperature", "parameters": {"location": "Paris, France"}}</function>

  // 2. Call function on your own to get tool response
  // (hard-coded here; no real function is invoked by this example)
  const tool_response = `{"output": 22.5}`;
  messages.push({ role: "tool", content: tool_response, tool_call_id: "0" });

  // 3. Get natural language response
  const request2: webllm.ChatCompletionRequest = {
    stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
    messages: messages,
    seed: seed,
  };
  const reply2 = await engine.chat.completions.create(request2);
  const response2 = reply2.choices[0].message.content;
  messages.push({ role: "assistant", content: response2 });
  console.log(reply2.usage);
  console.log("Response 2: " + response2);
  // The current temperature in Paris is 22.5°C.

  // 4. Make another request, expect model to call `send_message`
  messages.push({
    role: "user",
    content: "Send a message to Tom to tell him this information.",
  });
  const request3: webllm.ChatCompletionRequest = {
    stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
    messages: messages,
    seed: seed,
  };
  const reply3 = await engine.chat.completions.create(request3);
  const response3 = reply3.choices[0].message.content;
  messages.push({ role: "assistant", content: response3 });
  console.log(reply3.usage);
  console.log("Response 3: " + response3);
  // <function>{"name": "send_message", "parameters": {"recipient": "Tom", "content": "The current temperature in Paris is 22.5°C."}}</function>

  // 5. Call API, which has no return value, so simply prompt model again
  const tool_response2 = `{"output": None}`;
  messages.push({ role: "tool", content: tool_response2, tool_call_id: "1" });
  const request4: webllm.ChatCompletionRequest = {
    stream: false, // works with either streaming or non-streaming; code below assumes non-streaming
    messages: messages,
    seed: seed,
  };
  const reply4 = await engine.chat.completions.create(request4);
  const response4 = reply4.choices[0].message.content;
  console.log(reply4.usage);
  console.log("Response 4: " + response4);
  // The message has been sent to Tom.
}

// Pick one to run
// hermes2_example();
llama3_1_example();


================================================
FILE: examples/function-calling/function-calling-openai/README.md
================================================
### Demos - Function calling

Run `npm install` first, followed by `npm start`.

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/function-calling/function-calling-openai/package.json
================================================
{
  "name": "openai-api",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/function_calling_openai.html  --port 8888",
    "build": "parcel build src/function_calling_openai.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/function-calling/function-calling-openai/src/function_calling_openai.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>

  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>
    <label id="generate-label"> </label>

    <script type="module" src="./function_calling_openai.ts"></script>
  </body>
</html>


================================================
FILE: examples/function-calling/function-calling-openai/src/function_calling_openai.ts
================================================
import * as webllm from "@mlc-ai/web-llm";

/**
 * Sets the visible text of the element with the given DOM id.
 *
 * @param id - DOM id to look up.
 * @param text - Text stored in the element's `innerText`.
 * @throws Error when the lookup finds no element.
 */
function setLabel(id: string, text: string) {
  const target = document.getElementById(id);
  if (target === null || target === undefined) {
    throw Error("Cannot find label " + id);
  }
  target.innerText = text;
}

/**
 * OpenAI-style function calling demo: passes a `tools` list with `tool_choice: "auto"` and
 * logs the model's reply (non-streaming) or every chunk plus the final delta and usage
 * (streaming).
 */
async function main() {
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback },
  );

  const tools: Array<webllm.ChatCompletionTool> = [
    {
      type: "function",
      function: {
        name: "get_current_weather",
        description: "Get the current weather in a given location",
        parameters: {
          type: "object",
          properties: {
            location: {
              type: "string",
              description: "The city and state, e.g. San Francisco, CA",
            },
            unit: { type: "string", enum: ["celsius", "fahrenheit"] },
          },
          required: ["location"],
        },
      },
    },
  ];

  const request: webllm.ChatCompletionRequest = {
    stream: true, // works with stream as well, where the last chunk returns tool_calls
    stream_options: { include_usage: true },
    messages: [
      {
        role: "user",
        content:
          "What is the current weather in celsius in Pittsburgh and Tokyo?",
      },
    ],
    tool_choice: "auto",
    tools: tools,
  };

  if (!request.stream) {
    const reply0 = await engine.chat.completions.create(request);
    console.log(reply0.choices[0]);
    console.log(reply0.usage);
  } else {
    // If streaming, the last chunk returns tool calls
    const asyncChunkGenerator = await engine.chat.completions.create(request);
    let message = "";
    // Last chunk that does not carry usage.
    let lastChunk: webllm.ChatCompletionChunk | undefined;
    // Chunk that carries usage, if any was received.
    let usageChunk: webllm.ChatCompletionChunk | undefined;
    for await (const chunk of asyncChunkGenerator) {
      console.log(chunk);
      message += chunk.choices[0]?.delta?.content ?? "";
      setLabel("generate-label", message);
      if (chunk.usage) {
        usageChunk = chunk;
      } else {
        lastChunk = chunk;
      }
    }
    // Optional chaining instead of `!`: an empty stream logs `undefined` rather than throwing.
    console.log(lastChunk?.choices[0]?.delta);
    console.log(usageChunk?.usage);
  }
}

main();


================================================
FILE: examples/get-started/README.md
================================================
# WebLLM Get Started App

This folder provides a minimum demo to show WebLLM API in a webapp setting.
To try it out, you can do the following steps under this folder

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/get-started/package.json
================================================
{
  "name": "get-started",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/get_started.html  --port 8888",
    "build": "parcel build src/get_started.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/get-started/src/get_started.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <label id="stats-label"> </label>

    <script type="module" src="./get_started.ts"></script>
  </body>
</html>


================================================
FILE: examples/get-started/src/get_started.ts
================================================
import * as webllm from "@mlc-ai/web-llm";

/**
 * Replaces the text of a label on the page.
 *
 * @param id - DOM id of the label element.
 * @param text - New `innerText` for that element.
 * @throws Error if no element with `id` is found.
 */
function setLabel(id: string, text: string) {
  const found = document.getElementById(id);
  const missing = found == null;
  if (missing) {
    throw Error("Cannot find label " + id);
  } else {
    found.innerText = text;
  }
}

/**
 * Minimal WebLLM walkthrough: creates an engine for a prebuilt model, sends one
 * non-streaming chat completion with several optional sampling settings, and logs the
 * reply and its usage to the console.
 */
async function main() {
  // Progress reports received during engine creation are shown in the "init-label" element.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
  const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";
  const engineConfig = {
    initProgressCallback,
    logLevel: "INFO" as const, // specify the log level
  };
  // customize kv cache, use either context_window_size or sliding_window_size (with attention sink)
  const chatOptions = {
    context_window_size: 2048,
    // sliding_window_size: 1024,
    // attention_sink_size: 4,
  };
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    engineConfig,
    chatOptions,
  );

  // Option 2: Specify your own model other than the prebuilt ones
  // const appConfig: webllm.AppConfig = {
  //   model_list: [
  //     {
  //       model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC",
  //       model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC",
  //       model_lib:
  //         webllm.modelLibURLPrefix +
  //         webllm.modelVersion +
  //         "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm",
  //       overrides: {
  //         context_window_size: 2048,
  //       },
  //     },
  //   ],
  // };
  // const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
  //   selectedModel,
  //   { appConfig: appConfig, initProgressCallback: initProgressCallback },
  // );

  // Option 3: Instantiate MLCEngine() and call reload() separately
  // const engine: webllm.MLCEngineInterface = new webllm.MLCEngine({
  //   appConfig: appConfig, // if do not specify, we use webllm.prebuiltAppConfig
  //   initProgressCallback: initProgressCallback,
  // });
  // await engine.reload(selectedModel);

  const reply = await engine.chat.completions.create({
    messages: [{ role: "user", content: "List three US states." }],
    // everything below is optional
    n: 3,
    temperature: 1.5,
    max_tokens: 256,
    // 46510 and 7188 are "California", and 8421 and 51325 are "Texas" in Llama-3.1-8B-Instruct
    // So we would have a higher chance of seeing the latter two, but never the first in the answer
    logit_bias: {
      "46510": -100,
      "7188": -100,
      "8421": 5,
      "51325": 5,
    },
    logprobs: true,
    top_logprobs: 2,
  });
  console.log(reply);
  console.log(reply.usage);

  // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)`
}

main();


================================================
FILE: examples/get-started-latency-breakdown/README.md
================================================
# WebLLM Get Started App

This folder provides a minimum demo to show WebLLM API in a webapp setting with
collection of latency statistics for individual token sampling steps.
To try it out, you can do the following steps under this folder

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/get-started-latency-breakdown/package.json
================================================
{
  "name": "get-started-latency-breakdown",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/get_started_latency_breakdown.html  --port 8888",
    "build": "parcel build src/get_started_latency_breakdown.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/get-started-latency-breakdown/src/get_started_latency_breakdown.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <label id="stats-label"> </label>

    <script type="module" src="./get_started_latency_breakdown.ts"></script>
  </body>
</html>


================================================
FILE: examples/get-started-latency-breakdown/src/get_started_latency_breakdown.ts
================================================
import * as webllm from "@mlc-ai/web-llm";

/**
 * Puts `text` into the page element whose id is `id`.
 *
 * @param id - DOM id of the element.
 * @param text - Text to assign to `innerText`.
 * @throws Error when the element cannot be found.
 */
function setLabel(id: string, text: string) {
  const node = document.getElementById(id);
  if (node != null) {
    node.innerText = text;
  } else {
    throw Error("Cannot find label " + id);
  }
}

/** Arrays of latency samples, one array per named timing category. */
type LatencyBreakdown = {
  logitProcessorTime: number[];
  logitBiasTime: number[];
  penaltyTime: number[];
  sampleTime: number[];
  totalTime: number[];
  grammarBitmaskTime: number[];
};

/**
 * Summarizes every non-empty sample array of `latency_breakdown`.
 *
 * @param latency_breakdown - Latency samples grouped by category.
 * @returns An object with one `{ avg, min, max, p99 }` entry per category that has at least
 *   one sample; empty categories are left out. `p99` is the sorted sample at index
 *   `floor(0.99 * (count - 1))`.
 */
function computeStats(
  latency_breakdown: LatencyBreakdown,
): Record<string, any> {
  // Only called with non-empty arrays (see the guard in the loop below).
  const summarize = (samples: number[]) => {
    const ascending = samples.slice().sort((lo, hi) => lo - hi);
    const lastIndex = ascending.length - 1;
    let total = 0;
    for (const sample of samples) {
      total += sample;
    }
    return {
      avg: total / samples.length,
      min: ascending[0],
      max: ascending[lastIndex],
      p99: ascending[Math.floor(0.99 * lastIndex)],
    };
  };

  const summary: Record<string, any> = {};
  Object.entries(latency_breakdown).forEach(([category, samples]) => {
    if (!Array.isArray(samples) || samples.length === 0) {
      return;
    }
    summary[category] = summarize(samples);
  });
  return summary;
}

/**
 * Runs the same chat completion `numTrials` times, accumulates the per-request latency
 * breakdown and throughput figures reported in `usage.extra`, and logs summary statistics
 * to the console.
 */
async function main() {
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  // Option 1: If we do not specify appConfig, we use `prebuiltAppConfig` defined in `config.ts`
  const selectedModel = "Qwen3-0.6B-q0f32-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    {
      initProgressCallback: initProgressCallback,
      logLevel: "INFO", // specify the log level
    },
    // customize kv cache, use either context_window_size or sliding_window_size (with attention sink)
    {
      context_window_size: 2048,
      // sliding_window_size: 1024,
      // attention_sink_size: 4,
    },
  );

  // Samples from all trials are appended to these arrays.
  const latencyBreakdown: LatencyBreakdown = {
    logitProcessorTime: [],
    logitBiasTime: [],
    penaltyTime: [],
    sampleTime: [],
    totalTime: [],
    grammarBitmaskTime: [],
  };
  // The literal above lists exactly the keys of LatencyBreakdown, so the cast is safe.
  const breakdownKeys = Object.keys(
    latencyBreakdown,
  ) as (keyof LatencyBreakdown)[];

  const decodeTokensPerS: number[] = [];
  const completionTokens: number[] = [];
  const e2eLatencyS: number[] = [];
  const timePerOutputTokenS: number[] = [];

  const numTrials = 20;
  for (let i = 0; i < numTrials; i++) {
    console.log(`Trial ${i + 1} / ${numTrials}`);
    const reply0 = await engine.chat.completions.create({
      messages: [{ role: "user", content: "List twenty US states." }],
      // below configurations are all optional
      n: 1,
      temperature: 0,
      max_tokens: 2048,
      // NOTE: the token ids below are the Llama-3.1-8B-Instruct ids for "California"
      // (46510, 7188) and "Texas" (8421, 51325). They are kept for reference only; look up
      // the corresponding ids before enabling logit_bias with the model selected above.
      // logit_bias: {
      //   "46510": -100,
      //   "7188": -100,
      //   "8421": 5,
      //   "51325": 5,
      // },
      top_p: 0.8,
      logprobs: true,
      top_logprobs: 2,
      frequency_penalty: 1.2,
      presence_penalty: 1.0,
      repetition_penalty: 1.1,
    });

    const extra = reply0.usage?.extra;
    const trialBreakdown = extra?.latencyBreakdown;
    // Missing usage or breakdown contributes no samples for this trial.
    for (const key of breakdownKeys) {
      latencyBreakdown[key].push(...(trialBreakdown?.[key] ?? []));
    }

    // Missing figures are recorded as 0 so all arrays keep one entry per trial.
    decodeTokensPerS.push(extra?.decode_tokens_per_s ?? 0);
    e2eLatencyS.push(extra?.e2e_latency_s ?? 0);
    timePerOutputTokenS.push(extra?.time_per_output_token_s ?? 0);
    completionTokens.push(reply0.usage?.completion_tokens ?? 0);
  }

  // computeStats returns one { avg, min, max, p99 } object per category, not plain numbers,
  // so the result type is left to inference.
  const latencyStats = computeStats(latencyBreakdown);
  console.log("Latency stats: ", latencyStats);
  console.log("Decode tokens per second: ", decodeTokensPerS);
  console.log("Completion tokens: ", completionTokens);
  console.log("E2E latency (s): ", e2eLatencyS);
  console.log("Time per output token (s): ", timePerOutputTokenS);

  // To change model, either create a new engine via `CreateMLCEngine()`, or call `engine.reload(modelId)`
}

main();


================================================
FILE: examples/get-started-web-worker/README.md
================================================
# WebLLM Get Started with WebWorker

This folder provides a minimum demo to show WebLLM API using
[WebWorker](https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Using_web_workers).
The main benefit of a web worker is that all ML workloads run on a separate thread and,
as a result, are less likely to block the UI.

To try it out, you can do the following steps under this folder

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/get-started-web-worker/package.json
================================================
{
  "name": "get-started-web-worker",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/get_started.html  --port 8885",
    "build": "parcel build src/get_started.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^6.0.3",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/get-started-web-worker/src/get_started.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>
  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output
    <br />
    <br />
    <label id="init-label"> </label>

    <h3>Prompt</h3>
    <label id="prompt-label"> </label>

    <h3>Response</h3>
    <label id="generate-label"> </label>
    <br />
    <label id="stats-label"> </label>

    <script type="module" src="./main.ts"></script>
  </body>
</html>


================================================
FILE: examples/get-started-web-worker/src/main.ts
================================================
import * as webllm from "@mlc-ai/web-llm";

/**
 * Updates the text shown by the element with DOM id `id`.
 *
 * @param id - DOM id of the element to update.
 * @param text - Text written to the element's `innerText`.
 * @throws Error if the document contains no such element.
 */
function setLabel(id: string, text: string) {
  const labelElement = document.getElementById(id);
  if (labelElement == null) throw Error("Cannot find label " + id);

  labelElement.innerText = text;
}

// There are two demonstrations, pick one to run

/**
 * Chat completion (OpenAI style) without streaming, where we get the entire response at once.
 */
async function mainNonStreaming() {
  const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

  // Progress reports received during engine creation are shown in the "init-label" element.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // The engine is created on top of a module worker running ./worker.ts.
  const worker = new Worker(new URL("./worker.ts", import.meta.url), {
    type: "module",
  });
  const engine: webllm.MLCEngineInterface =
    await webllm.CreateWebWorkerMLCEngine(worker, selectedModel, {
      initProgressCallback,
    });

  const systemPrompt =
    "You are a helpful, respectful and honest assistant. " +
    "Be as happy as you can when speaking please. ";
  const request: webllm.ChatCompletionRequest = {
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: "Provide me three US states." },
      { role: "assistant", content: "California, New York, Pennsylvania." },
      { role: "user", content: "Two more please!" },
    ],
    n: 3,
    temperature: 1.5,
    max_tokens: 256,
  };

  // Without `stream`, the whole reply is returned at once.
  const reply0 = await engine.chat.completions.create(request);
  console.log(reply0);

  console.log(reply0.usage);
}

/**
 * Chat completion (OpenAI style) with streaming, where delta is sent while generating response.
 */
async function mainStreaming() {
  const selectedModel = "Llama-3.1-8B-Instruct-q4f32_1-MLC";

  // Progress reports received during engine creation are shown in the "init-label" element.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // The engine is created on top of a module worker running ./worker.ts.
  const worker = new Worker(new URL("./worker.ts", import.meta.url), {
    type: "module",
  });
  const engine: webllm.MLCEngineInterface =
    await webllm.CreateWebWorkerMLCEngine(worker, selectedModel, {
      initProgressCallback,
    });

  const systemPrompt =
    "You are a helpful, respectful and honest assistant. " +
    "Be as happy as you can when speaking please. ";
  const request: webllm.ChatCompletionRequest = {
    stream: true,
    stream_options: { include_usage: true },
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: "Provide me three US states." },
      { role: "assistant", content: "California, New York, Pennsylvania." },
      { role: "user", content: "Two more please!" },
    ],
    temperature: 1.5,
    max_tokens: 256,
  };

  const asyncChunkGenerator = await engine.chat.completions.create(request);
  let message = "";
  for await (const chunk of asyncChunkGenerator) {
    console.log(chunk);
    // Append this chunk's text (if any) and refresh the label with the running message.
    const piece = chunk.choices[0]?.delta?.content;
    if (piece) {
      message += piece;
    }
    setLabel("generate-label", message);
    if (chunk.usage) {
      console.log(chunk.usage); // only last chunk has usage
    }
    // engine.interruptGenerate();  // works with interrupt as well
  }
  const finalMessage = await engine.getMessage(); // the concatenated message
  console.log("Final message:\n", finalMessage);
}

// Run one of the functions defined above
// mainNonStreaming();
mainStreaming();


================================================
FILE: examples/get-started-web-worker/src/worker.ts
================================================
import { WebWorkerMLCEngineHandler } from "@mlc-ai/web-llm";

// Route every message received by this worker to the WebLLM worker handler
const handler = new WebWorkerMLCEngineHandler();
self.onmessage = (event: MessageEvent) => {
  handler.onmessage(event);
};


================================================
FILE: examples/json-mode/README.md
================================================
### OpenAI API Demos

Run `npm install` first, followed by `npm start`.

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/json-mode/package.json
================================================
{
  "name": "openai-api",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/json_mode.html  --port 8888",
    "build": "parcel build src/json_mode.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/json-mode/src/json_mode.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>

  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output.
    <br />
    <br />
    <label id="init-label"> </label>

    <script type="module" src="./json_mode.ts"></script>
  </body>
</html>


================================================
FILE: examples/json-mode/src/json_mode.ts
================================================
import * as webllm from "@mlc-ai/web-llm";

/**
 * Write `text` into the page element whose id is `id`.
 * Throws when the document has no element with that id.
 */
function setLabel(id: string, text: string) {
  const target = document.getElementById(id);
  if (target != null) {
    target.innerText = text;
    return;
  }
  throw Error("Cannot find label " + id);
}

/**
 * Load a model, send one non-streaming chat completion request in JSON mode
 * (with `n: 2`), and log the reply, the engine's current message and the usage.
 */
async function main() {
  // Mirror model-loading progress into the page.
  const onInitProgress = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  const modelId = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const modelId = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  // const modelId = "Phi-3.5-mini-instruct-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    modelId,
    { initProgressCallback: onInitProgress },
  );

  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false, // works with streaming, logprobs, top_logprobs as well
    messages: [
      {
        role: "user",
        content: "Write a short JSON file introducing yourself.",
      },
    ],
    n: 2,
    max_tokens: 128,
    response_format: { type: "json_object" } as webllm.ResponseFormat,
  };

  const completion = await engine.chatCompletion(request);
  console.log(completion);

  const currentMessage = await engine.getMessage();
  console.log("First reply's last choice:\n" + currentMessage);
  console.log(completion.usage);
}

// Start the example as soon as this module is loaded.
main();


================================================
FILE: examples/json-schema/README.md
================================================
### OpenAI API Demos

Run `npm install` first, followed by `npm start`.

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/json-schema/package.json
================================================
{
  "name": "openai-api",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/json_schema.html  --port 8885",
    "build": "parcel build src/json_schema.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/json-schema/src/json_schema.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>

  <body>
    <h2>WebLLM Test Page</h2>
    Open console to see output.
    <br />
    <br />
    <label id="init-label"> </label>

    <script type="module" src="./json_schema.ts"></script>
  </body>
</html>


================================================
FILE: examples/json-schema/src/json_schema.ts
================================================
import * as webllm from "@mlc-ai/web-llm";
import { Type, Static } from "@sinclair/typebox";

/**
 * Replace the visible text of the element identified by `id` with `text`.
 * An unknown id is reported by throwing an error.
 */
function setLabel(id: string, text: string) {
  const element = document.getElementById(id);
  if (!element) {
    throw new Error(`Cannot find label ${id}`);
  }
  element.innerText = text;
}

/**
 * Ask a model for a small JSON object constrained by a JSON schema, showing two
 * ways of writing that schema (a raw string and a typebox definition).
 * The typebox-derived schema is the one actually sent with the request.
 */
async function simpleStructuredTextExample() {
  // There are several options of providing such a schema
  // 1. You can directly define a schema in string
  const schemaFromString = `{
        "properties": {
            "size": {"title": "Size", "type": "integer"}, 
            "is_accepted": {"title": "Is Accepted", "type": "boolean"}, 
            "num": {"title": "Num", "type": "number"}
        },
        "required": ["size", "is_accepted", "num"], 
        "title": "Schema", "type": "object"
    }`;

  // 2. You can use 3rdparty libraries like typebox to create a schema
  const SimpleRecord = Type.Object({
    size: Type.Integer(),
    is_accepted: Type.Boolean(),
    num: Type.Number(),
  });
  type SimpleRecord = Static<typeof SimpleRecord>;
  const schemaFromTypebox = JSON.stringify(SimpleRecord);
  console.log(schemaFromTypebox);
  // {"type":"object","properties":{"size":{"type":"integer"},"is_accepted":{"type":"boolean"},
  // "num":{"type":"number"}},"required":["size","is_accepted","num"]}

  // Mirror model-loading progress into the page.
  const onInitProgress = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  // const modelId = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const modelId = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  const modelId = "Phi-3.5-mini-instruct-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    modelId,
    { initProgressCallback: onInitProgress, logLevel: "INFO" },
  );

  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false, // works with streaming, logprobs, top_logprobs as well
    messages: [
      {
        role: "user",
        content:
          "Generate a json containing three fields: an integer field named size, a " +
          "boolean field named is_accepted, and a float field named num.",
      },
    ],
    max_tokens: 128,
    response_format: {
      type: "json_object",
      schema: schemaFromTypebox,
    } as webllm.ResponseFormat,
  };

  const completion = await engine.chatCompletion(request);
  console.log(completion);

  const output = await engine.getMessage();
  console.log("Output:\n" + output);
  console.log(completion.usage);
}

// The json schema and prompt is taken from
// https://github.com/sgl-project/sglang/tree/main?tab=readme-ov-file#json-decoding
/**
 * Ask a model to describe a Harry Potter character as JSON constrained by a
 * typebox-generated schema (enums, a nested object, booleans and strings),
 * then log the reply, the engine's current message and the usage statistics.
 */
async function harryPotterExample() {
  const T = Type.Object({
    name: Type.String(),
    house: Type.Enum({
      Gryffindor: "Gryffindor",
      Hufflepuff: "Hufflepuff",
      Ravenclaw: "Ravenclaw",
      Slytherin: "Slytherin",
    }),
    blood_status: Type.Enum({
      "Pure-blood": "Pure-blood",
      "Half-blood": "Half-blood",
      "Muggle-born": "Muggle-born",
    }),
    occupation: Type.Enum({
      Student: "Student",
      Professor: "Professor",
      "Ministry of Magic": "Ministry of Magic",
      Other: "Other",
    }),
    wand: Type.Object({
      wood: Type.String(),
      core: Type.String(),
      length: Type.Number(),
    }),
    alive: Type.Boolean(),
    patronus: Type.String(),
  });

  type T = Static<typeof T>;
  const schema = JSON.stringify(T);
  console.log(schema);

  // Mirror model-loading progress into the page.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  const selectedModel = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  // const selectedModel = "Phi-3.5-mini-instruct-q4f16_1-MLC";

  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    { initProgressCallback: initProgressCallback, logLevel: "INFO" },
  );

  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false,
    messages: [
      {
        role: "user",
        // The first literal ends with a space so the two sentences do not run
        // together ("...JSON format.Name is...") once concatenated.
        content:
          "Hermione Granger is a character in Harry Potter. Please fill in the following information about this character in JSON format. " +
          "Name is a string of character name. House is one of Gryffindor, Hufflepuff, Ravenclaw, Slytherin. Blood status is one of Pure-blood, Half-blood, Muggle-born. Occupation is one of Student, Professor, Ministry of Magic, Other. Wand is an object with wood, core, and length. Alive is a boolean. Patronus is a string.",
      },
    ],
    max_tokens: 128,
    response_format: {
      type: "json_object",
      schema: schema,
    } as webllm.ResponseFormat,
  };

  const reply = await engine.chatCompletion(request);
  console.log(reply);
  console.log("Output:\n" + (await engine.getMessage()));
  console.log(reply.usage);
  // Optional chaining instead of `!`: log `undefined` rather than throwing
  // if the reply carries no usage object.
  console.log(reply.usage?.extra);
}

/**
 * Manual function calling through JSON-schema-constrained output: the system
 * prompt lists the available tools and the expected reply schema, and the
 * response format forces the model to answer with a `tool_calls` JSON object.
 * Logs the reply content and the usage statistics.
 */
async function functionCallingExample() {
  const T = Type.Object({
    tool_calls: Type.Array(
      Type.Object({
        arguments: Type.Any(),
        name: Type.String(),
      }),
    ),
  });
  type T = Static<typeof T>;
  // `schema` is already a JSON string; it must not be stringified a second time.
  const schema = JSON.stringify(T);
  console.log(schema);

  const tools: Array<webllm.ChatCompletionTool> = [
    {
      type: "function",
      function: {
        name: "get_current_weather",
        description: "Get the current weather in a given location",
        parameters: {
          type: "object",
          properties: {
            location: {
              type: "string",
              description: "The city and state, e.g. San Francisco, CA",
            },
            unit: { type: "string", enum: ["celsius", "fahrenheit"] },
          },
          required: ["location"],
        },
      },
    },
  ];
  const toolsJson = JSON.stringify(tools);

  // Mirror model-loading progress into the page.
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  const selectedModel = "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    selectedModel,
    {
      initProgressCallback: initProgressCallback,
    },
  );

  const request: webllm.ChatCompletionRequest = {
    stream: false,
    messages: [
      {
        role: "system",
        // The schema is interpolated as-is: wrapping it in JSON.stringify again
        // would embed a quoted, backslash-escaped string instead of the schema.
        content: `You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> ${toolsJson} </tools>. Do not stop calling functions until the task has been accomplished or you've reached max iteration of 10.
      Calling multiple functions at once can overload the system and increase cost so call one function at a time please.
      If you plan to continue with analysis, always call another function.
      Return a valid json object (using double quotes) in the following schema: ${schema}.`,
      },
      {
        role: "user",
        content:
          "What is the current weather in celsius in Pittsburgh and Tokyo?",
      },
    ],
    response_format: {
      type: "json_object",
      schema: schema,
    } as webllm.ResponseFormat,
  };

  const reply = await engine.chat.completions.create(request);
  console.log(reply.choices[0].message.content);

  console.log(reply.usage);
}

/**
 * Constrain generation with a hand-written EBNF grammar (a JSON grammar here)
 * passed through `ResponseFormat.grammar`, then log the reply, the engine's
 * current message and the usage statistics.
 */
async function ebnfGrammarExample() {
  // You can directly define an EBNFGrammar string with ResponseFormat.grammar
  const jsonGrammar = String.raw`
root ::= basic_array | basic_object
basic_any ::= basic_number | basic_string | basic_boolean | basic_null | basic_array | basic_object
basic_integer ::= ("0" | "-"? [1-9] [0-9]*) ".0"?
basic_number ::= ("0" | "-"? [1-9] [0-9]*) ("." [0-9]+)? ([eE] [+-]? [0-9]+)?
basic_string ::= (([\"] basic_string_1 [\"]))
basic_string_1 ::= "" | [^"\\\x00-\x1F] basic_string_1 | "\\" escape basic_string_1
escape ::= ["\\/bfnrt] | "u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9]
basic_boolean ::= "true" | "false"
basic_null ::= "null"
basic_array ::= "[" ("" | ws basic_any (ws "," ws basic_any)*) ws "]"
basic_object ::= "{" ("" | ws basic_string ws ":" ws basic_any ( ws "," ws basic_string ws ":" ws basic_any)*) ws "}"
ws ::= [ \n\t]*
`;

  // Mirror model-loading progress into the page.
  const onInitProgress = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };

  // Pick any one of these models to start trying -- most models in WebLLM support grammar
  const modelId = "Llama-3.2-3B-Instruct-q4f16_1-MLC";
  // const modelId = "Qwen2.5-1.5B-Instruct-q4f16_1-MLC";
  // const modelId = "Phi-3.5-mini-instruct-q4f16_1-MLC";
  const engine: webllm.MLCEngineInterface = await webllm.CreateMLCEngine(
    modelId,
    { initProgressCallback: onInitProgress, logLevel: "INFO" },
  );

  // Note that you'd need to prompt the model to answer in JSON either in
  // user's message or the system prompt
  const request: webllm.ChatCompletionRequest = {
    stream: false, // works with streaming, logprobs, top_logprobs as well
    messages: [
      {
        role: "user",
        content: "Introduce yourself in JSON",
      },
    ],
    max_tokens: 128,
    response_format: {
      type: "grammar",
      grammar: jsonGrammar,
    } as webllm.ResponseFormat,
  };

  const completion = await engine.chatCompletion(request);
  console.log(completion);

  const output = await engine.getMessage();
  console.log("Output:\n" + output);
  console.log(completion.usage);
}

/**
 * Entry point: runs exactly one of the example functions in this file.
 * Comment/uncomment the lines below to switch to a different example.
 */
async function main() {
  // await simpleStructuredTextExample();
  await harryPotterExample();
  // await functionCallingExample();
  // await ebnfGrammarExample();
}

// Start the selected example as soon as this module is loaded.
main();


================================================
FILE: examples/logit-processor/README.md
================================================
# WebLLM Logit Processor and Low-Level API Example

This folder explains the usage of `LogitProcessor`, demonstrating how it can be used to
manipulate the raw logits before sampling the token (e.g. setting certain tokens to `inf` or `-inf`).
We demonstrate how to use it with and without a web worker, which can be toggled with `USE_WEB_WORKER`
in `logit_processor.ts` (see `worker.ts` on how `LogitProcessor` plays a role there).

We also demonstrate the usage of a low-level API, `forwardTokensAndSample()`, which, unlike `chat.completions.create()`
(which assumes the usage is autoregressive chatting), gives us more fine-grained control.

See `my_logit_processor.ts` on how to customize your own logit processor. Here we manually set the logit
of token 0 to `100.0`, large enough that we should expect to always sample token 0, which
is indeed the case if we observe the console log. We also demonstrate that a LogitProcessor can be
stateful, and that the state can be cleared with `LogitProcessor.resetState()`.

To try it out, you can do the following steps under this folder

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package, you can change the web-llm dependency to `"file:../.."` and follow the build-from-source instructions in the project to build WebLLM locally. This option is only recommended if you would like to hack on the WebLLM core package.


================================================
FILE: examples/logit-processor/package.json
================================================
{
  "name": "logit-processor",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/logit_processor.html  --port 8885",
    "build": "parcel build src/logit_processor.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}


================================================
FILE: examples/logit-processor/src/logit_processor.html
================================================
<!doctype html>
<html>
  <script>
    webLLMGlobal = {};
  </script>

  <body>
    <h2>WebLLM Logit Processor Test Page</h2>
    Open console to see the effect of your logit processor.
    <br />
    <br />
    <label id="init-label"> </label>

    <script type="module" src="./logit_processor.ts"></script>
  </body>
</html>


================================================
FILE: examples/logit-processor/src/logit_processor.ts
================================================
import * as webllm from "@mlc-ai/web-llm";
import { MyLogitProcessor } from "./my_logit_processor";

const USE_WEB_WORKER = true; // Toggle this to use Logit Processor without a web worker
const AUTOREGRESS_LIMIT = 32; // How many tokens to generate for this test

/**
 * Show `text` in the element with the given `id`.
 * Throws if no such element exists in the document.
 */
function setLabel(id: string, text: string) {
  const node = document.getElementById(id);
  const found = node != null;
  if (!found) {
    throw Error("Cannot find label " + id);
  }
  node.innerText = text;
}

/**
 * Run one generation pass with the low-level API `forwardTokensAndSample()`:
 * prefill `prompt`, then keep feeding the last sampled token back into the
 * engine until the counter (starting at `prompt.length`) reaches
 * `AUTOREGRESS_LIMIT`. Every sampled token is logged to the console.
 */
async function runAutoregression(
  engine: webllm.MLCEngineInterface,
  prompt: Array<number>,
): Promise<void> {
  let nextToken = await engine.forwardTokensAndSample(
    prompt,
    /*isPrefill=*/ true,
  );
  console.log(nextToken);

  for (let counter = prompt.length; counter < AUTOREGRESS_LIMIT; counter += 1) {
    nextToken = await engine.forwardTokensAndSample(
      [nextToken],
      /*isPrefill=*/ false,
    );
    console.log(nextToken);
  }
}

/**
 * Create an engine (inside a web worker or on the main thread, depending on
 * `USE_WEB_WORKER`), run two generation passes separated by a chat reset, and
 * log the runtime statistics.
 */
async function main() {
  const initProgressCallback = (report: webllm.InitProgressReport) => {
    setLabel("init-label", report.text);
  };
  const modelId = "phi-2-q4f32_1-MLC";

  let engine: webllm.MLCEngineInterface;

  // Depending on whether we use a web worker, the code is slightly different
  if (USE_WEB_WORKER) {
    // see worker.ts on how LogitProcessor plays a role there
    engine = await webllm.CreateWebWorkerMLCEngine(
      new Worker(new URL("./worker.ts", import.meta.url), { type: "module" }),
      modelId,
      { initProgressCallback: initProgressCallback },
    );
  } else {
    // Instantiate myLogitProcessor, registering in the logitProcessorRegistry.
    // Only this branch passes the registry to the engine, so it is built here.
    const myLogitProcessor = new MyLogitProcessor();
    const logitProcessorRegistry = new Map<string, webllm.LogitProcessor>();
    logitProcessorRegistry.set(modelId, myLogitProcessor);

    engine = await webllm.CreateMLCEngine(modelId, {
      initProgressCallback: initProgressCallback,
      logitProcessorRegistry: logitProcessorRegistry,
    });
  }

  // Below we demonstrate the usage of a low-level API `forwardTokensAndSample()`
  const prompt: Array<number> = [42];
  await runAutoregression(engine, prompt);

  // Calling `engine.resetChat()` triggers MyLogitProcessor.resetState().
  // Await it so the reset has completed before the next prefill is issued.
  await engine.resetChat();
  await runAutoregression(engine, prompt);

  // `forwardTokensAndSample()` is made compatible with registering runtime stats.
  console.log(await engine.runtimeStatsText());
}

// Start the example as soon as this module is loaded.
main();


================================================
FILE: examples/logit-processor/src/my_logit_processor.ts
================================================
import * as webllm from "@mlc-ai/web-llm";

/**
 * Example LogitProcessor: forces the logit of token 0 to a large value and
 * keeps a running list of the sampled tokens to show that a processor can be
 * stateful.
 */
export class MyLogitProcessor implements webllm.LogitProcessor {
  // Tokens seen since construction or the last resetState() call.
  private tokenSequence: number[] = [];

  /** Overwrites logits[0] in place and returns the same array. */
  processLogits(logits: Float32Array): Float32Array {
    // should be enough so that we always sample token 0 below
    logits[0] = 100.0;
    return logits;
  }

  /** Records the sampled token and logs how many have been recorded so far. */
  processSampledToken(token: number): void {
    const recorded = this.tokenSequence.push(token);
    console.log(`processSampledToken: ${recorded}`);
  }

  /** Drops the recorded tokens and logs that the state was reset. */
  resetState(): void {
    this.tokenSequence = [];
    console.log("resetState");
  }
}


================================================
FILE: examples/logit-processor/src/worker.ts
================================================
// Serve the chat workload through web worker
import * as webllm from "@mlc-ai/web-llm";
import { MyLogitProcessor } from "./my_logit_processor";

console.log("Use web worker for logit processor");

// Register the custom logit processor under the model id it applies to.
const myLogitProcessor = new MyLogitProcessor();
const logitProcessorRegistry = new Map<string, webllm.LogitProcessor>([
  ["phi-2-q4f32_1-MLC", myLogitProcessor],
]);

// Hand the registry to the worker handler, then route every incoming message to it.
const handler = new webllm.WebWorkerMLCEngineHandler();
handler.setLogitProcessorRegistry(logitProcessorRegistry);
self.onmessage = (event: MessageEvent) => {
  handler.onmessage(event);
};


================================================
FILE: examples/multi-models/README.md
================================================
# WebLLM Get Started App

This folder provides a minimum demo to show WebLLM API in a webapp setting.
To try it out, you can do the following steps under this folder

```bash
npm install
npm start
```

Note: if you would like to hack on the WebLLM core package,
you can change the web-llm dependency to `"file:../.."` and follow the build-from-source
instructions in the project to build WebLLM locally. This option is only recommended
if you would like to hack on the WebLLM core package.


================================================
FILE: examples/multi-models/package.json
================================================
{
  "name": "get-started",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "start": "parcel src/multi_models.html  --port 8888",
    "build": "parcel build src/multi_models.html --dist-dir lib"
  },
  "devDependencies": {
    "buffer": "^5.7.1",
    "parcel": "^2.8.3",
    "process": "^0.11.10",
    "tslib": "^2.3.1",
    "typescript": "^4.9.5",
    "url": "^0.11.3"
  },
  "dependencies": {
    "@mlc-ai/web-llm": "^0.2.82"
  }
}

Download .txt

gitextract_o4yltvbb/

├── .github/
│   └── workflows/
│       ├── build-site.yaml
│       ├── build.yaml
│       ├── linter.yaml
│       ├── security.yaml
│       └── tests.yaml
├── .gitignore
├── .gitmodules
├── .husky/
│   └── pre-commit
├── .lintstagedrc.json
├── .nvmrc
├── .prettierignore
├── .prettierrc
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── cleanup-index-js.sh
├── docs/
│   ├── Makefile
│   ├── README.md
│   ├── conf.py
│   ├── developer/
│   │   ├── add_models.rst
│   │   └── building_from_source.rst
│   ├── index.rst
│   ├── make.bat
│   ├── requirements.txt
│   └── user/
│       ├── advanced_usage.rst
│       ├── api_reference.rst
│       ├── basic_usage.rst
│       └── get_started.rst
├── eslint.config.cjs
├── examples/
│   ├── .gitignore
│   ├── README.md
│   ├── abort-reload/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started.html
│   │       └── get_started.js
│   ├── cache-usage/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── cache_usage.html
│   │       └── cache_usage.ts
│   ├── chrome-extension/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── content.js
│   │       ├── example.html
│   │       ├── manifest.json
│   │       ├── manifest_v2.json
│   │       ├── popup.css
│   │       ├── popup.html
│   │       └── popup.ts
│   ├── chrome-extension-webgpu-service-worker/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── background.ts
│   │       ├── content.js
│   │       ├── example.html
│   │       ├── manifest.json
│   │       ├── popup.css
│   │       ├── popup.html
│   │       └── popup.ts
│   ├── embeddings/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── embeddings.html
│   │       └── embeddings.ts
│   ├── function-calling/
│   │   ├── README.md
│   │   ├── function-calling-manual/
│   │   │   ├── README.md
│   │   │   ├── package.json
│   │   │   └── src/
│   │   │       ├── function_calling_manual.html
│   │   │       └── function_calling_manual.ts
│   │   └── function-calling-openai/
│   │       ├── README.md
│   │       ├── package.json
│   │       └── src/
│   │           ├── function_calling_openai.html
│   │           └── function_calling_openai.ts
│   ├── get-started/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started.html
│   │       └── get_started.ts
│   ├── get-started-latency-breakdown/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started_latency_breakdown.html
│   │       └── get_started_latency_breakdown.ts
│   ├── get-started-web-worker/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── get_started.html
│   │       ├── main.ts
│   │       └── worker.ts
│   ├── json-mode/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── json_mode.html
│   │       └── json_mode.ts
│   ├── json-schema/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── json_schema.html
│   │       └── json_schema.ts
│   ├── logit-processor/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── logit_processor.html
│   │       ├── logit_processor.ts
│   │       ├── my_logit_processor.ts
│   │       └── worker.ts
│   ├── multi-models/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── main.ts
│   │       ├── multi_models.html
│   │       └── worker.ts
│   ├── multi-round-chat/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── multi_round_chat.html
│   │       └── multi_round_chat.ts
│   ├── next-simple-chat/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── next.config.js
│   │   ├── package.json
│   │   ├── postcss.config.js
│   │   ├── src/
│   │   │   ├── pages/
│   │   │   │   ├── _app.tsx
│   │   │   │   ├── _document.tsx
│   │   │   │   ├── api/
│   │   │   │   │   └── hello.ts
│   │   │   │   └── index.tsx
│   │   │   ├── styles/
│   │   │   │   └── globals.css
│   │   │   └── utils/
│   │   │       ├── chat_component.tsx
│   │   │       └── chat_ui.ts
│   │   ├── tailwind.config.js
│   │   └── tsconfig.json
│   ├── qwen3/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── qwen3_example.html
│   │       └── qwen3_example.ts
│   ├── seed-to-reproduce/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── seed.html
│   │       └── seed.ts
│   ├── service-worker/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── index.html
│   │       ├── main.ts
│   │       └── sw.ts
│   ├── simple-chat-js/
│   │   ├── index.css
│   │   ├── index.html
│   │   └── index.js
│   ├── simple-chat-ts/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── gh-config.js
│   │       ├── llm_chat.css
│   │       ├── llm_chat.html
│   │       ├── simple_chat.ts
│   │       └── worker.ts
│   ├── simple-chat-upload/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── gh-config.js
│   │       ├── llm_chat.css
│   │       ├── llm_chat.html
│   │       ├── simple_chat.ts
│   │       └── worker.ts
│   ├── streaming/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── streaming.html
│   │       └── streaming.ts
│   ├── structural-tag-tool-use/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── mcp_structural_tag.html
│   │       └── mcp_structural_tag.ts
│   ├── text-completion/
│   │   ├── README.md
│   │   ├── package.json
│   │   └── src/
│   │       ├── text_completion.html
│   │       └── text_completion.ts
│   └── vision-model/
│       ├── README.md
│       ├── package.json
│       └── src/
│           ├── utils.ts
│           ├── vision_model.html
│           ├── vision_model.ts
│           └── worker.ts
├── jest.config.cjs
├── licenses/
│   └── license.openai_node.txt
├── package.json
├── rollup.config.js
├── scripts/
│   ├── gh_deploy_site.sh
│   ├── local_deploy_site.sh
│   ├── prep_deps.sh
│   └── serve_mlc_llm_dist.sh
├── site/
│   ├── .gitignore
│   ├── _config.yml
│   ├── _includes/
│   │   ├── head.html
│   │   └── hero.html
│   ├── assets/
│   │   ├── css/
│   │   │   └── hero.scss
│   │   └── video/
│   │       ├── Code.webm
│   │       └── Pittsburgh.webm
│   └── index.md
├── src/
│   ├── cache_util.ts
│   ├── config.ts
│   ├── conversation.ts
│   ├── embedding.ts
│   ├── engine.ts
│   ├── error.ts
│   ├── extension_service_worker.ts
│   ├── index.ts
│   ├── llm_chat.ts
│   ├── message.ts
│   ├── openai_api_protocols/
│   │   ├── chat_completion.ts
│   │   ├── completion.ts
│   │   ├── embedding.ts
│   │   └── index.ts
│   ├── service_worker.ts
│   ├── support.ts
│   ├── types.ts
│   ├── utils.ts
│   └── web_worker.ts
├── tests/
│   ├── .gitignore
│   ├── cache_util.test.ts
│   ├── constants.ts
│   ├── conversation.test.ts
│   ├── embedding_stats.test.ts
│   ├── engine_integration.test.ts
│   ├── extension_service_worker.test.ts
│   ├── function_calling.test.ts
│   ├── generation_config.test.ts
│   ├── llm_chat_pipeline.test.ts
│   ├── multi_round_chat.test.ts
│   ├── openai_chat_completion.test.ts
│   ├── openai_completion.test.ts
│   ├── openai_embeddings.test.ts
│   ├── scripts/
│   │   └── sanity_checks/
│   │       ├── README.md
│   │       ├── package.json
│   │       ├── sanity_checks.html
│   │       └── sanity_checks.ts
│   ├── service_worker.test.ts
│   ├── util.test.ts
│   └── web_worker_handler.test.ts
├── tsconfig.json
└── utils/
    ├── .gitignore
    └── vram_requirements/
        ├── .gitignore
        ├── README.md
        ├── package.json
        └── src/
            ├── gh-config.js
            ├── vram_requirements.html
            └── vram_requirements.ts

Download .txt

SYMBOL INDEX (629 symbols across 60 files)

FILE: examples/abort-reload/src/get_started.js
  function setLabel (line 6) | function setLabel(id, text) {
  function main (line 14) | async function main() {

FILE: examples/cache-usage/src/cache_usage.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 15) | async function main() {

FILE: examples/chrome-extension-webgpu-service-worker/src/popup.ts
  function enableInputs (line 56) | function enableInputs() {
  function handleClick (line 87) | async function handleClick() {
  function updateAnswer (line 120) | function updateAnswer(answer: string) {
  function fetchPageContents (line 149) | function fetchPageContents() {

FILE: examples/chrome-extension/src/popup.ts
  function setLabel (line 18) | function setLabel(id: string, text: string) {
  function getElementAndCheck (line 25) | function getElementAndCheck(id: string): HTMLElement {
  function enableInputs (line 99) | function enableInputs() {
  function handleClick (line 145) | async function handleClick() {
  function handleSelectChange (line 194) | async function handleSelectChange() {
  function updateAnswer (line 260) | function updateAnswer(answer: string) {
  function fetchPageContents (line 289) | function fetchPageContents() {

FILE: examples/embeddings/src/embeddings.ts
  function setLabel (line 12) | function setLabel(id: string, text: string) {
  class WebLLMEmbeddings (line 25) | class WebLLMEmbeddings implements EmbeddingsInterface {
    method constructor (line 28) | constructor(engine: webllm.MLCEngineInterface, modelId: string) {
    method _embed (line 33) | async _embed(texts: string[]): Promise<number[][]> {
    method embedQuery (line 45) | async embedQuery(document: string): Promise<number[]> {
    method embedDocuments (line 49) | async embedDocuments(documents: string[]): Promise<number[][]> {
  function webllmAPI (line 72) | async function webllmAPI() {
  function langchainAPI (line 112) | async function langchainAPI() {
  function simpleRAG (line 160) | async function simpleRAG() {

FILE: examples/function-calling/function-calling-manual/src/function_calling_manual.ts
  function setLabel (line 5) | function setLabel(id: string, text: string) {
  function hermes2_example (line 18) | async function hermes2_example() {
  function llama3_1_example (line 86) | async function llama3_1_example() {

FILE: examples/function-calling/function-calling-openai/src/function_calling_openai.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 11) | async function main() {

FILE: examples/get-started-latency-breakdown/src/get_started_latency_breakdown.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  type LatencyBreakdown (line 11) | type LatencyBreakdown = {
  function computeStats (line 19) | function computeStats(
  function main (line 43) | async function main() {

FILE: examples/get-started-web-worker/src/main.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function mainNonStreaming (line 16) | async function mainNonStreaming() {
  function mainStreaming (line 55) | async function mainStreaming() {

FILE: examples/get-started/src/get_started.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 11) | async function main() {

FILE: examples/json-mode/src/json_mode.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 11) | async function main() {

FILE: examples/json-schema/src/json_schema.ts
  function setLabel (line 4) | function setLabel(id: string, text: string) {
  function simpleStructuredTextExample (line 12) | async function simpleStructuredTextExample() {
  function harryPotterExample (line 77) | async function harryPotterExample() {
  function functionCallingExample (line 150) | async function functionCallingExample() {
  function ebnfGrammarExample (line 228) | async function ebnfGrammarExample() {
  function main (line 281) | async function main() {

FILE: examples/logit-processor/src/logit_processor.ts
  constant USE_WEB_WORKER (line 4) | const USE_WEB_WORKER = true;
  constant AUTOREGRESS_LIMIT (line 5) | const AUTOREGRESS_LIMIT = 32;
  function setLabel (line 7) | function setLabel(id: string, text: string) {
  function main (line 15) | async function main() {

FILE: examples/logit-processor/src/my_logit_processor.ts
  class MyLogitProcessor (line 4) | class MyLogitProcessor implements webllm.LogitProcessor {
    method processLogits (line 7) | processLogits(logits: Float32Array): Float32Array {
    method processSampledToken (line 12) | processSampledToken(token: number): void {
    method resetState (line 17) | resetState(): void {

FILE: examples/multi-models/src/main.ts
  function setLabel (line 11) | function setLabel(id: string, text: string) {
  function sequentialGeneration (line 50) | async function sequentialGeneration() {
  function parallelGeneration (line 88) | async function parallelGeneration() {

FILE: examples/multi-round-chat/src/multi_round_chat.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 16) | async function main() {

FILE: examples/next-simple-chat/src/pages/_app.tsx
  function App (line 4) | function App({ Component, pageProps }: AppProps) {

FILE: examples/next-simple-chat/src/pages/_document.tsx
  function Document (line 3) | function Document() {

FILE: examples/next-simple-chat/src/pages/api/hello.ts
  type Data (line 4) | type Data = {
  function handler (line 8) | function handler(

FILE: examples/next-simple-chat/src/pages/index.tsx
  function Home (line 7) | function Home() {

FILE: examples/next-simple-chat/src/utils/chat_ui.ts
  class ChatUI (line 7) | class ChatUI {
    method constructor (line 16) | constructor(engine: MLCEngineInterface) {
    method pushTask (line 24) | private pushTask(task: () => Promise<void>) {
    method onGenerate (line 33) | async onGenerate(
    method onReset (line 47) | async onReset(clearMessages: () => void) {
    method asyncInitChat (line 61) | async asyncInitChat(
    method unloadChat (line 87) | private async unloadChat() {
    method asyncGenerate (line 95) | private async asyncGenerate(

FILE: examples/qwen3/src/qwen3_example.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function streamResponse (line 12) | async function streamResponse(
  function main (line 34) | async function main() {

FILE: examples/seed-to-reproduce/src/seed.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 17) | async function main() {

FILE: examples/service-worker/src/main.ts
  function setLabel (line 23) | function setLabel(id: string, text: string) {
  function mainNonStreaming (line 36) | async function mainNonStreaming() {
  function mainStreaming (line 74) | async function mainStreaming() {

FILE: examples/simple-chat-js/index.js
  function updateEngineInitProgressCallback (line 17) | function updateEngineInitProgressCallback(report) {
  function initializeWebLLMEngine (line 26) | async function initializeWebLLMEngine() {
  function streamingGenerating (line 36) | async function streamingGenerating(messages, onUpdate, onFinish, onError) {
  function onMessageSend (line 63) | function onMessageSend() {
  function appendMessage (line 108) | function appendMessage(message) {
  function updateLastMessage (line 127) | function updateLastMessage(content) {

FILE: examples/simple-chat-ts/src/simple_chat.ts
  function getElementAndCheck (line 4) | function getElementAndCheck(id: string): HTMLElement {
  class ChatUI (line 12) | class ChatUI {
    method pushTask (line 131) | private pushTask(task: () => Promise<void>) {
    method onGenerate (line 140) | private async onGenerate() {
    method onSelectChange (line 149) | private async onSelectChange(modelSelector: HTMLSelectElement) {
    method onReset (line 164) | private async onReset() {
    method appendMessage (line 177) | private appendMessage(kind, text) {
    method appendUserMessage (line 196) | private appendUserMessage(text: string) {
    method updateLastMessage (line 215) | private updateLastMessage(kind, text) {
    method resetChatHistory (line 238) | private resetChatHistory() {
    method asyncInitChat (line 253) | private async asyncInitChat() {
    method unloadChat (line 275) | private async unloadChat() {
    method asyncGenerate (line 283) | private async asyncGenerate() {

FILE: examples/simple-chat-upload/src/simple_chat.ts
  function getElementAndCheck (line 4) | function getElementAndCheck(id: string): HTMLElement {
  class ChatUI (line 12) | class ChatUI {
    method pushTask (line 131) | private pushTask(task: () => Promise<void>) {
    method onGenerate (line 140) | private async onGenerate() {
    method onSelectChange (line 149) | private async onSelectChange(modelSelector: HTMLSelectElement) {
    method onReset (line 164) | private async onReset() {
    method appendMessage (line 177) | private appendMessage(kind, text) {
    method updateLastMessage (line 195) | private updateLastMessage(kind, text) {
    method resetChatHistory (line 218) | private resetChatHistory() {
    method asyncInitChat (line 233) | private async asyncInitChat() {
    method unloadChat (line 255) | private async unloadChat() {
    method asyncGenerate (line 263) | private async asyncGenerate() {
  function getFileType (line 332) | function getFileType(file: File) {
  function uploadToIndexedDB (line 348) | async function uploadToIndexedDB(file: File) {
  function cacheFile (line 382) | async function cacheFile(file: File, response: Response) {
  function uploadFiles (line 392) | async function uploadFiles(): Promise<void> {

FILE: examples/streaming/src/streaming.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 14) | async function main() {

FILE: examples/structural-tag-tool-use/src/mcp_structural_tag.ts
  type ToolInvocation (line 3) | type ToolInvocation = {
  type ToolDefinition (line 8) | type ToolDefinition = {
  function setLabel (line 66) | function setLabel(id: string, text: string) {
  function appendLog (line 74) | function appendLog(text: string) {
  function parseToolCallBlocks (line 82) | function parseToolCallBlocks(
  function runTool (line 104) | async function runTool(call: ToolInvocation): Promise<Record<string, unk...
  function main (line 127) | async function main() {

FILE: examples/text-completion/src/text_completion.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function main (line 11) | async function main() {

FILE: examples/vision-model/src/utils.ts
  function getImageDataFromURL (line 1) | function getImageDataFromURL(url: string): Promise<ImageData> {
  function imageURLToBase64 (line 21) | async function imageURLToBase64(url: string): Promise<string> {

FILE: examples/vision-model/src/vision_model.ts
  function setLabel (line 4) | function setLabel(id: string, text: string) {
  constant USE_WEB_WORKER (line 12) | const USE_WEB_WORKER = true;
  function main (line 20) | async function main() {

FILE: src/cache_util.ts
  function findModelRecord (line 12) | function findModelRecord(modelId: string, appConfig?: AppConfig): ModelR...
  function hasModelInCache (line 22) | async function hasModelInCache(
  function deleteModelAllInfoInCache (line 35) | async function deleteModelAllInfoInCache(
  function deleteModelInCache (line 51) | async function deleteModelInCache(
  function deleteChatConfigInCache (line 73) | async function deleteChatConfigInCache(
  function deleteModelWasmInCache (line 93) | async function deleteModelWasmInCache(
  function asyncLoadTokenizer (line 119) | async function asyncLoadTokenizer(

FILE: src/config.ts
  type ConvTemplateConfig (line 15) | interface ConvTemplateConfig {
  type Role (line 29) | enum Role {
  type MessagePlaceholders (line 45) | enum MessagePlaceholders {
  type TokenizerInfo (line 58) | interface TokenizerInfo {
  type ChatConfig (line 72) | interface ChatConfig {
  type ChatOptions (line 99) | interface ChatOptions extends Partial<ChatConfig> {}
  type MLCEngineConfig (line 111) | interface MLCEngineConfig {
  type GenerationConfig (line 126) | interface GenerationConfig {
  function postInitAndCheckGenerationConfigValues (line 148) | function postInitAndCheckGenerationConfigValues(
  type ModelType (line 232) | enum ModelType {
  type ModelRecord (line 255) | interface ModelRecord {
  type AppConfig (line 278) | interface AppConfig {

FILE: src/conversation.ts
  type ImageURL (line 27) | type ImageURL = ChatCompletionContentPartImage.ImageURL;
  class Conversation (line 32) | class Conversation {
    method constructor (line 61) | constructor(config: ConvTemplateConfig, isTextCompletion = false) {
    method getPromptArrayInternal (line 67) | private getPromptArrayInternal(
    method getPromptArray (line 236) | getPromptArray(): Array<string | Array<string | ImageURL>> {
    method getPromptArrayLastRound (line 251) | getPromptArrayLastRound() {
    method getPromptArrayTextCompletion (line 264) | getPromptArrayTextCompletion(): Array<string> {
    method reset (line 274) | reset() {
    method getStopStr (line 284) | getStopStr(): string[] {
    method getStopTokens (line 293) | getStopTokens() {
    method appendMessage (line 297) | appendMessage(
    method appendReplyHeader (line 318) | appendReplyHeader(role: Role) {
    method appendEmptyThinkingReplyHeader (line 328) | appendEmptyThinkingReplyHeader(role: Role, emptyThinkingBlockStr: stri...
    method finishReply (line 338) | finishReply(message: string) {
  function getConversation (line 358) | function getConversation(
  function compareConversationObject (line 378) | function compareConversationObject(
  function getConversationFromChatCompletionRequest (line 465) | function getConversationFromChatCompletionRequest(
  function getFunctionCallUsage (line 524) | function getFunctionCallUsage(request: ChatCompletionRequest): string {

FILE: src/embedding.ts
  class EmbeddingPipeline (line 13) | class EmbeddingPipeline {
    method constructor (line 33) | constructor(tvm: tvmjs.Instance, tokenizer: Tokenizer, config: ChatCon...
    method embedStep (line 95) | async embedStep(
    method dispose (line 252) | dispose() {
    method sync (line 263) | async sync(): Promise<void> {
    method asyncLoadWebGPUPipelines (line 268) | async asyncLoadWebGPUPipelines() {
    method getCurRoundEmbedTotalTime (line 277) | getCurRoundEmbedTotalTime(): number {
    method getCurRoundEmbedTotalTokens (line 284) | getCurRoundEmbedTotalTokens(): number {
    method getCurRoundEmbedTokensPerSec (line 291) | getCurRoundEmbedTokensPerSec(): number {

FILE: src/engine.ts
  function CreateMLCEngine (line 90) | async function CreateMLCEngine(
  class MLCEngine (line 106) | class MLCEngine implements MLCEngineInterface {
    method constructor (line 141) | constructor(engineConfig?: MLCEngineConfig) {
    method setAppConfig (line 163) | setAppConfig(appConfig: AppConfig) {
    method setInitProgressCallback (line 167) | setInitProgressCallback(initProgressCallback?: InitProgressCallback) {
    method getInitProgressCallback (line 171) | getInitProgressCallback() {
    method setLogitProcessorRegistry (line 175) | setLogitProcessorRegistry(
    method setLogLevel (line 186) | setLogLevel(logLevel: LogLevel) {
    method reload (line 194) | async reload(
    method reloadInternal (line 239) | private async reloadInternal(
    method unload (line 412) | async unload() {
    method _generate (line 437) | private async _generate(
    method asyncGenerate (line 480) | async *asyncGenerate(
    method interruptGenerate (line 751) | async interruptGenerate() {
    method chatCompletion (line 776) | async chatCompletion(
    method completion (line 964) | async completion(
    method embedding (line 1084) | async embedding(
    method getMaxStorageBufferBindingSize (line 1136) | async getMaxStorageBufferBindingSize(): Promise<number> {
    method getGPUVendor (line 1165) | async getGPUVendor(): Promise<string> {
    method getLLMStates (line 1179) | private getLLMStates(
    method getEmbeddingStates (line 1190) | private getEmbeddingStates(
    method getModelStates (line 1209) | private getModelStates(
    method forwardTokensAndSample (line 1273) | async forwardTokensAndSample(
    method getMessage (line 1290) | async getMessage(modelId?: string): Promise<string> {
    method runtimeStatsText (line 1295) | async runtimeStatsText(modelId?: string): Promise<string> {
    method resetChat (line 1306) | async resetChat(keepStats = false, modelId?: string) {
    method prefill (line 1346) | async prefill(
    method decode (line 1409) | async decode(pipeline: LLMChatPipeline, genConfig?: GenerationConfig) {

FILE: src/error.ts
  class ModelNotFoundError (line 1) | class ModelNotFoundError extends Error {
    method constructor (line 2) | constructor(modelId: string) {
  class ConfigValueError (line 10) | class ConfigValueError extends Error {
    method constructor (line 11) | constructor(message: string) {
  class MinValueError (line 17) | class MinValueError extends ConfigValueError {
    method constructor (line 18) | constructor(paramName: string, minValue: number) {
  class RangeError (line 24) | class RangeError extends ConfigValueError {
    method constructor (line 25) | constructor(
  class NonNegativeError (line 38) | class NonNegativeError extends ConfigValueError {
    method constructor (line 39) | constructor(paramName: string) {
  class InvalidNumberStringError (line 45) | class InvalidNumberStringError extends ConfigValueError {
    method constructor (line 46) | constructor(paramName: string, actualValue?: string) {
  class DependencyError (line 54) | class DependencyError extends ConfigValueError {
    method constructor (line 55) | constructor(
  class WebGPUNotAvailableError (line 67) | class WebGPUNotAvailableError extends Error {
    method constructor (line 68) | constructor() {
  class WebGPUNotFoundError (line 79) | class WebGPUNotFoundError extends Error {
    method constructor (line 80) | constructor() {
  class ModelNotLoadedError (line 86) | class ModelNotLoadedError extends Error {
    method constructor (line 87) | constructor(requestName: string) {
  class WorkerEngineModelNotLoadedError (line 97) | class WorkerEngineModelNotLoadedError extends Error {
    method constructor (line 98) | constructor(engineName: string) {
  class MessageOrderError (line 106) | class MessageOrderError extends Error {
    method constructor (line 107) | constructor(message: string) {
  class SystemMessageOrderError (line 113) | class SystemMessageOrderError extends Error {
    method constructor (line 114) | constructor() {
  class ContentTypeError (line 120) | class ContentTypeError extends Error {
    method constructor (line 121) | constructor(name: string) {
  class UnsupportedRoleError (line 127) | class UnsupportedRoleError extends Error {
    method constructor (line 128) | constructor(role: string) {
  class UserMessageContentErrorForNonVLM (line 134) | class UserMessageContentErrorForNonVLM extends Error {
    method constructor (line 135) | constructor(modelId: string, modelType: string, content: any) {
  class PrefillChunkSizeSmallerThanImageError (line 145) | class PrefillChunkSizeSmallerThanImageError extends Error {
    method constructor (line 146) | constructor(prefillChunkSize: number, imageEmbedSize: number) {
  class CannotFindImageEmbedError (line 156) | class CannotFindImageEmbedError extends Error {
    method constructor (line 157) | constructor() {
  class UnsupportedDetailError (line 166) | class UnsupportedDetailError extends Error {
    method constructor (line 167) | constructor(detail: string) {
  class UnsupportedImageURLError (line 175) | class UnsupportedImageURLError extends Error {
    method constructor (line 176) | constructor(url: string) {
  class MultipleTextContentError (line 184) | class MultipleTextContentError extends Error {
    method constructor (line 185) | constructor() {
  class ToolCallOutputParseError (line 193) | class ToolCallOutputParseError extends Error {
    method constructor (line 194) | constructor(outputMessage: string, error: Error) {
  class ToolCallOutputInvalidTypeError (line 203) | class ToolCallOutputInvalidTypeError extends Error {
    method constructor (line 204) | constructor(expectedType: string) {
  class ToolCallOutputMissingFieldsError (line 212) | class ToolCallOutputMissingFieldsError extends Error {
    method constructor (line 213) | constructor(missingFields: string[], object: any) {
  class ConfigurationNotInitializedError (line 221) | class ConfigurationNotInitializedError extends Error {
    method constructor (line 222) | constructor() {
  class MissingModelWasmError (line 230) | class MissingModelWasmError extends Error {
    method constructor (line 231) | constructor(modelId: string) {
  class FeatureSupportError (line 239) | class FeatureSupportError extends Error {
    method constructor (line 240) | constructor(feature: string) {
  class UnsupportedFieldsError (line 248) | class UnsupportedFieldsError extends Error {
    method constructor (line 249) | constructor(unsupportedFields: string[], targetClass: string) {
  class ShaderF16SupportError (line 258) | class ShaderF16SupportError extends FeatureSupportError {
    method constructor (line 259) | constructor() {
  class DeviceLostError (line 267) | class DeviceLostError extends Error {
    method constructor (line 268) | constructor() {
  class InvalidToolChoiceError (line 276) | class InvalidToolChoiceError extends Error {
    method constructor (line 277) | constructor(toolChoice: string) {
  class UnsupportedToolChoiceTypeError (line 285) | class UnsupportedToolChoiceTypeError extends Error {
    method constructor (line 286) | constructor() {
  class FunctionNotFoundError (line 294) | class FunctionNotFoundError extends Error {
    method constructor (line 295) | constructor(functionName: string) {
  class UnsupportedToolTypeError (line 303) | class UnsupportedToolTypeError extends Error {
    method constructor (line 304) | constructor() {
  class EngineNotLoadedError (line 309) | class EngineNotLoadedError extends Error {
    method constructor (line 310) | constructor() {
  class UnsupportedTokenizerFilesError (line 317) | class UnsupportedTokenizerFilesError extends Error {
    method constructor (line 318) | constructor(files: string[]) {
  class WindowSizeConfigurationError (line 324) | class WindowSizeConfigurationError extends Error {
    method constructor (line 325) | constructor(contextWindowSize: number, slidingWindowSize: number) {
  class AttentionSinkSizeError (line 335) | class AttentionSinkSizeError extends Error {
    method constructor (line 336) | constructor() {
  class WindowSizeSpecificationError (line 346) | class WindowSizeSpecificationError extends Error {
    method constructor (line 347) | constructor() {
  class ContextWindowSizeExceededError (line 356) | class ContextWindowSizeExceededError extends Error {
    method constructor (line 357) | constructor(numPromptTokens: number, contextWindowSize: number) {
  class NonWorkerEnvironmentError (line 367) | class NonWorkerEnvironmentError extends Error {
    method constructor (line 368) | constructor(className: string) {
  class NoServiceWorkerAPIError (line 374) | class NoServiceWorkerAPIError extends Error {
    method constructor (line 375) | constructor() {
  class ServiceWorkerInitializationError (line 384) | class ServiceWorkerInitializationError extends Error {
    method constructor (line 385) | constructor() {
  class StreamingCountError (line 394) | class StreamingCountError extends Error {
    method constructor (line 395) | constructor() {
  class SeedTypeError (line 401) | class SeedTypeError extends Error {
    method constructor (line 402) | constructor(seed: any) {
  class InvalidResponseFormatError (line 407) | class InvalidResponseFormatError extends Error {
    method constructor (line 408) | constructor() {
  class InvalidResponseFormatGrammarError (line 414) | class InvalidResponseFormatGrammarError extends Error {
    method constructor (line 415) | constructor() {
  class InvalidResponseFormatStructuralTagError (line 424) | class InvalidResponseFormatStructuralTagError extends Error {
    method constructor (line 425) | constructor() {
  class CustomResponseFormatError (line 434) | class CustomResponseFormatError extends Error {
    method constructor (line 435) | constructor(currentFormat: any) {
  class UnsupportedModelIdError (line 445) | class UnsupportedModelIdError extends Error {
    method constructor (line 446) | constructor(currentModelId: string, supportedModelIds: string[]) {
  class CustomSystemPromptError (line 454) | class CustomSystemPromptError extends Error {
    method constructor (line 455) | constructor() {
  class InvalidStreamOptionsError (line 463) | class InvalidStreamOptionsError extends Error {
    method constructor (line 464) | constructor() {
  class UnknownMessageKindError (line 469) | class UnknownMessageKindError extends Error {
    method constructor (line 470) | constructor(msgKind: string, msgContent: any) {
  class TextCompletionExpectsKVEmptyError (line 476) | class TextCompletionExpectsKVEmptyError extends Error {
    method constructor (line 477) | constructor() {
  class TextCompletionConversationExpectsPrompt (line 483) | class TextCompletionConversationExpectsPrompt extends Error {
    method constructor (line 484) | constructor() {
  class TextCompletionConversationError (line 492) | class TextCompletionConversationError extends Error {
    method constructor (line 493) | constructor(funcName: string) {
  class EmbeddingUnsupportedEncodingFormatError (line 499) | class EmbeddingUnsupportedEncodingFormatError extends Error {
    method constructor (line 500) | constructor() {
  class EmbeddingUnsupportedModelError (line 506) | class EmbeddingUnsupportedModelError extends Error {
    method constructor (line 507) | constructor(currentModel: string) {
  class EmbeddingSlidingWindowError (line 517) | class EmbeddingSlidingWindowError extends Error {
    method constructor (line 518) | constructor(sliding_window_size: number) {
  class EmbeddingChunkingUnsupportedError (line 527) | class EmbeddingChunkingUnsupportedError extends Error {
    method constructor (line 528) | constructor(contextWindowSize: number, prefillChunkSize: number) {
  class EmbeddingExceedContextWindowSizeError (line 538) | class EmbeddingExceedContextWindowSizeError extends Error {
    method constructor (line 539) | constructor(contextWindowSize: number, receivedSize: number) {
  class EmbeddingInputEmptyError (line 548) | class EmbeddingInputEmptyError extends Error {
    method constructor (line 549) | constructor() {
  class ReloadArgumentSizeUnmatchedError (line 555) | class ReloadArgumentSizeUnmatchedError extends Error {
    method constructor (line 556) | constructor(numModelId: number, numChatOpts: number) {
  class UnclearModelToUseError (line 565) | class UnclearModelToUseError extends Error {
    method constructor (line 566) | constructor(loadedModels: string[], requestName: string) {
  class SpecifiedModelNotFoundError (line 575) | class SpecifiedModelNotFoundError extends Error {
    method constructor (line 576) | constructor(
  class IncorrectPipelineLoadedError (line 590) | class IncorrectPipelineLoadedError extends Error {
    method constructor (line 591) | constructor(
  class ReloadModelIdNotUniqueError (line 604) | class ReloadModelIdNotUniqueError extends Error {
    method constructor (line 605) | constructor(modelId: string[]) {

FILE: src/extension_service_worker.ts
  type ExtensionMLCEngineConfig (line 13) | interface ExtensionMLCEngineConfig extends MLCEngineConfig {
  class ServiceWorkerMLCEngineHandler (line 34) | class ServiceWorkerMLCEngineHandler extends WebWorkerMLCEngineHandler {
    method constructor (line 37) | constructor(port: chrome.runtime.Port) {
    method postMessage (line 43) | postMessage(msg: any) {
    method setPort (line 47) | setPort(port: chrome.runtime.Port) {
    method onPortDisconnect (line 52) | onPortDisconnect(port: chrome.runtime.Port) {
    method onmessage (line 58) | onmessage(event: any): void {
  function CreateServiceWorkerMLCEngine (line 118) | async function CreateServiceWorkerMLCEngine(
  class PortAdapter (line 132) | class PortAdapter implements ChatWorker {
    method constructor (line 136) | constructor(port: chrome.runtime.Port) {
    method handleMessage (line 142) | private handleMessage(message: any) {
    method onmessage (line 149) | get onmessage(): (message: any) => void {
    method onmessage (line 153) | set onmessage(listener: (message: any) => void) {
  class ServiceWorkerMLCEngine (line 166) | class ServiceWorkerMLCEngine extends WebWorkerMLCEngine {
    method constructor (line 170) | constructor(engineConfig?: ExtensionMLCEngineConfig, keepAliveMs = 100...

FILE: src/llm_chat.ts
  type ImageURL (line 36) | type ImageURL = ChatCompletionContentPartImage.ImageURL;
  class LLMChatPipeline (line 38) | class LLMChatPipeline {
    method constructor (line 150) | constructor(
    method dispose (line 345) | dispose() {
    method getMessage (line 366) | getMessage() {
    method resetRuntimeStats (line 373) | resetRuntimeStats() {
    method resetChat (line 383) | resetChat(keepStats = false) {
    method resetKVCache (line 398) | resetKVCache() {
    method stopped (line 414) | stopped(): boolean {
    method getFinishReason (line 421) | getFinishReason(): ChatCompletionFinishReason | undefined {
    method getTokenLogprobArray (line 429) | getTokenLogprobArray(): Array<ChatCompletionTokenLogprob> {
    method getCurRoundDecodingTotalTokens (line 436) | getCurRoundDecodingTotalTokens(): number {
    method getCurRoundPrefillTotalTokens (line 443) | getCurRoundPrefillTotalTokens(): number {
    method getCurRoundDecodingTotalTime (line 450) | getCurRoundDecodingTotalTime(): number {
    method getCurRoundPrefillTotalTime (line 457) | getCurRoundPrefillTotalTime(): number {
    method getCurRoundGrammarInitTotalTime (line 464) | getCurRoundGrammarInitTotalTime(): number {
    method getCurRoundGrammarPerTokenTotalTime (line 472) | getCurRoundGrammarPerTokenTotalTime(): number {
    method getCurRoundLatencyBreakdown (line 479) | getCurRoundLatencyBreakdown(): LatencyBreakdown {
    method runtimeStatsText (line 486) | runtimeStatsText(): string {
    method curRoundRuntimeStatsText (line 496) | curRoundRuntimeStatsText(): string {
    method getCurRoundPrefillTokensPerSec (line 506) | getCurRoundPrefillTokensPerSec(): number {
    method getCurRoundDecodingTokensPerSec (line 513) | getCurRoundDecodingTokensPerSec(): number {
    method setSeed (line 520) | setSeed(seed: number): void {
    method getResponseFormatKey (line 524) | private getResponseFormatKey(
    method getConversationObject (line 552) | getConversationObject(): Conversation {
    method setConversation (line 559) | setConversation(newConv: Conversation) {
    method asyncLoadWebGPUPipelines (line 565) | async asyncLoadWebGPUPipelines() {
    method prefillStep (line 572) | async prefillStep(
    method decodeStep (line 763) | async decodeStep(genConfig?: GenerationConfig): Promise<void> {
    method triggerStop (line 802) | triggerStop() {
    method processNextToken (line 819) | private processNextToken(
    method getTokensEmbeddings (line 921) | private getTokensEmbeddings(inputTokens: number[]): tvmjs.Tensor {
    method calculateResizeShape (line 946) | private calculateResizeShape(
    method calculateCropShape (line 966) | private calculateCropShape(
    method getImageEmbeddings (line 982) | private async getImageEmbeddings(
    method embedAndForward (line 1042) | private async embedAndForward(
    method updateLogitsOnCPU (line 1101) | private updateLogitsOnCPU(logits: tvmjs.Tensor): tvmjs.Tensor {
    method sampleTokenFromLogits (line 1115) | private async sampleTokenFromLogits(
    method getInputData (line 1522) | private getInputData(): [Array<Array<number> | ImageURL>, number] {
    method forwardTokensAndSample (line 1593) | async forwardTokensAndSample(
    method getTokenLogprob (line 1653) | private getTokenLogprob(
    method sync (line 1694) | async sync(): Promise<void> {
    method evaluate (line 1699) | async evaluate() {

FILE: src/message.ts
  type RequestKind (line 18) | type RequestKind =
  type ResponseKind (line 40) | type ResponseKind = "return" | "throw" | "initProgressCallback";
  type ReloadParams (line 42) | interface ReloadParams {
  type ResetChatParams (line 46) | interface ResetChatParams {
  type GetMessageParams (line 50) | interface GetMessageParams {
  type RuntimeStatsTextParams (line 53) | interface RuntimeStatsTextParams {
  type ForwardTokensAndSampleParams (line 56) | interface ForwardTokensAndSampleParams {
  type ChatCompletionNonStreamingParams (line 73) | interface ChatCompletionNonStreamingParams {
  type ChatCompletionStreamInitParams (line 78) | interface ChatCompletionStreamInitParams {
  type CompletionNonStreamingParams (line 84) | interface CompletionNonStreamingParams {
  type CompletionStreamInitParams (line 89) | interface CompletionStreamInitParams {
  type EmbeddingParams (line 95) | interface EmbeddingParams {
  type CompletionStreamNextChunkParams (line 100) | interface CompletionStreamNextChunkParams {
  type CustomRequestParams (line 104) | interface CustomRequestParams {
  type MessageContent (line 108) | type MessageContent =
  type WorkerRequest (line 137) | type WorkerRequest = {
  type HeartbeatWorkerResponse (line 143) | type HeartbeatWorkerResponse = {
  type OneTimeWorkerResponse (line 148) | type OneTimeWorkerResponse = {
  type InitProgressWorkerResponse (line 154) | type InitProgressWorkerResponse = {
  type WorkerResponse (line 160) | type WorkerResponse =

FILE: src/openai_api_protocols/chat_completion.ts
  class Chat (line 50) | class Chat {
    method constructor (line 54) | constructor(engine: MLCEngineInterface) {
  class Completions (line 60) | class Completions {
    method constructor (line 63) | constructor(engine: MLCEngineInterface) {
    method create (line 74) | create(
  type ChatCompletionRequestBase (line 91) | interface ChatCompletionRequestBase {
  type ChatCompletionRequestNonStreaming (line 289) | interface ChatCompletionRequestNonStreaming
  type ChatCompletionRequestStreaming (line 297) | interface ChatCompletionRequestStreaming
  type ChatCompletionRequest (line 305) | type ChatCompletionRequest =
  type ChatCompletion (line 312) | interface ChatCompletion {
  type ChatCompletionChunk (line 362) | interface ChatCompletionChunk {
  function postInitAndCheckFields (line 418) | function postInitAndCheckFields(
  type ChatCompletionContentPart (line 609) | type ChatCompletionContentPart =
  type ChatCompletionContentPartText (line 613) | interface ChatCompletionContentPartText {
  type ImageURL (line 626) | interface ImageURL {
  type ChatCompletionContentPartImage (line 639) | interface ChatCompletionContentPartImage {
  type ChatCompletionMessageToolCall (line 649) | interface ChatCompletionMessageToolCall {
  type Function (line 671) | interface Function {
  type ChatCompletionRole (line 690) | type ChatCompletionRole =
  type ChatCompletionStreamOptions (line 700) | interface ChatCompletionStreamOptions {
  type ChatCompletionSystemMessageParam (line 710) | interface ChatCompletionSystemMessageParam {
  type ChatCompletionUserMessageParam (line 722) | interface ChatCompletionUserMessageParam {
  type ChatCompletionAssistantMessageParam (line 742) | interface ChatCompletionAssistantMessageParam {
  type ChatCompletionToolMessageParam (line 767) | interface ChatCompletionToolMessageParam {
  type ChatCompletionMessageParam (line 784) | type ChatCompletionMessageParam =
  type FunctionParameters (line 801) | type FunctionParameters = Record<string, unknown>;
  type FunctionDefinition (line 803) | interface FunctionDefinition {
  type ChatCompletionTool (line 828) | interface ChatCompletionTool {
  type ChatCompletionNamedToolChoice (line 841) | interface ChatCompletionNamedToolChoice {
  type Function (line 851) | interface Function {
  type ChatCompletionToolChoiceOption (line 870) | type ChatCompletionToolChoiceOption =
  type TopLogprob (line 878) | interface TopLogprob {
  type ChatCompletionTokenLogprob (line 901) | interface ChatCompletionTokenLogprob {
  type ChatCompletionMessage (line 935) | interface ChatCompletionMessage {
  type CompletionUsage (line 955) | interface CompletionUsage {
  type ChatCompletionFinishReason (line 1032) | type ChatCompletionFinishReason =
  type Choice (line 1039) | interface Choice {
  type Logprobs (line 1068) | interface Logprobs {
  type Choice (line 1078) | interface Choice {
  type Delta (line 1107) | interface Delta {
  type ToolCall (line 1122) | interface ToolCall {
  type Function (line 1142) | interface Function {
  type Logprobs (line 1162) | interface Logprobs {
  type ResponseFormat (line 1194) | interface ResponseFormat {

FILE: src/openai_api_protocols/completion.ts
  class Completions (line 32) | class Completions {
    method constructor (line 35) | constructor(engine: MLCEngineInterface) {
    method create (line 46) | create(
  type CompletionCreateParamsBase (line 62) | interface CompletionCreateParamsBase {
  type CompletionCreateParams (line 248) | type CompletionCreateParams =
  type CompletionCreateParamsNonStreaming (line 252) | interface CompletionCreateParamsNonStreaming
  type CompletionCreateParamsStreaming (line 260) | interface CompletionCreateParamsStreaming
  type Completion (line 272) | interface Completion {
  type CompletionChoice (line 314) | interface CompletionChoice {
  function postInitAndCheckFields (line 347) | function postInitAndCheckFields(

FILE: src/openai_api_protocols/embedding.ts
  class Embeddings (line 25) | class Embeddings {
    method constructor (line 28) | constructor(engine: MLCEngineInterface) {
    method create (line 35) | create(request: EmbeddingCreateParams): Promise<CreateEmbeddingRespons...
  type CreateEmbeddingResponse (line 40) | interface CreateEmbeddingResponse {
  type Usage (line 67) | interface Usage {
  type Embedding (line 93) | interface Embedding {
  type EmbeddingCreateParams (line 111) | interface EmbeddingCreateParams {
  function postInitAndCheckFields (line 159) | function postInitAndCheckFields(

FILE: src/service_worker.ts
  type IServiceWorker (line 20) | type IServiceWorker = globalThis.ServiceWorker;
  class ServiceWorkerMLCEngineHandler (line 38) | class ServiceWorkerMLCEngineHandler extends WebWorkerMLCEngineHandler {
    method constructor (line 45) | constructor() {
    method postMessage (line 74) | postMessage(message: WorkerResponse) {
    method onmessage (line 87) | onmessage(
  class ServiceWorker (line 152) | class ServiceWorker implements ChatWorker {
    method onmessage (line 155) | get onmessage() {
    method onmessage (line 159) | set onmessage(handler: (event: any) => void) {
    method postMessage (line 168) | postMessage(message: WorkerRequest) {
  function CreateServiceWorkerMLCEngine (line 192) | async function CreateServiceWorkerMLCEngine(
  class ServiceWorkerMLCEngine (line 218) | class ServiceWorkerMLCEngine extends WebWorkerMLCEngine {
    method constructor (line 221) | constructor(engineConfig?: MLCEngineConfig, keepAliveMs = 10000) {
    method onmessage (line 235) | onmessage(event: any): void {

FILE: src/support.ts
  function getTopProbs (line 31) | function getTopProbs(
  function getTokenTableFromTokenizer (line 76) | function getTokenTableFromTokenizer(tokenizer: Tokenizer): string[] {
  function cleanModelUrl (line 90) | function cleanModelUrl(modelUrl: string): string {
  function getToolCallFromOutputMessage (line 137) | function getToolCallFromOutputMessage(
  function findModelRecord (line 206) | function findModelRecord(
  function getModelIdToUse (line 225) | function getModelIdToUse(
  function getChunkedPrefillInputData (line 283) | function getChunkedPrefillInputData(
  type Cont (line 359) | type Cont = () => void;
  class CustomLock (line 368) | class CustomLock {
    method acquire (line 372) | public async acquire(): Promise<void> {
    method release (line 385) | public async release(): Promise<void> {
  type ImageURL (line 406) | type ImageURL = ChatCompletionContentPartImage.ImageURL;
  constant IMAGE_EMBED_SIZE (line 409) | const IMAGE_EMBED_SIZE = 1921;
  function getImageDataFromURL (line 414) | async function getImageDataFromURL(url: string): Promise<ImageData> {
  function getRGBArrayFromImageData (line 433) | function getRGBArrayFromImageData(

FILE: src/types.ts
  type InitProgressReport (line 22) | interface InitProgressReport {
  type InitProgressCallback (line 31) | type InitProgressCallback = (report: InitProgressReport) => void;
  type LogitProcessor (line 38) | interface LogitProcessor {
  type MLCEngineInterface (line 62) | interface MLCEngineInterface {
  constant LOG_LEVELS (line 245) | const LOG_LEVELS = {
  type LogLevel (line 253) | type LogLevel = keyof typeof LOG_LEVELS;
  type LatencyBreakdown (line 255) | type LatencyBreakdown = {

FILE: src/utils.ts
  function areArraysEqual (line 4) | function areArraysEqual(arr1?: Array<any>, arr2?: Array<any>): boolean {
  function areObjectsEqual (line 15) | function areObjectsEqual(obj1: any, obj2: any): boolean {
  function areModelRecordsEqual (line 32) | function areModelRecordsEqual(
  function areAppConfigsEqual (line 71) | function areAppConfigsEqual(
  function areChatOptionsEqual (line 100) | function areChatOptionsEqual(
  function areChatOptionsListEqual (line 124) | function areChatOptionsListEqual(

FILE: src/web_worker.ts
  class WebWorkerMLCEngineHandler (line 61) | class WebWorkerMLCEngineHandler {
    method constructor (line 84) | constructor() {
    method postMessage (line 100) | postMessage(msg: any) {
    method setLogitProcessorRegistry (line 105) | setLogitProcessorRegistry(
    method handleTask (line 111) | async handleTask<T extends MessageContent>(
    method onmessage (line 134) | onmessage(
    method reloadIfUnmatched (line 364) | async reloadIfUnmatched(
  type ChatWorker (line 380) | interface ChatWorker {
  function CreateWebWorkerMLCEngine (line 401) | async function CreateWebWorkerMLCEngine(
  class WebWorkerMLCEngine (line 422) | class WebWorkerMLCEngine implements MLCEngineInterface {
    method constructor (line 443) | constructor(worker: ChatWorker, engineConfig?: MLCEngineConfig) {
    method setInitProgressCallback (line 469) | setInitProgressCallback(initProgressCallback?: InitProgressCallback) {
    method getInitProgressCallback (line 473) | getInitProgressCallback(): InitProgressCallback | undefined {
    method setAppConfig (line 477) | setAppConfig(appConfig: AppConfig) {
    method setLogLevel (line 486) | setLogLevel(logLevel: LogLevel) {
    method getPromise (line 496) | protected getPromise<T extends MessageContent>(
    method reload (line 522) | async reload(
    method getMaxStorageBufferBindingSize (line 547) | async getMaxStorageBufferBindingSize(): Promise<number> {
    method getGPUVendor (line 556) | async getGPUVendor(): Promise<string> {
    method getMessage (line 565) | async getMessage(modelId?: string): Promise<string> {
    method runtimeStatsText (line 576) | async runtimeStatsText(modelId?: string): Promise<string> {
    method interruptGenerate (line 587) | interruptGenerate(): void {
    method unload (line 596) | async unload(): Promise<void> {
    method resetChat (line 607) | async resetChat(keepStats = false, modelId?: string): Promise<void> {
    method forwardTokensAndSample (line 619) | async forwardTokensAndSample(
    method asyncGenerate (line 647) | async *asyncGenerate(
    method chatCompletion (line 677) | async chatCompletion(
    method completion (line 736) | async completion(
    method embedding (line 786) | async embedding(
    method onmessage (line 804) | onmessage(event: any) {

FILE: tests/cache_util.test.ts
  class BaseCache (line 20) | class BaseCache {
    method constructor (line 21) | constructor(private name: string) {}
    method deleteInCache (line 22) | async deleteInCache(url: string) {
    method fetchWithCache (line 25) | async fetchWithCache(url: string, format: string) {

FILE: tests/conversation.test.ts
  type ImageURL (line 89) | type ImageURL = ChatCompletionContentPartImage.ImageURL;

FILE: tests/embedding_stats.test.ts
  type EmbeddingLike (line 8) | type EmbeddingLike = EmbeddingPipeline & Record<string, any>;
  function createEmbeddingPipelineBase (line 39) | function createEmbeddingPipelineBase(): EmbeddingLike {
  function createNDArray (line 80) | function createNDArray() {

FILE: tests/engine_integration.test.ts
  type ChatConfig (line 20) | type ChatConfig = import("../src/config").ChatConfig;
  type Conversation (line 21) | type Conversation = import("../src/conversation").Conversation;
  type TVMInstance (line 22) | type TVMInstance = import("@mlc-ai/web-runtime").Instance;
  type Tokenizer (line 23) | type Tokenizer = import("@mlc-ai/web-tokenizers").Tokenizer;
  class MockLLMChatPipeline (line 30) | class MockLLMChatPipeline {
    method constructor (line 55) | constructor(_tvm: TVMInstance, _tokenizer: Tokenizer, config: ChatConf...
    method asyncLoadWebGPUPipelines (line 62) | async asyncLoadWebGPUPipelines() {}
    method dispose (line 63) | dispose() {}
    method sync (line 64) | async sync() {}
    method getConversationObject (line 66) | getConversationObject() {
    method setConversation (line 70) | setConversation(newConv: Conversation) {
    method resetChat (line 74) | resetChat() {
    method prefillStep (line 80) | async prefillStep(
    method decodeStep (line 98) | async decodeStep(genConfig?: { max_tokens?: number | null }) {
    method stopped (line 117) | stopped() {
    method triggerStop (line 121) | triggerStop() {
    method getMessage (line 126) | getMessage() {
    method getFinishReason (line 130) | getFinishReason() {
    method getCurRoundDecodingTotalTokens (line 134) | getCurRoundDecodingTotalTokens() {
    method getCurRoundPrefillTotalTokens (line 138) | getCurRoundPrefillTotalTokens() {
    method getCurRoundPrefillTokensPerSec (line 142) | getCurRoundPrefillTokensPerSec() {
    method getCurRoundDecodingTokensPerSec (line 146) | getCurRoundDecodingTokensPerSec() {
    method getCurRoundGrammarInitTotalTime (line 150) | getCurRoundGrammarInitTotalTime() {
    method getCurRoundPrefillTotalTime (line 154) | getCurRoundPrefillTotalTime() {
    method getCurRoundDecodingTotalTime (line 158) | getCurRoundDecodingTotalTime() {
    method getCurRoundGrammarPerTokenTotalTime (line 162) | getCurRoundGrammarPerTokenTotalTime() {
    method getCurRoundLatencyBreakdown (line 166) | getCurRoundLatencyBreakdown() {
    method getTokenLogprobArray (line 177) | getTokenLogprobArray() {
    method forwardTokensAndSample (line 181) | async forwardTokensAndSample(inputIds: Array<number>): Promise<number> {
    method runtimeStatsText (line 185) | async runtimeStatsText() {
  class MockEmbeddingPipeline (line 194) | class MockEmbeddingPipeline {
    method dispose (line 197) | dispose() {}
    method sync (line 198) | async sync() {}
    method embedStep (line 199) | async embedStep(
    method getCurRoundEmbedTotalTokens (line 205) | getCurRoundEmbedTotalTokens(): number {
    method getCurRoundEmbedTokensPerSec (line 213) | getCurRoundEmbedTokensPerSec(): number {
  constant MODEL_ID (line 221) | const MODEL_ID = "mock-model";
  constant SECOND_MODEL_ID (line 222) | const SECOND_MODEL_ID = "mock-model-2";
  constant EMBED_MODEL_ID (line 223) | const EMBED_MODEL_ID = "mock-embed";
  function createEngineWithPipeline (line 259) | function createEngineWithPipeline(decodeLimit = 2, modelId = MODEL_ID) {
  function createEngineWithMultiplePipelines (line 286) | function createEngineWithMultiplePipelines() {
  function createEngineWithEmbeddingPipeline (line 330) | function createEngineWithEmbeddingPipeline() {

FILE: tests/extension_service_worker.test.ts
  type MockPort (line 35) | type MockPort = chrome.runtime.Port & {
  function createPort (line 40) | function createPort(): MockPort {
  function createHandler (line 56) | function createHandler() {
  function mockChromeRuntime (line 96) | function mockChromeRuntime(port: MockPort = createPort()) {

FILE: tests/llm_chat_pipeline.test.ts
  type XGrammarMock (line 42) | type XGrammarMock = {
  type PipelineLike (line 69) | type PipelineLike = LLMChatPipeline & Record<string, any>;
  function createPipeline (line 71) | function createPipeline(): PipelineLike {
  function preparePrefillPipeline (line 200) | function preparePrefillPipeline(): PipelineLike {

FILE: tests/scripts/sanity_checks/sanity_checks.ts
  function setLabel (line 3) | function setLabel(id: string, text: string) {
  function createEngine (line 9) | async function createEngine(
  function deleteModel (line 21) | async function deleteModel(modelId: string, appConfig: webllm.AppConfig) {
  function testLogitProcessor (line 25) | async function testLogitProcessor(
  function testLogitBias (line 71) | async function testLogitBias(modelId: string, appConfig: webllm.AppConfi...
  function testPenalties (line 102) | async function testPenalties(modelId: string, appConfig: webllm.AppConfi...
  function testLogprobs (line 127) | async function testLogprobs(modelId: string, appConfig: webllm.AppConfig) {
  function main (line 157) | async function main() {

FILE: tests/service_worker.test.ts
  type ServiceWorkerHandlerEvent (line 9) | type ServiceWorkerHandlerEvent = Parameters<
  function setupWorkerScope (line 42) | function setupWorkerScope() {
  function setupNavigator (line 49) | function setupNavigator(options?: {
  function createHandler (line 70) | function createHandler() {

FILE: tests/util.test.ts
  type ImageURL (line 328) | type ImageURL = ChatCompletionContentPartImage.ImageURL;
  function addOne (line 416) | async function addOne() {

FILE: tests/web_worker_handler.test.ts
  function flushMicrotasks (line 50) | function flushMicrotasks() {
  class MockWorker (line 189) | class MockWorker {
    method constructor (line 194) | constructor() {
    method setResponder (line 202) | setResponder(kind: string, responder: (msg: any) => any) {

FILE: utils/vram_requirements/src/vram_requirements.ts
  function setLabel (line 6) | function setLabel(id: string, text: string) {
  type AppConfig (line 14) | interface AppConfig {
  function main (line 25) | async function main() {

Download .json

Condensed preview — 235 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (895K chars).

[
  {
    "path": ".github/workflows/build-site.yaml",
    "chars": 947,
    "preview": "name: Build site and push to gh-pages\n\non:\n  push:\n    branches:\n      - main\n\njobs:\n  build:\n    name: Build site\n    r"
  },
  {
    "path": ".github/workflows/build.yaml",
    "chars": 672,
    "preview": "name: Build\n\non:\n  pull_request:\n    branches:\n      - main\n  push:\n    branches:\n      - main\n  workflow_dispatch:\n\ncon"
  },
  {
    "path": ".github/workflows/linter.yaml",
    "chars": 440,
    "preview": "name: Linter\n\non:\n  push:\n    branches:\n      - main\n  pull_request:\n    branches:\n      - main\n\njobs:\n  lint:\n    runs-"
  },
  {
    "path": ".github/workflows/security.yaml",
    "chars": 1475,
    "preview": "name: Security\n\non:\n  pull_request:\n    branches:\n      - main\n  push:\n    branches:\n      - main\n  schedule:\n    - cron"
  },
  {
    "path": ".github/workflows/tests.yaml",
    "chars": 863,
    "preview": "name: Tests\n\non:\n  pull_request:\n    branches:\n      - main\n  push:\n    branches:\n      - main\n  workflow_dispatch:\n\ncon"
  },
  {
    "path": ".gitignore",
    "chars": 3980,
    "preview": "scratch/\ndist/\nparams/\n*.bak\n# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n*.S\n# C extensio"
  },
  {
    "path": ".gitmodules",
    "chars": 0,
    "preview": ""
  },
  {
    "path": ".husky/pre-commit",
    "chars": 16,
    "preview": "npx lint-staged\n"
  },
  {
    "path": ".lintstagedrc.json",
    "chars": 74,
    "preview": "{\n  \"./**/*.{js,ts,jsx,tsx,json}\": [\"eslint --fix\", \"prettier --write\"]\n}\n"
  },
  {
    "path": ".nvmrc",
    "chars": 8,
    "preview": "v24.11.1"
  },
  {
    "path": ".prettierignore",
    "chars": 65,
    "preview": "dist\ndebug\nlib\nbuild\nnode_modules\n3rdparty\n.eslintrc.cjs\n**/.next"
  },
  {
    "path": ".prettierrc",
    "chars": 29,
    "preview": "{\n  \"trailingComma\": \"all\"\n}\n"
  },
  {
    "path": "CONTRIBUTING.md",
    "chars": 3688,
    "preview": "# Contributing to WebLLM\n\nThank you for your interest in contributing to WebLLM. This guide helps contributors get set u"
  },
  {
    "path": "LICENSE",
    "chars": 11737,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "README.md",
    "chars": 22065,
    "preview": "<div align=\"center\" id=\"top\">\n\n# WebLLM\n[![NPM Package](https://img.shields.io/badge/NPM_Package-Published-cc3534)](http"
  },
  {
    "path": "SECURITY.md",
    "chars": 154,
    "preview": "# Security Policy\n\n## Reporting a Vulnerability\n\nFor security concerns or vulnerability reports, please send email to `m"
  },
  {
    "path": "cleanup-index-js.sh",
    "chars": 2610,
    "preview": "# Remove instances of string \"const{createRequire:createRequire}=await import('module');\"\n# This is required to allow ba"
  },
  {
    "path": "docs/Makefile",
    "chars": 638,
    "preview": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the "
  },
  {
    "path": "docs/README.md",
    "chars": 651,
    "preview": "# WebLLM Documentation\n\nThe documentation was built upon [Sphinx](https://www.sphinx-doc.org/en/master/).\n\n## Dependenci"
  },
  {
    "path": "docs/conf.py",
    "chars": 2512,
    "preview": "# -*- coding: utf-8 -*-\nimport os\nimport sys\n\nimport tlcpack_sphinx_addon\n\n# -- General configuration ------------------"
  },
  {
    "path": "docs/developer/add_models.rst",
    "chars": 330,
    "preview": "Adding Models\n=============\n\nWebLLM allows you to compile custom language models using `MLC-LLM <https://llm.mlc.ai/>`_ "
  },
  {
    "path": "docs/developer/building_from_source.rst",
    "chars": 955,
    "preview": "Building From Source\n====================\n\nClone the Repository\n---------------------\n.. code-block:: bash\n\n   git clone"
  },
  {
    "path": "docs/index.rst",
    "chars": 1638,
    "preview": "👋 Welcome to WebLLM\n====================\n\n`GitHub <https://github.com/mlc-ai/web-llm>`_ | `WebLLM Chat <https://chat.web"
  },
  {
    "path": "docs/make.bat",
    "chars": 765,
    "preview": "@ECHO OFF\n\npushd %~dp0\n\nREM Command file for Sphinx documentation\n\nif \"%SPHINXBUILD%\" == \"\" (\n\tset SPHINXBUILD=sphinx-bu"
  },
  {
    "path": "docs/requirements.txt",
    "chars": 192,
    "preview": "sphinx-tabs == 3.4.1\nsphinx-rtd-theme\nsphinx == 5.2.3\nsphinx-toolbox == 3.4.0\ntlcpack-sphinx-addon==0.2.2\nsphinxcontrib_"
  },
  {
    "path": "docs/user/advanced_usage.rst",
    "chars": 6797,
    "preview": "Advanced Use Cases\n==================\n\nUsing Workers\n-------------\n\nYou can put the heavy computation in a worker script"
  },
  {
    "path": "docs/user/api_reference.rst",
    "chars": 8599,
    "preview": ".. _api-reference:\n\nWebLLM API Reference\n====================\n\nThe ``MLCEngine`` class is the core interface of WebLLM. "
  },
  {
    "path": "docs/user/basic_usage.rst",
    "chars": 4513,
    "preview": "Basic Usage\n================\n\nModel Records in WebLLM\n-----------------------\n\nEach of the model available WebLLM is reg"
  },
  {
    "path": "docs/user/get_started.rst",
    "chars": 2593,
    "preview": "Getting Started with WebLLM\n===========================\n\nThis guide will help you set up WebLLM in your project, install"
  },
  {
    "path": "eslint.config.cjs",
    "chars": 1269,
    "preview": "const {\n    defineConfig,\n    globalIgnores,\n} = require(\"eslint/config\");\n\nconst tsParser = require(\"@typescript-eslint"
  },
  {
    "path": "examples/.gitignore",
    "chars": 18,
    "preview": "package-lock.json\n"
  },
  {
    "path": "examples/README.md",
    "chars": 4294,
    "preview": "# Awesome WebLLM\n\nThis page contains a curated list of examples, tutorials, blogs about WebLLM usecases.\nPlease send a p"
  },
  {
    "path": "examples/abort-reload/README.md",
    "chars": 424,
    "preview": "# WebLLM Get Started App\n\nThis folder provides a demo for cancelling model fetching after calling `engine.reload()`.\n\n``"
  },
  {
    "path": "examples/abort-reload/package.json",
    "chars": 443,
    "preview": "{\n  \"name\": \"get-started\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/get_started."
  },
  {
    "path": "examples/abort-reload/src/get_started.html",
    "chars": 449,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/abort-reload/src/get_started.js",
    "chars": 802,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\nimport { error } from \"loglevel\";\n\nlet engine;\n\nfunction setLabel(id, text) {"
  },
  {
    "path": "examples/cache-usage/README.md",
    "chars": 1071,
    "preview": "# WebLLM Cache Usage\n\nWebLLM supports both the Cache API and IndexedDB, which you can specify via `AppConfig.useIndexedD"
  },
  {
    "path": "examples/cache-usage/package.json",
    "chars": 443,
    "preview": "{\n  \"name\": \"cache-usage\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/cache_usage."
  },
  {
    "path": "examples/cache-usage/src/cache_usage.html",
    "chars": 450,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/cache-usage/src/cache_usage.ts",
    "chars": 2604,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/chrome-extension/README.md",
    "chars": 550,
    "preview": "# WebLLM Chrome Extension\n\n![Chrome Extension](https://github.com/mlc-ai/mlc-llm/assets/11940172/0d94cc73-eff1-4128-a6e4"
  },
  {
    "path": "examples/chrome-extension/package.json",
    "chars": 526,
    "preview": "{\n  \"name\": \"chrome-extension\",\n  \"version\": \"1.0.1\",\n  \"description\": \"\",\n  \"private\": true,\n  \"scripts\": {\n    \"build\""
  },
  {
    "path": "examples/chrome-extension/src/content.js",
    "chars": 225,
    "preview": "// Only the content script is able to access the DOM\nchrome.runtime.onConnect.addListener(function (port) {\n  port.onMes"
  },
  {
    "path": "examples/chrome-extension/src/example.html",
    "chars": 1535,
    "preview": "In the year 2154, humanity had colonized several planets in the distant reaches\nof the galaxy. The planet of Xylophia-IV"
  },
  {
    "path": "examples/chrome-extension/src/manifest.json",
    "chars": 1025,
    "preview": "{\n  \"manifest_version\": 3,\n  \"name\": \"MLCBot\",\n  \"version\": \"0.1.1\",\n  \"description\": \"Chat with your browser\",\n  \"icons"
  },
  {
    "path": "examples/chrome-extension/src/manifest_v2.json",
    "chars": 887,
    "preview": "{\n  \"manifest_version\": 2,\n  \"name\": \"MLCBot\",\n  \"version\": \"0.1.0\",\n  \"description\": \"Chat with your browser\",\n  \"icons"
  },
  {
    "path": "examples/chrome-extension/src/popup.css",
    "chars": 3459,
    "preview": "*,\n*::before,\n*::after {\n  margin: 0;\n  padding: 0;\n  box-sizing: border-box;\n}\n\nhtml {\n  font-family:\n    -apple-system"
  },
  {
    "path": "examples/chrome-extension/src/popup.html",
    "chars": 1281,
    "preview": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"UTF-8\" />\n    <title>Chatbot</title>\n    <link rel=\"stylesh"
  },
  {
    "path": "examples/chrome-extension/src/popup.ts",
    "chars": 8521,
    "preview": "\"use strict\";\n\n// This code is partially adapted from the openai-chatgpt-chrome-extension repo:\n// https://github.com/je"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/README.md",
    "chars": 1761,
    "preview": "# WebLLM Chrome Extension using WebGPU Running on Service Worker\n\n![Chrome Extension](https://github.com/mlc-ai/mlc-llm/"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/package.json",
    "chars": 526,
    "preview": "{\n  \"name\": \"chrome-extension\",\n  \"version\": \"1.0.0\",\n  \"description\": \"\",\n  \"private\": true,\n  \"scripts\": {\n    \"build\""
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/src/background.ts",
    "chars": 454,
    "preview": "import { ExtensionServiceWorkerMLCEngineHandler } from \"@mlc-ai/web-llm\";\n\n// Hookup an engine to a service worker handl"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/src/content.js",
    "chars": 225,
    "preview": "// Only the content script is able to access the DOM\nchrome.runtime.onConnect.addListener(function (port) {\n  port.onMes"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/src/example.html",
    "chars": 1535,
    "preview": "In the year 2154, humanity had colonized several planets in the distant reaches\nof the galaxy. The planet of Xylophia-IV"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/src/manifest.json",
    "chars": 1031,
    "preview": "{\n  \"manifest_version\": 3,\n  \"name\": \"MLCBot\",\n  \"version\": \"0.1.0\",\n  \"description\": \"Chat with your browser\",\n  \"icons"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/src/popup.css",
    "chars": 3466,
    "preview": "*,\n*::before,\n*::after {\n  margin: 0;\n  padding: 0;\n  box-sizing: border-box;\n}\n\nhtml {\n  font-family:\n    -apple-system"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/src/popup.html",
    "chars": 1121,
    "preview": "<!doctype html>\n<html lang=\"en\">\n  <head>\n    <meta charset=\"UTF-8\" />\n    <title>Chatbot</title>\n    <link rel=\"stylesh"
  },
  {
    "path": "examples/chrome-extension-webgpu-service-worker/src/popup.ts",
    "chars": 5091,
    "preview": "\"use strict\";\n\n// This code is partially adapted from the openai-chatgpt-chrome-extension repo:\n// https://github.com/je"
  },
  {
    "path": "examples/embeddings/README.md",
    "chars": 473,
    "preview": "# WebLLM Get Started App\n\nThis folder provides a minimum demo to show WebLLM API in a webapp setting.\nTo try it out, you"
  },
  {
    "path": "examples/embeddings/package.json",
    "chars": 475,
    "preview": "{\n  \"name\": \"embeddings-example\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/embed"
  },
  {
    "path": "examples/embeddings/src/embeddings.html",
    "chars": 448,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/embeddings/src/embeddings.ts",
    "chars": 6852,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\nimport { MemoryVectorStore } from \"langchain/vectorstores/memory\";\nimport typ"
  },
  {
    "path": "examples/function-calling/README.md",
    "chars": 869,
    "preview": "### OpenAI API Demos - Function calling\n\nThis folder contains two main ways of using function calling with WebLLM.\n\n`fun"
  },
  {
    "path": "examples/function-calling/function-calling-manual/README.md",
    "chars": 355,
    "preview": "### Demos - Function calling\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM c"
  },
  {
    "path": "examples/function-calling/function-calling-manual/package.json",
    "chars": 466,
    "preview": "{\n  \"name\": \"openai-api\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/function_call"
  },
  {
    "path": "examples/function-calling/function-calling-manual/src/function_calling_manual.html",
    "chars": 330,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/function-calling/function-calling-manual/src/function_calling_manual.ts",
    "chars": 11495,
    "preview": "/* eslint-disable no-useless-escape */\nimport * as webllm from \"@mlc-ai/web-llm\";\n\n// Common helper methods\nfunction set"
  },
  {
    "path": "examples/function-calling/function-calling-openai/README.md",
    "chars": 355,
    "preview": "### Demos - Function calling\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM c"
  },
  {
    "path": "examples/function-calling/function-calling-openai/package.json",
    "chars": 466,
    "preview": "{\n  \"name\": \"openai-api\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/function_call"
  },
  {
    "path": "examples/function-calling/function-calling-openai/src/function_calling_openai.html",
    "chars": 330,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/function-calling/function-calling-openai/src/function_calling_openai.ts",
    "chars": 2362,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/get-started/README.md",
    "chars": 473,
    "preview": "# WebLLM Get Started App\n\nThis folder provides a minimum demo to show WebLLM API in a webapp setting.\nTo try it out, you"
  },
  {
    "path": "examples/get-started/package.json",
    "chars": 443,
    "preview": "{\n  \"name\": \"get-started\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/get_started."
  },
  {
    "path": "examples/get-started/src/get_started.html",
    "chars": 449,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/get-started/src/get_started.ts",
    "chars": 2769,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/get-started-latency-breakdown/README.md",
    "chars": 547,
    "preview": "# WebLLM Get Started App\n\nThis folder provides a minimum demo to show WebLLM API in a webapp setting with\ncollection of "
  },
  {
    "path": "examples/get-started-latency-breakdown/package.json",
    "chars": 497,
    "preview": "{\n  \"name\": \"get-started-latency-breakdown\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parce"
  },
  {
    "path": "examples/get-started-latency-breakdown/src/get_started_latency_breakdown.html",
    "chars": 467,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/get-started-latency-breakdown/src/get_started_latency_breakdown.ts",
    "chars": 4762,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/get-started-web-worker/README.md",
    "chars": 692,
    "preview": "# WebLLM Get Started with WebWorker\n\nThis folder provides a minimum demo to show WebLLM API using\n[WebWorker](https://de"
  },
  {
    "path": "examples/get-started-web-worker/package.json",
    "chars": 454,
    "preview": "{\n  \"name\": \"get-started-web-worker\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/g"
  },
  {
    "path": "examples/get-started-web-worker/src/get_started.html",
    "chars": 442,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/get-started-web-worker/src/main.ts",
    "chars": 3209,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/get-started-web-worker/src/worker.ts",
    "chars": 222,
    "preview": "import { WebWorkerMLCEngineHandler } from \"@mlc-ai/web-llm\";\n\n// Hookup an engine to a worker handler\nconst handler = ne"
  },
  {
    "path": "examples/json-mode/README.md",
    "chars": 344,
    "preview": "### OpenAI API Demos\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM core pack"
  },
  {
    "path": "examples/json-mode/package.json",
    "chars": 438,
    "preview": "{\n  \"name\": \"openai-api\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/json_mode.htm"
  },
  {
    "path": "examples/json-mode/src/json_mode.html",
    "chars": 276,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/json-mode/src/json_mode.ts",
    "chars": 1478,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/json-schema/README.md",
    "chars": 344,
    "preview": "### OpenAI API Demos\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM core pack"
  },
  {
    "path": "examples/json-schema/package.json",
    "chars": 442,
    "preview": "{\n  \"name\": \"openai-api\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/json_schema.h"
  },
  {
    "path": "examples/json-schema/src/json_schema.html",
    "chars": 278,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/json-schema/src/json_schema.ts",
    "chars": 9950,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\nimport { Type, Static } from \"@sinclair/typebox\";\n\nfunction setLabel(id: stri"
  },
  {
    "path": "examples/logit-processor/README.md",
    "chars": 1390,
    "preview": "# WebLLM Logit Processor and Low-Level API Example\n\nThis folder explains the usage of `LogitProcessor`, demonstrating ho"
  },
  {
    "path": "examples/logit-processor/package.json",
    "chars": 455,
    "preview": "{\n  \"name\": \"logit-processor\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/logit_pr"
  },
  {
    "path": "examples/logit-processor/src/logit_processor.html",
    "chars": 326,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Logit Processor Test Page<"
  },
  {
    "path": "examples/logit-processor/src/logit_processor.ts",
    "chars": 2537,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\nimport { MyLogitProcessor } from \"./my_logit_processor\";\n\nconst USE_WEB_WORKE"
  },
  {
    "path": "examples/logit-processor/src/my_logit_processor.ts",
    "chars": 585,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\n// Define LogitProcessor\nexport class MyLogitProcessor implements webllm.Log"
  },
  {
    "path": "examples/logit-processor/src/worker.ts",
    "chars": 575,
    "preview": "// Serve the chat workload through web worker\nimport * as webllm from \"@mlc-ai/web-llm\";\nimport { MyLogitProcessor } fro"
  },
  {
    "path": "examples/multi-models/README.md",
    "chars": 473,
    "preview": "# WebLLM Get Started App\n\nThis folder provides a minimum demo to show WebLLM API in a webapp setting.\nTo try it out, you"
  },
  {
    "path": "examples/multi-models/package.json",
    "chars": 445,
    "preview": "{\n  \"name\": \"get-started\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/multi_models"
  },
  {
    "path": "examples/multi-models/src/main.ts",
    "chars": 5264,
    "preview": "/**\n * This example demonstrates loading multiple models in the same engine concurrently.\n * sequentialGeneration() show"
  },
  {
    "path": "examples/multi-models/src/multi_models.html",
    "chars": 615,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/multi-models/src/worker.ts",
    "chars": 222,
    "preview": "import { WebWorkerMLCEngineHandler } from \"@mlc-ai/web-llm\";\n\n// Hookup an engine to a worker handler\nconst handler = ne"
  },
  {
    "path": "examples/multi-round-chat/README.md",
    "chars": 344,
    "preview": "### OpenAI API Demos\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM core pack"
  },
  {
    "path": "examples/multi-round-chat/package.json",
    "chars": 452,
    "preview": "{\n  \"name\": \"openai-api\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/multi_round_c"
  },
  {
    "path": "examples/multi-round-chat/src/multi_round_chat.html",
    "chars": 282,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/multi-round-chat/src/multi_round_chat.ts",
    "chars": 2765,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/next-simple-chat/.gitignore",
    "chars": 368,
    "preview": "# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.\n\n# dependencies\n/node_modules\n/.pn"
  },
  {
    "path": "examples/next-simple-chat/README.md",
    "chars": 299,
    "preview": "This is a [Next.js](https://nextjs.org/) project using web-llm.\n\n## Getting Started\n\nFirst, install web-llm from source."
  },
  {
    "path": "examples/next-simple-chat/next.config.js",
    "chars": 571,
    "preview": "/** @type {import('next').NextConfig} */\nconst nextConfig = {\n  reactStrictMode: true,\n\n  webpack: (config, { isServer }"
  },
  {
    "path": "examples/next-simple-chat/package.json",
    "chars": 591,
    "preview": "{\n  \"name\": \"next-simple-chat\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"dev\": \"next dev\",\n    \"buil"
  },
  {
    "path": "examples/next-simple-chat/postcss.config.js",
    "chars": 83,
    "preview": "module.exports = {\n  plugins: {\n    tailwindcss: {},\n    autoprefixer: {},\n  },\n};\n"
  },
  {
    "path": "examples/next-simple-chat/src/pages/_app.tsx",
    "chars": 181,
    "preview": "import \"~/styles/globals.css\";\nimport type { AppProps } from \"next/app\";\n\nexport default function App({ Component, pageP"
  },
  {
    "path": "examples/next-simple-chat/src/pages/_document.tsx",
    "chars": 233,
    "preview": "import { Html, Head, Main, NextScript } from \"next/document\";\n\nexport default function Document() {\n  return (\n    <Html"
  },
  {
    "path": "examples/next-simple-chat/src/pages/api/hello.ts",
    "chars": 312,
    "preview": "// Next.js API route support: https://nextjs.org/docs/api-routes/introduction\nimport type { NextApiRequest, NextApiRespo"
  },
  {
    "path": "examples/next-simple-chat/src/pages/index.tsx",
    "chars": 626,
    "preview": "import Head from \"next/head\";\nimport ChatComponent from \"~/utils/chat_component\";\nimport { Inter } from \"next/font/googl"
  },
  {
    "path": "examples/next-simple-chat/src/styles/globals.css",
    "chars": 2353,
    "preview": "@tailwind base;\n@tailwind components;\n@tailwind utilities;\n\n:root {\n  --foreground-rgb: 0, 0, 0;\n  --background-start-rg"
  },
  {
    "path": "examples/next-simple-chat/src/utils/chat_component.tsx",
    "chars": 2799,
    "preview": "import { useState } from \"react\";\nimport { MLCEngine } from \"@mlc-ai/web-llm\";\nimport ChatUI from \"~/utils/chat_ui\";\n\nco"
  },
  {
    "path": "examples/next-simple-chat/src/utils/chat_ui.ts",
    "chars": 4777,
    "preview": "import {\n  MLCEngineInterface,\n  ChatCompletionMessageParam,\n  CompletionUsage,\n} from \"@mlc-ai/web-llm\";\n\nexport defaul"
  },
  {
    "path": "examples/next-simple-chat/tailwind.config.js",
    "chars": 481,
    "preview": "/** @type {import('tailwindcss').Config} */\nmodule.exports = {\n  content: [\n    \"./src/pages/**/*.{js,ts,jsx,tsx,mdx}\",\n"
  },
  {
    "path": "examples/next-simple-chat/tsconfig.json",
    "chars": 556,
    "preview": "{\n  \"compilerOptions\": {\n    \"target\": \"es5\",\n    \"lib\": [\"dom\", \"dom.iterable\", \"esnext\"],\n    \"allowJs\": true,\n    \"sk"
  },
  {
    "path": "examples/qwen3/README.md",
    "chars": 353,
    "preview": "### OpenAI API Demos w/ Qwen3\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM "
  },
  {
    "path": "examples/qwen3/package.json",
    "chars": 449,
    "preview": "{\n  \"name\": \"qwen3_example\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/qwen3_exam"
  },
  {
    "path": "examples/qwen3/src/qwen3_example.html",
    "chars": 341,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/qwen3/src/qwen3_example.ts",
    "chars": 4748,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/seed-to-reproduce/README.md",
    "chars": 344,
    "preview": "### OpenAI API Demos\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM core pack"
  },
  {
    "path": "examples/seed-to-reproduce/package.json",
    "chars": 435,
    "preview": "{\n  \"name\": \"seed-to-reproduce\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/seed.h"
  },
  {
    "path": "examples/seed-to-reproduce/src/seed.html",
    "chars": 353,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/seed-to-reproduce/src/seed.ts",
    "chars": 2143,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/service-worker/README.md",
    "chars": 156,
    "preview": "# WebLLM Service Worker Example\n\nThis example shows how we can create a page with Web-LLM running in service worker.\n\n``"
  },
  {
    "path": "examples/service-worker/package.json",
    "chars": 489,
    "preview": "{\n  \"name\": \"web-llm-service-worker\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"rm -rf .parc"
  },
  {
    "path": "examples/service-worker/src/index.html",
    "chars": 442,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/service-worker/src/main.ts",
    "chars": 3747,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nconst registerServiceWorker = async () => {\n  if (\"serviceWorker\" in navigat"
  },
  {
    "path": "examples/service-worker/src/sw.ts",
    "chars": 268,
    "preview": "import { ServiceWorkerMLCEngineHandler } from \"@mlc-ai/web-llm\";\n\nlet handler: ServiceWorkerMLCEngineHandler;\n\nself.addE"
  },
  {
    "path": "examples/simple-chat-js/index.css",
    "chars": 1534,
    "preview": "body,\nhtml {\n  font-family: Arial, sans-serif;\n  padding: 10px 20px;\n}\n\n.download-container {\n  display: flex;\n  justify"
  },
  {
    "path": "examples/simple-chat-js/index.html",
    "chars": 915,
    "preview": "<!doctype html>\n<html>\n  <head>\n    <title>Simple Chatbot</title>\n    <meta name=\"viewport\" content=\"width=device-width,"
  },
  {
    "path": "examples/simple-chat-js/index.js",
    "chars": 4409,
    "preview": "import * as webllm from \"https://esm.run/@mlc-ai/web-llm\";\n\n/*************** WebLLM logic ***************/\nconst message"
  },
  {
    "path": "examples/simple-chat-ts/.gitignore",
    "chars": 18,
    "preview": "src/app-config.js\n"
  },
  {
    "path": "examples/simple-chat-ts/README.md",
    "chars": 1758,
    "preview": "# SimpleChat\n\nThis folder provides a complete implementation of a simple\nchat app based on WebLLM. To try it out, you ca"
  },
  {
    "path": "examples/simple-chat-ts/package.json",
    "chars": 537,
    "preview": "{\n  \"name\": \"simple-chat\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"cp src/gh-config.js src"
  },
  {
    "path": "examples/simple-chat-ts/src/gh-config.js",
    "chars": 142,
    "preview": "import { prebuiltAppConfig } from \"@mlc-ai/web-llm\";\n\nexport default {\n  model_list: prebuiltAppConfig.model_list,\n  use"
  },
  {
    "path": "examples/simple-chat-ts/src/llm_chat.css",
    "chars": 3182,
    "preview": ".chatui {\n  display: flex;\n  position: relative;\n  flex-flow: column wrap;\n  justify-content: space-between;\n  width: 10"
  },
  {
    "path": "examples/simple-chat-ts/src/llm_chat.html",
    "chars": 752,
    "preview": "<link href=\"./llm_chat.css\" rel=\"stylesheet\" type=\"text/css\" />\n\n<div class=\"chatui\">\n  <div class=\"chatui-select-wrappe"
  },
  {
    "path": "examples/simple-chat-ts/src/simple_chat.ts",
    "chars": 11933,
    "preview": "import appConfig from \"./app-config\";\nimport * as webllm from \"@mlc-ai/web-llm\";\n\nfunction getElementAndCheck(id: string"
  },
  {
    "path": "examples/simple-chat-ts/src/worker.ts",
    "chars": 230,
    "preview": "// Serve the engine workload through web worker\nimport { WebWorkerMLCEngineHandler } from \"@mlc-ai/web-llm\";\n\nconst hand"
  },
  {
    "path": "examples/simple-chat-upload/.gitignore",
    "chars": 18,
    "preview": "src/app-config.js\n"
  },
  {
    "path": "examples/simple-chat-upload/README.md",
    "chars": 1758,
    "preview": "# SimpleChat\n\nThis folder provides a complete implementation of a simple\nchat app based on WebLLM. To try it out, you ca"
  },
  {
    "path": "examples/simple-chat-upload/package.json",
    "chars": 537,
    "preview": "{\n  \"name\": \"simple-chat\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"cp src/gh-config.js src"
  },
  {
    "path": "examples/simple-chat-upload/src/gh-config.js",
    "chars": 142,
    "preview": "import { prebuiltAppConfig } from \"@mlc-ai/web-llm\";\n\nexport default {\n  model_list: prebuiltAppConfig.model_list,\n  use"
  },
  {
    "path": "examples/simple-chat-upload/src/llm_chat.css",
    "chars": 3182,
    "preview": ".chatui {\n  display: flex;\n  position: relative;\n  flex-flow: column wrap;\n  justify-content: space-between;\n  width: 10"
  },
  {
    "path": "examples/simple-chat-upload/src/llm_chat.html",
    "chars": 934,
    "preview": "<link href=\"./llm_chat.css\" rel=\"stylesheet\" type=\"text/css\" />\n\n<div class=\"chatui\">\n  <div class=\"chatui-select-wrappe"
  },
  {
    "path": "examples/simple-chat-upload/src/simple_chat.ts",
    "chars": 14149,
    "preview": "import appConfig from \"./app-config\";\nimport * as webllm from \"@mlc-ai/web-llm\";\n\nfunction getElementAndCheck(id: string"
  },
  {
    "path": "examples/simple-chat-upload/src/worker.ts",
    "chars": 230,
    "preview": "// Serve the engine workload through web worker\nimport { WebWorkerMLCEngineHandler } from \"@mlc-ai/web-llm\";\n\nconst hand"
  },
  {
    "path": "examples/streaming/README.md",
    "chars": 344,
    "preview": "### OpenAI API Demos\n\nRun `npm install` first, followed by `npm start`.\n\nNote if you would like to hack WebLLM core pack"
  },
  {
    "path": "examples/streaming/package.json",
    "chars": 437,
    "preview": "{\n  \"name\": \"streaming\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/streaming.html"
  },
  {
    "path": "examples/streaming/src/streaming.html",
    "chars": 337,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open co"
  },
  {
    "path": "examples/streaming/src/streaming.ts",
    "chars": 1590,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/structural-tag-tool-use/README.md",
    "chars": 666,
    "preview": "# Structural tag MCP-style tool calls\n\nRun `npm install`, then `npm start` to launch a minimal page that prints progress"
  },
  {
    "path": "examples/structural-tag-tool-use/package.json",
    "chars": 470,
    "preview": "{\n  \"name\": \"structural-tag-tool-use\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/"
  },
  {
    "path": "examples/structural-tag-tool-use/src/mcp_structural_tag.html",
    "chars": 381,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>Structural tag MCP-style tool call"
  },
  {
    "path": "examples/structural-tag-tool-use/src/mcp_structural_tag.ts",
    "chars": 6474,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\ntype ToolInvocation = {\n  name: string;\n  arguments: Record<string, unknown>"
  },
  {
    "path": "examples/text-completion/README.md",
    "chars": 473,
    "preview": "# WebLLM Get Started App\n\nThis folder provides a minimum demo to show WebLLM API in a webapp setting.\nTo try it out, you"
  },
  {
    "path": "examples/text-completion/package.json",
    "chars": 455,
    "preview": "{\n  \"name\": \"text-completion\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/text_com"
  },
  {
    "path": "examples/text-completion/src/text_completion.html",
    "chars": 453,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/text-completion/src/text_completion.ts",
    "chars": 1542,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\n\nfunction setLabel(id: string, text: string) {\n  const label = document.getEl"
  },
  {
    "path": "examples/vision-model/README.md",
    "chars": 473,
    "preview": "# WebLLM Get Started App\n\nThis folder provides a minimum demo to show WebLLM API in a webapp setting.\nTo try it out, you"
  },
  {
    "path": "examples/vision-model/package.json",
    "chars": 445,
    "preview": "{\n  \"name\": \"get-started\",\n  \"version\": \"0.1.0\",\n  \"private\": true,\n  \"scripts\": {\n    \"start\": \"parcel src/vision_model"
  },
  {
    "path": "examples/vision-model/src/utils.ts",
    "chars": 1148,
    "preview": "export function getImageDataFromURL(url: string): Promise<ImageData> {\n  return new Promise((resolve, reject) => {\n    /"
  },
  {
    "path": "examples/vision-model/src/vision_model.html",
    "chars": 450,
    "preview": "<!doctype html>\n<html>\n  <script>\n    webLLMGlobal = {};\n  </script>\n  <body>\n    <h2>WebLLM Test Page</h2>\n    Open con"
  },
  {
    "path": "examples/vision-model/src/vision_model.ts",
    "chars": 3504,
    "preview": "import * as webllm from \"@mlc-ai/web-llm\";\nimport { imageURLToBase64 } from \"./utils\";\n\nfunction setLabel(id: string, te"
  },
  {
    "path": "examples/vision-model/src/worker.ts",
    "chars": 183,
    "preview": "import { WebWorkerMLCEngineHandler } from \"@mlc-ai/web-llm\";\n\nconst handler = new WebWorkerMLCEngineHandler();\n\nself.onm"
  },
  {
    "path": "jest.config.cjs",
    "chars": 553,
    "preview": "module.exports = {\n    preset: \"ts-jest\",\n    testEnvironment: \"node\",\n    roots: [\"<rootDir>/tests\", \"<rootDir>/src\"],\n"
  },
  {
    "path": "licenses/license.openai_node.txt",
    "chars": 11335,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "package.json",
    "chars": 1840,
    "preview": "{\n  \"name\": \"@mlc-ai/web-llm\",\n  \"version\": \"0.2.82\",\n  \"description\": \"Hardware accelerated language model chats on bro"
  },
  {
    "path": "rollup.config.js",
    "chars": 771,
    "preview": "import { nodeResolve } from '@rollup/plugin-node-resolve';\nimport ignore from \"rollup-plugin-ignore\";\nimport commonjs fr"
  },
  {
    "path": "scripts/gh_deploy_site.sh",
    "chars": 519,
    "preview": "#!/bin/bash\nset -euxo pipefail\n\nexport PYTHONPATH=$PWD/python\ncd docs && make html && cd ..\ncd site && jekyll b && cd .."
  },
  {
    "path": "scripts/local_deploy_site.sh",
    "chars": 181,
    "preview": "#!/bin/bash\nset -euxo pipefail\n\ncd examples/simple-chat\nrm -rf lib\nnpm run build\ncd ../..\n\ncp examples/simple-chat/lib/*"
  },
  {
    "path": "scripts/prep_deps.sh",
    "chars": 637,
    "preview": "#!/bin/bash\n# This file prepares all the necessary dependencies for the web build.\nset -euxo pipefail\n\nemcc --version\nnp"
  },
  {
    "path": "scripts/serve_mlc_llm_dist.sh",
    "chars": 398,
    "preview": "#!/bin/bash\n# This file prepares all the necessary dependencies for the web build.\nset -euxo pipefail\n\nnpm --version\n\nML"
  },
  {
    "path": "site/.gitignore",
    "chars": 75,
    "preview": "dist\nllm-chat-config.json\n_includes/stable_diffusion.html\n_site\nllm_chat.*\n"
  },
  {
    "path": "site/_config.yml",
    "chars": 699,
    "preview": "name: \"WebLLM\"\nshort_name: \"WebLLM\"\n\nurl: https://webllm.mlc.ai\n\nexclude: [README.md, serve_local.sh]\n\nplugins:\n  - jeky"
  },
  {
    "path": "site/_includes/head.html",
    "chars": 829,
    "preview": "<meta name=\"description\" content=\"WebLLM: High-Performance In-Browser LLM Inference Engine\">\n<meta\n  http-equiv=\"origin-"
  },
  {
    "path": "site/_includes/hero.html",
    "chars": 1825,
    "preview": "<section id=\"hero\">\n  <div class=\"heading-container\">\n    <h1>WebLLM: High-Performance In-Browser LLM Inference Engine</"
  },
  {
    "path": "site/assets/css/hero.scss",
    "chars": 5447,
    "preview": "---\n---\n\n#hero {\n    background: radial-gradient(100% 50rem at center 50rem, #3352cb, #ffffff);\n    padding: 3rem;\n    w"
  },
  {
    "path": "site/index.md",
    "chars": 3406,
    "preview": "---\nlayout: default\ntitle: Home\nnotitle: true\n---\n\n{% include hero.html %}\n\n## Overview\n\nWe have been seeing amazing pro"
  },
  {
    "path": "src/cache_util.ts",
    "chars": 5296,
    "preview": "import * as tvmjs from \"@mlc-ai/web-runtime\";\nimport {\n  AppConfig,\n  ChatConfig,\n  ModelRecord,\n  prebuiltAppConfig,\n} "
  },
  {
    "path": "src/config.ts",
    "chars": 73326,
    "preview": "import log from \"loglevel\";\nimport { ResponseFormat } from \"./openai_api_protocols\";\nimport { LogitProcessor, InitProgre"
  },
  {
    "path": "src/conversation.ts",
    "chars": 19205,
    "preview": "import {\n  ChatConfig,\n  ConvTemplateConfig,\n  MessagePlaceholders,\n  Role,\n} from \"./config\";\nimport {\n  ChatCompletion"
  },
  {
    "path": "src/embedding.ts",
    "chars": 10287,
    "preview": "import * as tvmjs from \"@mlc-ai/web-runtime\";\nimport log from \"loglevel\";\nimport { Tokenizer } from \"@mlc-ai/web-tokeniz"
  },
  {
    "path": "src/engine.ts",
    "chars": 47802,
    "preview": "import * as tvmjs from \"@mlc-ai/web-runtime\";\nimport log from \"loglevel\";\nimport {\n  ChatConfig,\n  ChatOptions,\n  AppCon"
  },
  {
    "path": "src/error.ts",
    "chars": 19857,
    "preview": "export class ModelNotFoundError extends Error {\n  constructor(modelId: string) {\n    super(\n      `Cannot find model rec"
  },
  {
    "path": "src/extension_service_worker.ts",
    "chars": 6233,
    "preview": "import * as tvmjs from \"@mlc-ai/web-runtime\";\nimport log from \"loglevel\";\nimport { ChatOptions, MLCEngineConfig } from \""
  },
  {
    "path": "src/index.ts",
    "chars": 1170,
    "preview": "export {\n  ModelRecord,\n  AppConfig,\n  ChatOptions,\n  MLCEngineConfig,\n  GenerationConfig,\n  ModelType,\n  prebuiltAppCon"
  },
  {
    "path": "src/llm_chat.ts",
    "chars": 59263,
    "preview": "import * as tvmjs from \"@mlc-ai/web-runtime\";\nimport * as xgr from \"@mlc-ai/web-xgrammar\";\nimport log from \"loglevel\";\ni"
  },
  {
    "path": "src/message.ts",
    "chars": 4199,
    "preview": "import { AppConfig, ChatOptions } from \"./config\";\nimport { InitProgressReport, LogLevel } from \"./types\";\nimport {\n  Ch"
  },
  {
    "path": "src/openai_api_protocols/chat_completion.ts",
    "chars": 38656,
    "preview": "/**\n * The input to OpenAI API, directly adopted from openai-node with small tweaks:\n * https://github.com/openai/openai"
  },
  {
    "path": "src/openai_api_protocols/completion.ts",
    "chars": 12500,
    "preview": "/**\n * The input to OpenAI API, directly adopted from openai-node with small tweaks:\n * https://github.com/openai/openai"
  },
  {
    "path": "src/openai_api_protocols/embedding.ts",
    "chars": 5416,
    "preview": "/**\n * The input to OpenAI API, directly adopted from openai-node with small tweaks:\n * https://github.com/openai/openai"
  },
  {
    "path": "src/openai_api_protocols/index.ts",
    "chars": 2115,
    "preview": "/**\n * The input to OpenAI API, directly adopted from openai-node with small tweaks:\n * https://github.com/openai/openai"
  },
  {
    "path": "src/service_worker.ts",
    "chars": 8027,
    "preview": "import * as tvmjs from \"@mlc-ai/web-runtime\";\nimport log from \"loglevel\";\nimport { ChatOptions, MLCEngineConfig } from \""
  }
]

// ... and 35 more files (download for full content)

About this extraction

This page contains the full source code of the mlc-ai/web-llm GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 235 files (821.6 KB), approximately 222.2k tokens, and a symbol index with 629 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo