Repository: s-zx/edgeFlow.js
Branch: main
Commit: ba87394114d1
Files: 179
Total size: 1.7 MB
Directory structure:
gitextract_2xh90dxw/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug_report.md
│ │ └── feature_request.md
│ ├── pull_request_template.md
│ └── workflows/
│ ├── ci.yml
│ └── publish.yml
├── .gitignore
├── CLAUDE.md
├── CONTRIBUTING.md
├── README.md
├── README_CN.md
├── benchmarks/
│ └── README.md
├── demo/
│ ├── demo.js
│ ├── index.html
│ ├── server.js
│ └── styles.css
├── dist/
│ ├── backends/
│ │ ├── index.d.ts
│ │ ├── index.js
│ │ ├── onnx.d.ts
│ │ ├── onnx.js
│ │ ├── transformers-adapter.d.ts
│ │ ├── transformers-adapter.js
│ │ ├── wasm.d.ts
│ │ ├── wasm.js
│ │ ├── webgpu.d.ts
│ │ ├── webgpu.js
│ │ ├── webnn.d.ts
│ │ └── webnn.js
│ ├── core/
│ │ ├── composer.d.ts
│ │ ├── composer.js
│ │ ├── device-profiler.d.ts
│ │ ├── device-profiler.js
│ │ ├── index.d.ts
│ │ ├── index.js
│ │ ├── memory.d.ts
│ │ ├── memory.js
│ │ ├── plugin.d.ts
│ │ ├── plugin.js
│ │ ├── runtime.d.ts
│ │ ├── runtime.js
│ │ ├── scheduler.d.ts
│ │ ├── scheduler.js
│ │ ├── tensor.d.ts
│ │ ├── tensor.js
│ │ ├── types.d.ts
│ │ ├── types.js
│ │ ├── worker.d.ts
│ │ └── worker.js
│ ├── edgeflow.browser.js
│ ├── index.d.ts
│ ├── index.js
│ ├── pipelines/
│ │ ├── automatic-speech-recognition.d.ts
│ │ ├── automatic-speech-recognition.js
│ │ ├── base.d.ts
│ │ ├── base.js
│ │ ├── feature-extraction.d.ts
│ │ ├── feature-extraction.js
│ │ ├── image-classification.d.ts
│ │ ├── image-classification.js
│ │ ├── image-segmentation.d.ts
│ │ ├── image-segmentation.js
│ │ ├── index.d.ts
│ │ ├── index.js
│ │ ├── object-detection.d.ts
│ │ ├── object-detection.js
│ │ ├── question-answering.d.ts
│ │ ├── question-answering.js
│ │ ├── text-classification.d.ts
│ │ ├── text-classification.js
│ │ ├── text-generation.d.ts
│ │ ├── text-generation.js
│ │ ├── zero-shot-classification.d.ts
│ │ └── zero-shot-classification.js
│ ├── tools/
│ │ ├── benchmark.d.ts
│ │ ├── benchmark.js
│ │ ├── debugger.d.ts
│ │ ├── debugger.js
│ │ ├── index.d.ts
│ │ ├── index.js
│ │ ├── monitor.d.ts
│ │ ├── monitor.js
│ │ ├── quantization.d.ts
│ │ └── quantization.js
│ └── utils/
│ ├── cache.d.ts
│ ├── cache.js
│ ├── hub.d.ts
│ ├── hub.js
│ ├── index.d.ts
│ ├── index.js
│ ├── model-loader.d.ts
│ ├── model-loader.js
│ ├── offline.d.ts
│ ├── offline.js
│ ├── preprocessor.d.ts
│ ├── preprocessor.js
│ ├── tokenizer.d.ts
│ └── tokenizer.js
├── docs/
│ ├── .vitepress/
│ │ └── config.ts
│ ├── api/
│ │ ├── model-loader.md
│ │ ├── pipeline.md
│ │ ├── tensor.md
│ │ └── tokenizer.md
│ ├── cookbook/
│ │ ├── composition.md
│ │ └── transformers-adapter.md
│ ├── guide/
│ │ ├── architecture.md
│ │ ├── concepts.md
│ │ ├── device-profiling.md
│ │ ├── installation.md
│ │ ├── plugins.md
│ │ └── quickstart.md
│ ├── index.md
│ └── tutorials/
│ └── text-classification.md
├── examples/
│ ├── basic-usage.ts
│ ├── multi-model-dashboard/
│ │ └── index.html
│ ├── offline-notepad/
│ │ └── index.html
│ └── orchestration.ts
├── package.json
├── playwright.config.ts
├── scripts/
│ └── build-browser.js
├── src/
│ ├── backends/
│ │ ├── index.ts
│ │ ├── onnx.ts
│ │ ├── transformers-adapter.ts
│ │ ├── wasm.ts
│ │ ├── webgpu.ts
│ │ └── webnn.ts
│ ├── core/
│ │ ├── composer.ts
│ │ ├── device-profiler.ts
│ │ ├── index.ts
│ │ ├── memory.ts
│ │ ├── plugin.ts
│ │ ├── runtime.ts
│ │ ├── scheduler.ts
│ │ ├── tensor.ts
│ │ ├── types.ts
│ │ └── worker.ts
│ ├── index.ts
│ ├── pipelines/
│ │ ├── automatic-speech-recognition.ts
│ │ ├── base.ts
│ │ ├── feature-extraction.ts
│ │ ├── image-classification.ts
│ │ ├── image-segmentation.ts
│ │ ├── index.ts
│ │ ├── object-detection.ts
│ │ ├── question-answering.ts
│ │ ├── text-classification.ts
│ │ ├── text-generation.ts
│ │ └── zero-shot-classification.ts
│ ├── tools/
│ │ ├── benchmark.ts
│ │ ├── debugger.ts
│ │ ├── index.ts
│ │ ├── monitor.ts
│ │ └── quantization.ts
│ └── utils/
│ ├── cache.ts
│ ├── hub.ts
│ ├── index.ts
│ ├── model-loader.ts
│ ├── offline.ts
│ ├── preprocessor.ts
│ └── tokenizer.ts
├── tests/
│ ├── e2e/
│ │ ├── browser.spec.ts
│ │ ├── browser.test.ts
│ │ ├── localai-10s-check.spec.ts
│ │ ├── localai-clear-cache-load.spec.ts
│ │ ├── localai-knowledge-base.spec.ts
│ │ ├── localai-load-models.spec.ts
│ │ ├── localai-loading-check.spec.ts
│ │ ├── localai-network-audit.spec.ts
│ │ ├── localai-network-failures.spec.ts
│ │ └── localai-network-full.spec.ts
│ ├── integration/
│ │ └── pipeline.test.ts
│ └── unit/
│ ├── memory.test.ts
│ ├── model-loader.test.ts
│ ├── runtime.test.ts
│ ├── scheduler.test.ts
│ ├── tensor.test.ts
│ ├── tokenizer.test.ts
│ └── worker.test.ts
├── tsconfig.json
├── vercel.json
└── vitest.config.ts
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Report a bug to help improve edgeFlow.js
title: '[Bug] '
labels: bug
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Import '...'
2. Call '...'
3. See error
**Expected behavior**
A clear description of what you expected to happen.
**Code sample**
```typescript
// Minimal reproduction
```
**Environment**
- Browser: [e.g. Chrome 120, Firefox 118]
- OS: [e.g. macOS 14, Windows 11]
- edgeFlow.js version: [e.g. 0.1.0]
- Runtime: [e.g. WebGPU, WASM]
**Additional context**
Any other context about the problem.
================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest a feature for edgeFlow.js
title: '[Feature] '
labels: enhancement
assignees: ''
---
**Is your feature request related to a problem?**
A clear description of the problem. Ex. "I'm always frustrated when..."
**Describe the solution you'd like**
A clear description of what you want to happen.
**Describe alternatives you've considered**
Any alternative solutions or features you've considered.
**Additional context**
Any other context, code examples, or screenshots.
================================================
FILE: .github/pull_request_template.md
================================================
## Summary
Brief description of the changes.
## Motivation
Why is this change needed?
## Changes
- Change 1
- Change 2
## Testing
- [ ] Unit tests pass (`npm run test:unit`)
- [ ] TypeScript compiles (`npx tsc --noEmit`)
- [ ] Lint passes (`npm run lint`)
- [ ] Tested in browser (if applicable)
## Breaking Changes
List any breaking changes, or "None".
================================================
FILE: .github/workflows/ci.yml
================================================
name: CI
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
jobs:
  lint-and-typecheck:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
      - run: npm ci
      - run: npm run lint
      - run: npx tsc --noEmit
  test:
    runs-on: ubuntu-latest
    needs: lint-and-typecheck
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
      - run: npm ci
      - run: npm run test:unit
      - run: npm run test:coverage
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: coverage-report
          path: coverage/
  build:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
      - run: npm ci
      - run: npm run build
      - uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/
================================================
FILE: .github/workflows/publish.yml
================================================
name: Publish to npm
on:
  release:
    types: [published]
jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 20
          cache: npm
          registry-url: https://registry.npmjs.org
      - run: npm ci
      - run: npm run build
      - run: npm run test:unit
      - run: npm publish --provenance --access public
        env:
          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
================================================
FILE: .gitignore
================================================
# Dependencies
node_modules/
# Build outputs (keep dist/ for npm publishing)
# dist/
# IDE
.idea/
.vscode/
*.swp
*.swo
.DS_Store
# Logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Test coverage
coverage/
# Playwright / E2E test output
test-results/
# Environment
.env
.env.local
.env.*.local
# TypeScript cache
*.tsbuildinfo
# Temporary files
tmp/
temp/
.tmp/
.temp/
.vercel
.env*.local
# Personal docs (not for public repo)
INTERVIEW_PREP.md
================================================
FILE: CLAUDE.md
================================================
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Commands
- **Build:** `npm run build` (runs `tsc` then `scripts/build-browser.js` which produces `dist/edgeflow.browser.js` via esbuild; `onnxruntime-web` is marked external).
- **Watch compile:** `npm run dev`
- **Lint:** `npm run lint` (ESLint on `src/**/*.ts`)
- **Unit/integration tests (vitest, happy-dom):** `npm test` / `npm run test:unit` / `npm run test:integration`
- Single test file: `npx vitest run tests/unit/tokenizer.test.ts`
- Single test by name: `npx vitest run -t "test name pattern"`
- **E2E (Playwright, Chromium):** `npm run test:e2e`
- Uses `playwright.config.ts` by default; alternate configs exist for `localai`, `network`, `privatedoc` scenarios (run with `npx playwright test -c playwright.localai.config.ts`).
- Playwright auto-starts `npm run demo:server` on `localhost:3000`.
- **Demo app:** `npm run demo` (builds then serves `demo/server.js` on port 3000). Load a Hugging Face ONNX URL in the browser UI to exercise pipelines.
- **Docs (VitePress):** `npm run docs:dev` / `npm run docs:build`
## Architecture
edgeFlow.js is a browser-first ML inference framework. The runtime graph is: **Pipeline → BasePipeline → RuntimeManager → Runtime backend (ONNX/WebGPU/WebNN/WASM) → Scheduler → MemoryManager**. All public exports flow through `src/index.ts`.
### Layered structure (`src/`)
- **`core/`** — framework internals. `types.ts` is the canonical type/error surface (`EdgeFlowError`, `ErrorCodes`, all `Tensor`/`Runtime`/`Pipeline` interfaces — most other files import from here). `runtime.ts` holds the `RuntimeManager` singleton, runtime factory registry, and priority-based automatic backend selection (webgpu > webnn > wasm). `scheduler.ts` implements the global priority queue / concurrency-limited `InferenceScheduler` that every runtime dispatches through. `memory.ts` provides `MemoryManager`, `MemoryScope`, and `ModelCache` with reference-counted cleanup. `composer.ts` enables `compose()`/`parallel()` multi-stage pipelines. `plugin.ts` is the extension point for third-party pipelines/backends/middleware. `device-profiler.ts` recommends quantization/model variant based on device tier. `worker.ts` runs inference in a Web Worker.
- **`backends/`** — concrete `Runtime` implementations: `onnx.ts` (onnxruntime-web, peer dep), `webgpu.ts`, `webnn.ts`, `wasm.ts`, plus `transformers-adapter.ts` for interop with transformers.js. `registerAllBackends()` wires factories into `RuntimeManager`.
- **`pipelines/`** — task-specific wrappers extending `base.ts`'s `BasePipeline`. The `pipeline(task, options?)` factory in `index.ts` looks up a registered pipeline factory (built-in or plugin) and returns a ready-to-run instance. Each pipeline owns its own tokenizer/preprocessor, model loading, and result formatting.
- **`utils/`** — `tokenizer.ts` (BPE, WordPiece, Unigram — loads `tokenizer.json` directly), `preprocessor.ts` (image/audio/text), `model-loader.ts` (preloading, sharding, resumable downloads), `cache.ts` (`InferenceCache`, `ModelDownloadCache` — IndexedDB-backed), `hub.ts` (HuggingFace Hub download helpers + `POPULAR_MODELS`), `offline.ts`.
- **`tools/`** — developer tooling surface: `quantization.ts` (int8/uint8/float16 quant + dequant), `debugger.ts` (tensor inspection, histograms, heatmaps, trace events), `monitor.ts` (`PerformanceMonitor` + dashboard generators), `benchmark.ts`.
### Cross-cutting conventions
- **ESM only** (`"type": "module"`, `sideEffects: false`). All intra-repo imports use `.js` extensions even from `.ts` source — required for Node ESM resolution after `tsc` emits.
- **`onnxruntime-web` is an optional peer dep** marked `external` in the browser bundle; consumer bundlers resolve it. Do not import it eagerly in code paths that should run without ONNX.
- **Errors always use `EdgeFlowError` + `ErrorCodes`** from `core/types.ts` — do not throw bare `Error` from library code (a sketch follows this list).
- **Scheduling is mandatory:** runtime inference paths go through `getScheduler()`. New backends should dispatch via the scheduler rather than calling model.run directly, so priority/concurrency controls are honored.
- **Tests:** unit/integration run under happy-dom (no real WebGPU/WebNN); those backends are exercised via Playwright E2E against the demo server. Test timeout is 30s.
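For example, a minimal sketch of the error convention (the `EdgeFlowError` constructor signature and the `MODEL_NOT_LOADED` code shown here are assumptions; check `src/core/types.ts` for the real definitions):
```typescript
// Hypothetical helper inside src/: signal failures with EdgeFlowError, never a bare Error.
// The (message, code) constructor and the specific ErrorCodes member are assumed.
import { EdgeFlowError, ErrorCodes } from './core/types.js';

export function assertModelLoaded(model: unknown): asserts model {
  if (!model) {
    throw new EdgeFlowError('Model is not loaded', ErrorCodes.MODEL_NOT_LOADED);
  }
}
```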
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to edgeFlow.js
Thank you for your interest in contributing to edgeFlow.js! This guide will help you get started.
## Development Setup
```bash
# Clone the repository
git clone https://github.com/s-zx/edgeflow.js.git
cd edgeflow.js
# Install dependencies
npm install
# Build the project
npm run build
# Run tests
npm run test:unit
# Start development mode (watch)
npm run dev
```
## Project Structure
```
src/
├── core/ # Runtime, scheduler, memory, tensor, types
├── backends/ # ONNX Runtime (production), WebGPU/WebNN (planned)
├── pipelines/ # Task pipelines (text-generation, image-segmentation, etc.)
├── utils/ # Tokenizer, preprocessor, cache, model-loader, hub
└── tools/ # Quantization, benchmark, debugger, monitor
```
## How to Contribute
### Reporting Bugs
Open an issue using the bug report template. Include:
- A minimal code reproduction
- Browser and OS information
- edgeFlow.js version
### Suggesting Features
Open an issue using the feature request template describing:
- The problem you're trying to solve
- Your proposed solution
- Alternatives you've considered
### Submitting Code
1. Fork the repository
2. Create a feature branch: `git checkout -b feature/my-feature`
3. Make your changes
4. Run checks: `npm run lint && npx tsc --noEmit && npm run test:unit`
5. Commit with a descriptive message
6. Push and open a pull request
### Good First Issues
Look for issues labeled `good first issue`. These are scoped tasks ideal for newcomers:
- Adding tests for uncovered modules
- Improving error messages
- Adding examples
- Documentation improvements
## Code Standards
- **TypeScript strict mode** — all strict options are enabled
- **No `any`** — use proper types; `unknown` if truly dynamic
- **ESM only** — use `.js` extensions in imports
- **No console.log in library code** — use the event system or `console.warn` for important warnings
- **Dispose pattern** — all resources must be disposable to prevent memory leaks (a minimal sketch follows this list)
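For example, a minimal sketch of the dispose pattern (the class below is illustrative only and not part of the repo; align real code with the interfaces in `src/core/types.ts`):
```typescript
// Hypothetical resource that follows the dispose pattern.
class ScratchBuffer {
  private data: Float32Array | null = new Float32Array(1024);

  get disposed(): boolean {
    return this.data === null;
  }

  dispose(): void {
    // Drop the reference so the memory can be reclaimed; callers must not
    // use the buffer after this point.
    this.data = null;
  }
}

// Usage: release resources deterministically, e.g. in a finally block.
const buffer = new ScratchBuffer();
try {
  // ... use buffer ...
} finally {
  buffer.dispose();
}
```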
## Testing
```bash
npm run test:unit # Run unit tests
npm run test:integration # Run integration tests
npm run test:coverage # Generate coverage report
npm run test:watch # Watch mode
```
Tests use [Vitest](https://vitest.dev/); a minimal example follows the list below. Place tests in:
- `tests/unit/` — for isolated unit tests
- `tests/integration/` — for pipeline/backend integration tests
- `tests/e2e/` — for browser-based tests
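A minimal unit test sketch (the `tensor()`/`softmax()` helpers and `.toArray()` are used as shown in the README; the import path and file name are assumptions, so adjust them to match the existing tests):
```typescript
// tests/unit/softmax.example.test.ts (hypothetical file)
import { describe, expect, it } from 'vitest';
import { softmax, tensor } from '../../src/index.js';

describe('softmax', () => {
  it('returns probabilities that sum to 1', () => {
    const probs = softmax(tensor([1, 2, 3, 4]));
    const sum = probs.toArray().reduce((acc: number, v: number) => acc + v, 0);
    expect(sum).toBeCloseTo(1, 5);
    probs.dispose();
  });
});
```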
## Architecture Decisions
edgeFlow.js is designed as an **orchestration layer**, not an inference engine. Key principles:
1. **Backend agnostic** — work with any inference engine (ONNX Runtime, transformers.js, custom)
2. **Production-first** — scheduling, memory management, error recovery matter more than model count
3. **Honest API** — experimental features are clearly labeled, not presented as production-ready
4. **Plugin-friendly** — custom pipelines and backends can be registered at runtime (see the sketch below)
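As a sketch of the plugin principle (the `registerPipeline` name and factory shape below are hypothetical; see `src/core/plugin.ts` and the pipeline factory in `src/pipelines/index.ts` for the actual extension API):
```typescript
// Hypothetical registration of a custom task at runtime; names are illustrative only.
import { registerPipeline, pipeline } from 'edgeflowjs';

registerPipeline('reverse-text', () => ({
  async run(input: string) {
    return { reversed: input.split('').reverse().join('') };
  },
  dispose() {},
}));

const reverser = await pipeline('reverse-text');
console.log(await reverser.run('edgeFlow')); // { reversed: 'wolFegde' }
```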
## License
By contributing, you agree that your contributions will be licensed under the MIT License.
================================================
FILE: README.md
================================================
# edgeFlow.js
**Browser ML inference framework with task scheduling and smart caching.**
[](https://www.npmjs.com/package/edgeflowjs)
[](https://packagephobia.com/result?p=edgeflowjs)
[](LICENSE)
[Documentation](https://edgeflow.js.org) · [Examples](examples/) · [API Reference](https://edgeflow.js.org/api) · [English](README.md) | [中文](README_CN.md)
---
## ✨ Features
- 📋 **Task Scheduler** - Priority queue, concurrency control, task cancellation
- 🔄 **Batch Processing** - Efficient batch inference out of the box
- 💾 **Memory Management** - Automatic memory tracking and cleanup with scopes
- 📥 **Smart Model Loading** - Preloading, sharding, resume download support
- 💿 **Offline Caching** - IndexedDB-based model caching for offline use
- ⚡ **Multi-Backend** - ONNX Runtime with WebGPU/WASM execution providers, automatic fallback
- 🤗 **HuggingFace Hub** - Direct model download with one line
- 🔤 **Real Tokenizers** - BPE & WordPiece tokenizers, load tokenizer.json directly
- 👷 **Web Worker Support** - Run inference in background threads
- 📦 **Batteries Included** - ONNX Runtime bundled, zero configuration needed
- 🎯 **TypeScript First** - Full type support with intuitive APIs
## 📦 Installation
```bash
npm install edgeflowjs
```
```bash
yarn add edgeflowjs
```
```bash
pnpm add edgeflowjs
```
> **Note**: ONNX Runtime is included as a dependency. No additional setup required.
## 🚀 Quick Start
### Try the Demo
Run the interactive demo locally to test all features:
```bash
# Clone and install
git clone https://github.com/s-zx/edgeflow.js.git
cd edgeflow.js
npm install
# Build and start demo server
npm run demo
```
Open **http://localhost:3000** in your browser:
1. **Load Model** - Enter a Hugging Face ONNX model URL and click "Load Model"
```
https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/onnx/model_quantized.onnx
```
2. **Test Features**:
- 🧮 **Tensor Operations** - Test tensor creation, math ops, softmax, relu
- 📝 **Text Classification** - Run sentiment analysis on text
- 🔍 **Feature Extraction** - Extract embeddings from text
- ⚡ **Task Scheduler** - Test priority-based task scheduling
- 💾 **Memory Management** - Test allocation and cleanup
### Basic Usage
```typescript
import { pipeline } from 'edgeflowjs';
// Create a sentiment analysis pipeline
const sentiment = await pipeline('sentiment-analysis');
// Run inference
const result = await sentiment.run('I love this product!');
console.log(result);
// { label: 'positive', score: 0.98, processingTime: 12.5 }
```
### Batch Processing
```typescript
// Native batch processing support
const results = await sentiment.run([
'This is amazing!',
'This is terrible.',
'It\'s okay I guess.'
]);
console.log(results);
// [
// { label: 'positive', score: 0.95 },
// { label: 'negative', score: 0.92 },
// { label: 'neutral', score: 0.68 }
// ]
```
### Multiple Pipelines
```typescript
import { pipeline } from 'edgeflowjs';
// Create multiple pipelines
const classifier = await pipeline('text-classification');
const extractor = await pipeline('feature-extraction');
// Run in parallel with Promise.all
const [classification, features] = await Promise.all([
classifier.run('Sample text'),
extractor.run('Sample text')
]);
```
### Image Classification
```typescript
import { pipeline } from 'edgeflowjs';
const classifier = await pipeline('image-classification');
// From URL
const result = await classifier.run('https://example.com/image.jpg');
// From HTMLImageElement
const img = document.getElementById('myImage');
const result = await classifier.run(img);
// Batch
const results = await classifier.run([img1, img2, img3]);
```
### Text Generation (Streaming)
```typescript
import { pipeline } from 'edgeflowjs';
const generator = await pipeline('text-generation');
// Simple generation
const result = await generator.run('Once upon a time', {
maxNewTokens: 50,
temperature: 0.8,
});
console.log(result.generatedText);
// Streaming output
for await (const event of generator.stream('Hello, ')) {
process.stdout.write(event.token);
if (event.done) break;
}
```
### Zero-shot Classification
```typescript
import { pipeline } from 'edgeflowjs';
const classifier = await pipeline('zero-shot-classification');
const result = await classifier.classify(
'I love playing soccer on weekends',
['sports', 'politics', 'technology', 'entertainment']
);
console.log(result.labels[0], result.scores[0]);
// 'sports', 0.92
```
### Question Answering
```typescript
import { pipeline } from 'edgeflowjs';
const qa = await pipeline('question-answering');
const result = await qa.run({
question: 'What is the capital of France?',
context: 'Paris is the capital and largest city of France.'
});
console.log(result.answer); // 'Paris'
```
### Load from HuggingFace Hub
```typescript
import { fromHub, fromTask } from 'edgeflowjs';
// Load by model ID (auto-downloads model, tokenizer, config)
const bundle = await fromHub('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
console.log(bundle.tokenizer); // Tokenizer instance
console.log(bundle.config); // Model config
// Load by task name (uses recommended model)
const sentimentBundle = await fromTask('sentiment-analysis');
```
### Web Workers (Background Inference)
```typescript
import { runInWorker, WorkerPool, isWorkerSupported } from 'edgeflowjs';
// Simple: run inference in background thread
if (isWorkerSupported()) {
const outputs = await runInWorker(modelUrl, inputs);
}
// Advanced: use worker pool for parallel processing
const pool = new WorkerPool({ numWorkers: 4 });
await pool.init();
const modelId = await pool.loadModel(modelUrl);
const results = await pool.runBatch(modelId, batchInputs);
pool.terminate();
```
## 🎯 Supported Tasks
| Task | Pipeline | Status |
|------|----------|--------|
| Text Generation | `text-generation` | ✅ Production (TinyLlama, streaming, KV cache) |
| Image Segmentation | `image-segmentation` | ✅ Production (SlimSAM, interactive prompts) |
| Text Classification | `text-classification` | ⚠️ Experimental (heuristic, provide own model) |
| Sentiment Analysis | `sentiment-analysis` | ⚠️ Experimental (heuristic, provide own model) |
| Feature Extraction | `feature-extraction` | ⚠️ Experimental (mock embeddings, provide own model) |
| Image Classification | `image-classification` | ⚠️ Experimental (heuristic, provide own model) |
| Object Detection | `object-detection` | ⚠️ Experimental (real NMS/IoU, needs own model) |
| Speech Recognition | `automatic-speech-recognition` | ⚠️ Experimental (preprocessing only, needs model) |
| Zero-shot Classification | `zero-shot-classification` | ⚠️ Experimental (random scoring, needs NLI model) |
| Question Answering | `question-answering` | ⚠️ Experimental (word overlap heuristic, needs model) |
> **Note:** Experimental pipelines work for demos and testing the API surface. For production accuracy, provide a real ONNX model via `options.model` or use the **transformers.js adapter backend** to leverage HuggingFace's model ecosystem.
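For example, a minimal sketch of supplying your own model (whether `options.model` accepts a direct ONNX URL, a Hub ID, or both is an assumption here; see the model-loader API docs):
```typescript
import { pipeline } from 'edgeflowjs';

// Point the experimental pipeline at a real ONNX model instead of the built-in heuristic.
const classifier = await pipeline('text-classification', {
  model: 'https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/onnx/model_quantized.onnx',
});

const result = await classifier.run('I love this product!');
```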
## ⚡ Key Differentiators
edgeFlow.js is not a replacement for transformers.js — it is a **production orchestration layer** that can wrap any inference engine (including transformers.js) and add the features real apps need.
### What edgeFlow.js adds on top of inference engines
| Feature | Inference engines alone | With edgeFlow.js |
|---------|------------------------|------------------|
| Task Scheduling | None — run and hope | Priority queue with concurrency limits |
| Task Cancellation | Not possible | Cancel pending/queued tasks |
| Batch Processing | Manual | Built-in batching with configurable size |
| Memory Management | Manual cleanup | Automatic scopes, leak detection, GC hints |
| Model Preloading | Manual | Background preloading with priority queue |
| Resume Download | Start over on failure | Chunked download with automatic resume |
| Model Caching | Basic or none | IndexedDB cache with stats and eviction |
| Pipeline Composition | Not available | Chain multiple models (ASR → translate → TTS); see the sketch below |
| Device Adaptation | Manual model selection | Auto-select model variant by device capability |
| Performance Monitoring | External tooling needed | Built-in dashboard and alerting |
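For example, a minimal composition sketch (assuming `compose()` is exported from the package root and takes pipeline stages in order; the exact signature may differ, so see `docs/cookbook/composition.md`):
```typescript
import { pipeline, compose } from 'edgeflowjs';

// Stage 1: speech → text. Stage 2: classify the transcript.
const asr = await pipeline('automatic-speech-recognition');
const classifier = await pipeline('text-classification');
const speechSentiment = compose([asr, classifier]);

// audioBuffer is whatever audio input your ASR pipeline accepts; the URL is a placeholder.
const audioBuffer = await fetch('/clips/sample.wav').then((r) => r.arrayBuffer());
const result = await speechSentiment.run(audioBuffer);
```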
## 🔌 transformers.js Adapter (Recommended)
Use edgeFlow.js as an orchestration layer on top of [transformers.js](https://huggingface.co/docs/transformers.js) to get access to 1000+ HuggingFace models with scheduling, caching, and memory management:
```typescript
import { pipeline as tfPipeline } from '@xenova/transformers';
import { useTransformersBackend, pipeline } from 'edgeflowjs';
// Register transformers.js as the inference backend
useTransformersBackend({
pipelineFactory: tfPipeline,
device: 'webgpu', // GPU acceleration
dtype: 'fp16', // Half precision
});
// Use edgeFlow.js API — scheduling, caching, memory management included
const classifier = await pipeline('text-classification', {
model: 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
});
const result = await classifier.run('I love this product!');
```
> **Why?** transformers.js is excellent at loading and running single models. edgeFlow.js adds the production features you need when running multiple models, managing memory on constrained devices, caching for offline use, and scheduling concurrent inference.
## 🔧 Configuration
### Runtime Selection
```typescript
import { pipeline } from 'edgeflowjs';
// Automatic (recommended)
const model = await pipeline('text-classification');
// Specify runtime
const model = await pipeline('text-classification', {
runtime: 'webgpu' // or 'webnn', 'wasm', 'auto'
});
```
### Memory Management
```typescript
import { pipeline, getMemoryStats, gc } from 'edgeflowjs';
const model = await pipeline('text-classification');
// Use the model
await model.run('text');
// Check memory usage
console.log(getMemoryStats());
// { allocated: 50MB, used: 45MB, peak: 52MB, tensorCount: 12 }
// Explicit cleanup
model.dispose();
// Force garbage collection
gc();
```
### Scheduler Configuration
```typescript
import { configureScheduler } from 'edgeflowjs';
configureScheduler({
maxConcurrentTasks: 4,
maxConcurrentPerModel: 1,
defaultTimeout: 30000,
enableBatching: true,
maxBatchSize: 32,
});
```
### Caching
```typescript
import { pipeline, Cache } from 'edgeflowjs';
// Create a cache
const cache = new Cache({
strategy: 'lru',
maxSize: 100 * 1024 * 1024, // 100MB
persistent: true, // Use IndexedDB
});
const model = await pipeline('text-classification', {
cache: true
});
```
## 🛠️ Advanced Usage
### Custom Model Loading
```typescript
import { loadModel, runInference } from 'edgeflowjs';
// Load from URL with caching, sharding, and resume support
const model = await loadModel('https://example.com/model.bin', {
runtime: 'webgpu',
quantization: 'int8',
cache: true, // Enable IndexedDB caching (default: true)
resumable: true, // Enable resume download (default: true)
chunkSize: 5 * 1024 * 1024, // 5MB chunks for large models
onProgress: (progress) => console.log(`Loading: ${progress * 100}%`)
});
// Run inference
const outputs = await runInference(model, inputs);
// Cleanup
model.dispose();
```
### Preloading Models
```typescript
import { preloadModel, preloadModels, getPreloadStatus } from 'edgeflowjs';
// Preload a single model in background (with priority)
preloadModel('https://example.com/model1.onnx', { priority: 10 });
// Preload multiple models
preloadModels([
{ url: 'https://example.com/model1.onnx', priority: 10 },
{ url: 'https://example.com/model2.onnx', priority: 5 },
]);
// Check preload status
const status = getPreloadStatus('https://example.com/model1.onnx');
// 'pending' | 'loading' | 'complete' | 'error' | 'not_found'
```
### Model Caching
```typescript
import {
isModelCached,
getCachedModel,
deleteCachedModel,
clearModelCache,
getModelCacheStats
} from 'edgeflowjs';
// Check if model is cached
if (await isModelCached('https://example.com/model.onnx')) {
console.log('Model is cached!');
}
// Get cached model data directly
const modelData = await getCachedModel('https://example.com/model.onnx');
// Delete a specific cached model
await deleteCachedModel('https://example.com/model.onnx');
// Clear all cached models
await clearModelCache();
// Get cache statistics
const stats = await getModelCacheStats();
console.log(`${stats.models} models cached, ${stats.totalSize} bytes total`);
```
### Resume Downloads
Large model downloads automatically support resuming from where they left off:
```typescript
import { loadModelData } from 'edgeflowjs';
// Download with progress and resume support
const modelData = await loadModelData('https://example.com/large-model.onnx', {
resumable: true,
chunkSize: 10 * 1024 * 1024, // 10MB chunks
parallelConnections: 4, // Download 4 chunks in parallel
onProgress: (progress) => {
console.log(`${progress.percent.toFixed(1)}% downloaded`);
console.log(`Speed: ${(progress.speed / 1024 / 1024).toFixed(2)} MB/s`);
console.log(`ETA: ${(progress.eta / 1000).toFixed(0)}s`);
console.log(`Chunk ${progress.currentChunk}/${progress.totalChunks}`);
}
});
```
### Model Quantization
```typescript
import { quantize } from 'edgeflowjs/tools';
const quantized = await quantize(model, {
method: 'int8',
calibrationData: samples,
});
console.log(`Compression: ${quantized.compressionRatio}x`);
// Compression: 3.8x
```
### Benchmarking
```typescript
import { benchmark } from 'edgeflowjs/tools';
const result = await benchmark(
() => model.run('sample text'),
{ warmupRuns: 5, runs: 100 }
);
console.log(result);
// {
// avgTime: 12.5,
// minTime: 10.2,
// maxTime: 18.3,
// throughput: 80 // inferences/sec
// }
```
### Memory Scope
```typescript
import { withMemoryScope, tensor } from 'edgeflowjs';
const result = await withMemoryScope(async (scope) => {
// Tensors tracked in scope
const a = scope.track(tensor([1, 2, 3]));
const b = scope.track(tensor([4, 5, 6]));
// Process...
const output = process(a, b);
// Keep result, dispose others
return scope.keep(output);
});
// a and b automatically disposed
```
## 🔌 Tensor Operations
```typescript
import { tensor, zeros, ones, matmul, softmax, relu } from 'edgeflowjs';
// Create tensors
const a = tensor([[1, 2], [3, 4]]);
const b = zeros([2, 2]);
const c = ones([2, 2]);
// Operations
const d = matmul(a, c);
const probs = softmax(d);
const activated = relu(d);
// Cleanup
a.dispose();
b.dispose();
c.dispose();
```
## 🌐 Browser Support
| Browser | WebGPU | WebNN | WASM |
|---------|--------|-------|------|
| Chrome 113+ | ✅ | ✅ | ✅ |
| Edge 113+ | ✅ | ✅ | ✅ |
| Firefox 118+ | ⚠️ Flag | ❌ | ✅ |
| Safari 17+ | ⚠️ Preview | ❌ | ✅ |
## Star History
[](https://www.star-history.com/?repos=s-zx%2FedgeFlow.js&type=date&legend=top-left)
## 📖 API Reference
### Core
- `pipeline(task, options?)` - Create a pipeline for a task
- `loadModel(url, options?)` - Load a model from URL
- `runInference(model, inputs)` - Run model inference
- `getScheduler()` - Get the global scheduler
- `getMemoryManager()` - Get the memory manager
- `runInWorker(url, inputs)` - Run inference in a Web Worker
- `WorkerPool` - Manage multiple workers for parallel inference
### Pipelines
- `TextClassificationPipeline` - Text/sentiment classification
- `SentimentAnalysisPipeline` - Sentiment analysis
- `FeatureExtractionPipeline` - Text embeddings
- `ImageClassificationPipeline` - Image classification
- `TextGenerationPipeline` - Text generation with streaming
- `ObjectDetectionPipeline` - Object detection with bounding boxes
- `AutomaticSpeechRecognitionPipeline` - Speech to text
- `ZeroShotClassificationPipeline` - Classify without training
- `QuestionAnsweringPipeline` - Extractive QA
### HuggingFace Hub
- `fromHub(modelId, options?)` - Load model bundle from HuggingFace
- `fromTask(task, options?)` - Load recommended model for task
- `downloadTokenizer(modelId)` - Download tokenizer only
- `downloadConfig(modelId)` - Download config only
- `POPULAR_MODELS` - Registry of popular models by task
### Utilities
- `Tokenizer` - BPE/WordPiece tokenization with HuggingFace support
- `ImagePreprocessor` - Image preprocessing with HuggingFace config support
- `AudioPreprocessor` - Audio preprocessing for Whisper/wav2vec
- `Cache` - LRU caching utilities
### Tools
- `quantize(model, options)` - Quantize a model
- `prune(model, options)` - Prune model weights
- `benchmark(fn, options)` - Benchmark inference
- `analyzeModel(model)` - Analyze model structure
## 🤝 Contributing
We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
1. Fork the repository
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
3. Commit your changes (`git commit -m 'Add amazing feature'`)
4. Push to the branch (`git push origin feature/amazing-feature`)
5. Open a Pull Request
## 📄 License
MIT © edgeFlow.js Contributors
---
**[Get Started](https://edgeflow.js.org/getting-started) · [API Docs](https://edgeflow.js.org/api) · [Examples](examples/)**
Made with ❤️ for the edge AI community
================================================
FILE: README_CN.md
================================================
# edgeFlow.js
**浏览器端机器学习推理框架,内置任务调度和智能缓存**
[](https://www.npmjs.com/package/edgeflowjs)
[](https://packagephobia.com/result?p=edgeflowjs)
[](LICENSE)
[文档](https://edgeflow.js.org) · [示例](examples/) · [API 参考](https://edgeflow.js.org/api) · [English](README.md) | [中文](README_CN.md)
---
## ✨ 特性
- 📋 **任务调度器** - 优先级队列、并发控制、任务取消
- 🔄 **批量处理** - 开箱即用的高效批量推理
- 💾 **内存管理** - 自动内存追踪和作用域清理
- 📥 **智能模型加载** - 支持预加载、分片下载、断点续传
- 💿 **离线缓存** - 基于 IndexedDB 的模型缓存,支持离线使用
- ⚡ **多后端支持** - WebGPU、WebNN、WASM 自动降级
- 🤗 **HuggingFace Hub** - 一行代码从 HuggingFace 下载模型
- 🔤 **真实分词器** - BPE 和 WordPiece 分词器,直接加载 tokenizer.json
- 👷 **Web Worker 支持** - 在后台线程运行推理
- 📦 **开箱即用** - 内置 ONNX Runtime,零配置直接使用
- 🎯 **TypeScript 优先** - 完整的类型支持和直观的 API
## 📦 安装
```bash
npm install edgeflowjs
```
```bash
yarn add edgeflowjs
```
```bash
pnpm add edgeflowjs
```
> **注意**: ONNX Runtime 已作为依赖包含,无需额外配置。
## 🚀 快速开始
### 体验 Demo
在本地运行交互式 Demo 测试所有功能:
```bash
# 克隆并安装
git clone https://github.com/s-zx/edgeflow.js.git
cd edgeflow.js
npm install
# 构建并启动 Demo 服务器
npm run demo
```
在浏览器中打开 **http://localhost:3000**:
1. **加载模型** - 输入 Hugging Face ONNX 模型 URL 并点击 "Load Model"
```
https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/onnx/model_quantized.onnx
```
2. **测试功能**:
- 🧮 **张量运算** - 测试张量创建、数学运算、softmax、relu
- 📝 **文本分类** - 对文本进行情感分析
- 🔍 **特征提取** - 从文本中提取嵌入向量
- ⚡ **任务调度** - 测试基于优先级的任务调度
- 💾 **内存管理** - 测试内存分配和清理
### 基础用法
```typescript
import { pipeline } from 'edgeflowjs';
// 创建情感分析流水线
const sentiment = await pipeline('sentiment-analysis');
// 运行推理
const result = await sentiment.run('I love this product!');
console.log(result);
// { label: 'positive', score: 0.98, processingTime: 12.5 }
```
### 批量处理
```typescript
// 原生批处理支持
const results = await sentiment.run([
'This is amazing!',
'This is terrible.',
'It\'s okay I guess.'
]);
console.log(results);
// [
// { label: 'positive', score: 0.95 },
// { label: 'negative', score: 0.92 },
// { label: 'neutral', score: 0.68 }
// ]
```
### 多流水线
```typescript
import { pipeline } from 'edgeflowjs';
// 创建多个流水线
const classifier = await pipeline('text-classification');
const extractor = await pipeline('feature-extraction');
// 使用 Promise.all 并行运行
const [classification, features] = await Promise.all([
classifier.run('Sample text'),
extractor.run('Sample text')
]);
```
### 图像分类
```typescript
import { pipeline } from 'edgeflowjs';
const classifier = await pipeline('image-classification');
// 从 URL 加载
const result = await classifier.run('https://example.com/image.jpg');
// 从 HTMLImageElement 加载
const img = document.getElementById('myImage');
const result = await classifier.run(img);
// 批量处理
const results = await classifier.run([img1, img2, img3]);
```
### 文本生成(流式输出)
```typescript
import { pipeline } from 'edgeflowjs';
const generator = await pipeline('text-generation');
// 简单生成
const result = await generator.run('从前有座山', {
maxNewTokens: 50,
temperature: 0.8,
});
console.log(result.generatedText);
// 流式输出
for await (const event of generator.stream('你好,')) {
process.stdout.write(event.token);
if (event.done) break;
}
```
### 零样本分类
```typescript
import { pipeline } from 'edgeflowjs';
const classifier = await pipeline('zero-shot-classification');
const result = await classifier.classify(
'周末我喜欢踢足球',
['体育', '政治', '科技', '娱乐']
);
console.log(result.labels[0], result.scores[0]);
// '体育', 0.92
```
### 问答系统
```typescript
import { pipeline } from 'edgeflowjs';
const qa = await pipeline('question-answering');
const result = await qa.run({
question: '法国的首都是什么?',
context: '巴黎是法国的首都和最大城市。'
});
console.log(result.answer); // '巴黎'
```
### 从 HuggingFace Hub 加载
```typescript
import { fromHub, fromTask } from 'edgeflowjs';
// 通过模型 ID 加载(自动下载模型、分词器、配置)
const bundle = await fromHub('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
console.log(bundle.tokenizer); // Tokenizer 实例
console.log(bundle.config); // 模型配置
// 通过任务名称加载(使用推荐模型)
const sentimentBundle = await fromTask('sentiment-analysis');
```
### Web Workers(后台推理)
```typescript
import { runInWorker, WorkerPool, isWorkerSupported } from 'edgeflowjs';
// 简单:在后台线程运行推理
if (isWorkerSupported()) {
const outputs = await runInWorker(modelUrl, inputs);
}
// 高级:使用 Worker 池进行并行处理
const pool = new WorkerPool({ numWorkers: 4 });
await pool.init();
const modelId = await pool.loadModel(modelUrl);
const results = await pool.runBatch(modelId, batchInputs);
pool.terminate();
```
## 🎯 支持的任务
| 任务 | 流水线 | 状态 |
|------|--------|------|
| 文本生成 | `text-generation` | ✅ 生产可用(TinyLlama、流式输出、KV 缓存) |
| 图像分割 | `image-segmentation` | ✅ 生产可用(SlimSAM、交互式提示) |
| 文本分类 | `text-classification` | ⚠️ 实验性(启发式,需自备模型) |
| 情感分析 | `sentiment-analysis` | ⚠️ 实验性(启发式,需自备模型) |
| 特征提取 | `feature-extraction` | ⚠️ 实验性(模拟嵌入,需自备模型) |
| 图像分类 | `image-classification` | ⚠️ 实验性(启发式,需自备模型) |
| 目标检测 | `object-detection` | ⚠️ 实验性(真实 NMS/IoU,需自备模型) |
| 语音识别 | `automatic-speech-recognition` | ⚠️ 实验性(仅预处理,需自备模型) |
| 零样本分类 | `zero-shot-classification` | ⚠️ 实验性(随机打分,需 NLI 模型) |
| 问答系统 | `question-answering` | ⚠️ 实验性(词重叠启发式,需自备模型) |
> **注意:** 实验性流水线可用于演示和测试 API。生产环境请通过 `options.model` 提供真实 ONNX 模型,或使用 **transformers.js 适配后端** 接入 HuggingFace 模型生态。
## ⚡ 核心差异
### 与 transformers.js 对比
| 特性 | transformers.js | edgeFlow.js |
|------|-----------------|-------------|
| 任务调度器 | ❌ 无 | ✅ 优先级队列 + 并发限制 |
| 任务取消 | ❌ 无 | ✅ 支持取消排队任务 |
| 批量处理 | ⚠️ 手动 | ✅ 内置批处理 |
| 内存作用域 | ❌ 无 | ✅ 作用域自动清理 |
| 模型预加载 | ❌ 无 | ✅ 后台加载 |
| 断点续传 | ❌ 无 | ✅ 分片 + 续传 |
| 模型缓存 | ⚠️ 基础 | ✅ IndexedDB + 统计 |
| TypeScript | ✅ 完整 | ✅ 完整 |
## 🔧 配置
### 运行时选择
```typescript
import { pipeline } from 'edgeflowjs';
// 自动选择(推荐)
const model = await pipeline('text-classification');
// 指定运行时
const model = await pipeline('text-classification', {
runtime: 'webgpu' // 或 'webnn', 'wasm', 'auto'
});
```
### 内存管理
```typescript
import { pipeline, getMemoryStats, gc } from 'edgeflowjs';
const model = await pipeline('text-classification');
// 使用模型
await model.run('text');
// 检查内存使用
console.log(getMemoryStats());
// { allocated: 50MB, used: 45MB, peak: 52MB, tensorCount: 12 }
// 显式清理
model.dispose();
// 强制垃圾回收
gc();
```
### 调度器配置
```typescript
import { configureScheduler } from 'edgeflowjs';
configureScheduler({
maxConcurrentTasks: 4,
maxConcurrentPerModel: 1,
defaultTimeout: 30000,
enableBatching: true,
maxBatchSize: 32,
});
```
### 缓存
```typescript
import { pipeline, Cache } from 'edgeflowjs';
// 创建缓存
const cache = new Cache({
strategy: 'lru',
maxSize: 100 * 1024 * 1024, // 100MB
persistent: true, // 使用 IndexedDB
});
const model = await pipeline('text-classification', {
cache: true
});
```
## 🛠️ 高级用法
### 自定义模型加载
```typescript
import { loadModel, runInference } from 'edgeflowjs';
// 从 URL 加载,支持缓存、分片和断点续传
const model = await loadModel('https://example.com/model.bin', {
runtime: 'webgpu',
quantization: 'int8',
cache: true, // 启用 IndexedDB 缓存(默认: true)
resumable: true, // 启用断点续传(默认: true)
chunkSize: 5 * 1024 * 1024, // 大模型使用 5MB 分片
onProgress: (progress) => console.log(`加载中: ${progress * 100}%`)
});
// 运行推理
const outputs = await runInference(model, inputs);
// 清理
model.dispose();
```
### 模型预加载
```typescript
import { preloadModel, preloadModels, getPreloadStatus } from 'edgeflowjs';
// 后台预加载单个模型(支持优先级)
preloadModel('https://example.com/model1.onnx', { priority: 10 });
// 预加载多个模型
preloadModels([
{ url: 'https://example.com/model1.onnx', priority: 10 },
{ url: 'https://example.com/model2.onnx', priority: 5 },
]);
// 检查预加载状态
const status = getPreloadStatus('https://example.com/model1.onnx');
// 'pending' | 'loading' | 'complete' | 'error' | 'not_found'
```
### 模型缓存
```typescript
import {
isModelCached,
getCachedModel,
deleteCachedModel,
clearModelCache,
getModelCacheStats
} from 'edgeflowjs';
// 检查模型是否已缓存
if (await isModelCached('https://example.com/model.onnx')) {
console.log('模型已缓存!');
}
// 直接获取缓存的模型数据
const modelData = await getCachedModel('https://example.com/model.onnx');
// 删除特定缓存的模型
await deleteCachedModel('https://example.com/model.onnx');
// 清空所有缓存的模型
await clearModelCache();
// 获取缓存统计
const stats = await getModelCacheStats();
console.log(`${stats.models} 个模型已缓存,共 ${stats.totalSize} 字节`);
```
### 断点续传下载
大模型下载自动支持从断点处继续:
```typescript
import { loadModelData } from 'edgeflowjs';
// 带进度和断点续传的下载
const modelData = await loadModelData('https://example.com/large-model.onnx', {
resumable: true,
chunkSize: 10 * 1024 * 1024, // 10MB 分片
parallelConnections: 4, // 并行下载 4 个分片
onProgress: (progress) => {
console.log(`${progress.percent.toFixed(1)}% 已下载`);
console.log(`速度: ${(progress.speed / 1024 / 1024).toFixed(2)} MB/s`);
console.log(`预计剩余: ${(progress.eta / 1000).toFixed(0)}秒`);
console.log(`分片 ${progress.currentChunk}/${progress.totalChunks}`);
}
});
```
### 模型量化
```typescript
import { quantize } from 'edgeflowjs/tools';
const quantized = await quantize(model, {
method: 'int8',
calibrationData: samples,
});
console.log(`压缩比: ${quantized.compressionRatio}x`);
// 压缩比: 3.8x
```
### 性能测试
```typescript
import { benchmark } from 'edgeflowjs/tools';
const result = await benchmark(
() => model.run('sample text'),
{ warmupRuns: 5, runs: 100 }
);
console.log(result);
// {
// avgTime: 12.5,
// minTime: 10.2,
// maxTime: 18.3,
// throughput: 80 // 推理次数/秒
// }
```
### 内存作用域
```typescript
import { withMemoryScope, tensor } from 'edgeflowjs';
const result = await withMemoryScope(async (scope) => {
// 在作用域中追踪张量
const a = scope.track(tensor([1, 2, 3]));
const b = scope.track(tensor([4, 5, 6]));
// 处理...
const output = process(a, b);
// 保留结果,释放其他
return scope.keep(output);
});
// a 和 b 自动释放
```
## 🔌 张量操作
```typescript
import { tensor, zeros, ones, matmul, softmax, relu } from 'edgeflowjs';
// 创建张量
const a = tensor([[1, 2], [3, 4]]);
const b = zeros([2, 2]);
const c = ones([2, 2]);
// 运算
const d = matmul(a, c);
const probs = softmax(d);
const activated = relu(d);
// 清理
a.dispose();
b.dispose();
c.dispose();
```
## 🌐 浏览器支持
| 浏览器 | WebGPU | WebNN | WASM |
|--------|--------|-------|------|
| Chrome 113+ | ✅ | ✅ | ✅ |
| Edge 113+ | ✅ | ✅ | ✅ |
| Firefox 118+ | ⚠️ 需开启 | ❌ | ✅ |
| Safari 17+ | ⚠️ 预览版 | ❌ | ✅ |
## Star History
[](https://www.star-history.com/?repos=s-zx%2FedgeFlow.js&type=date&legend=top-left)
## 📖 API 参考
### 核心
- `pipeline(task, options?)` - 为任务创建流水线
- `loadModel(url, options?)` - 从 URL 加载模型
- `runInference(model, inputs)` - 运行模型推理
- `getScheduler()` - 获取全局调度器
- `getMemoryManager()` - 获取内存管理器
- `runInWorker(url, inputs)` - 在 Web Worker 中运行推理
- `WorkerPool` - 管理多个 Worker 进行并行推理
### 流水线
- `TextClassificationPipeline` - 文本/情感分类
- `SentimentAnalysisPipeline` - 情感分析
- `FeatureExtractionPipeline` - 文本嵌入
- `ImageClassificationPipeline` - 图像分类
- `TextGenerationPipeline` - 文本生成(支持流式输出)
- `ObjectDetectionPipeline` - 目标检测(带边界框)
- `AutomaticSpeechRecognitionPipeline` - 语音转文字
- `ZeroShotClassificationPipeline` - 零样本分类
- `QuestionAnsweringPipeline` - 抽取式问答
### HuggingFace Hub
- `fromHub(modelId, options?)` - 从 HuggingFace 加载模型包
- `fromTask(task, options?)` - 按任务加载推荐模型
- `downloadTokenizer(modelId)` - 仅下载分词器
- `downloadConfig(modelId)` - 仅下载配置
- `POPULAR_MODELS` - 按任务分类的热门模型注册表
### 工具类
- `Tokenizer` - BPE/WordPiece 分词器,支持 HuggingFace 格式
- `ImagePreprocessor` - 图像预处理器,支持 HuggingFace 配置
- `AudioPreprocessor` - 音频预处理器,支持 Whisper/wav2vec
- `Cache` - LRU 缓存工具
### 工具
- `quantize(model, options)` - 模型量化
- `prune(model, options)` - 模型剪枝
- `benchmark(fn, options)` - 性能基准测试
- `analyzeModel(model)` - 分析模型结构
## 🤝 贡献
欢迎贡献!请查看我们的 [贡献指南](CONTRIBUTING.md) 了解详情。
1. Fork 本仓库
2. 创建特性分支 (`git checkout -b feature/amazing-feature`)
3. 提交更改 (`git commit -m 'Add amazing feature'`)
4. 推送到分支 (`git push origin feature/amazing-feature`)
5. 发起 Pull Request
## 📄 许可证
MIT © edgeFlow.js Contributors
---
**[快速开始](https://edgeflow.js.org/getting-started) · [API 文档](https://edgeflow.js.org/api) · [示例](examples/)**
用 ❤️ 为边缘 AI 社区打造
================================================
FILE: benchmarks/README.md
================================================
# edgeFlow.js Benchmarks
This directory contains performance benchmarks for edgeFlow.js.
## Running Benchmarks
```bash
npm install
npm run build
npm run test -- --run tests/unit/
```
> **Note:** A dedicated `npm run benchmark` script with browser-based benchmarks is planned. The unit tests include basic tensor and scheduler performance validation.
## Benchmark Types
### 1. Tensor Operations
- Tensor creation and disposal
- Shape transformation (reshape, transpose)
- Math operations (add, matmul, softmax)
### 2. Scheduler Throughput
- Priority queue ordering under load
- Concurrent task execution
- Task cancellation overhead
### 3. Model Loading
- Cached vs uncached loads (IndexedDB)
- Chunked download with resume
- Preloading pipeline
### 4. Inference Latency
- Text generation (TinyLlama) end-to-end
- Image segmentation (SlimSAM) encode + decode
## How edgeFlow.js Adds Value
edgeFlow.js is not a replacement for inference engines like ONNX Runtime or transformers.js. It is an **orchestration layer** that adds production features on top of them:
| Scenario | Without edgeFlow.js | With edgeFlow.js |
|----------|---------------------|------------------|
| 5 concurrent model calls | Uncontrolled, may OOM | Scheduled with concurrency limits |
| Repeated inference on same input | Recomputed every time | Cached results (LRU/TTL) |
| Large model download interrupted | Start from scratch | Resume from last chunk |
| Memory leak from undisposed tensors | Silent leak | Detected and warned |
> All benchmark claims will be backed by reproducible scripts before the 1.0 release.
## Custom Benchmarks
```typescript
import { runBenchmark, benchmarkSuite } from 'edgeflowjs/tools';
const result = await runBenchmark(
async () => {
await model.run(input);
},
{
warmupRuns: 5,
runs: 20,
verbose: true,
}
);
console.log(`Average: ${result.avgTime.toFixed(2)}ms`);
console.log(`Throughput: ${result.throughput.toFixed(2)} ops/sec`);
const results = await benchmarkSuite({
'small-model': async () => smallModel.run(input),
'large-model': async () => largeModel.run(input),
});
```
================================================
FILE: demo/demo.js
================================================
/**
* edgeFlow.js Interactive Demo
*
* Organized into modules:
* 1. State & Config
* 2. Utilities
* 3. UI Helpers
* 4. Core Features
* 5. SAM Interactive Segmentation (Real Model)
* 6. AI Chat (Real Model)
* 7. Demo Class (Public API)
* 8. Initialization
*/
import * as edgeFlow from '/dist/edgeflow.browser.js';
// Expose edgeFlow globally for debugging
window.edgeFlow = edgeFlow;
/* ==========================================================================
1. State & Config
========================================================================== */
const state = {
model: null,
testTensors: [],
monitor: null,
// SAM state
samPipeline: null,
samModelLoaded: false,
samImage: null,
samPoints: [],
samCanvas: null,
samMaskCanvas: null,
samCtx: null,
samMaskCtx: null,
// Chat state
chatPipeline: null,
chatModelLoaded: false,
chatHistory: [],
chatGenerating: false,
};
const config = {
defaultSeqLen: 128,
monitorSampleInterval: 500,
monitorHistorySize: 30,
};
/* ==========================================================================
2. Utilities
========================================================================== */
const utils = {
/**
* Format bytes to human readable string
*/
formatBytes(bytes) {
if (!bytes) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB'];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
},
/**
* Sleep for given milliseconds
*/
sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
},
/**
* Generate placeholder model inputs based on model metadata
*/
createModelInputs(model, seqLen = config.defaultSeqLen) {
return model.metadata.inputs.map(spec => {
const data = new Array(seqLen).fill(0);
if (spec.name.includes('input')) {
data[0] = 101; // [CLS]
data[1] = 2054; // sample token
data[2] = 102; // [SEP]
} else if (spec.name.includes('mask')) {
data[0] = 1;
data[1] = 1;
data[2] = 1;
}
return edgeFlow.tensor(data, [1, seqLen], 'int64');
});
},
/**
* Simple tokenization and inference
*/
async inferText(text) {
if (!state.model) throw new Error('Model not loaded');
const tokens = text.toLowerCase().split(/\s+/);
const maxLen = config.defaultSeqLen;
const numTokens = Math.min(tokens.length + 2, maxLen);
const inputs = state.model.metadata.inputs.map(spec => {
const data = new Array(maxLen).fill(0);
if (spec.name.includes('input')) {
data[0] = 101; // [CLS]
tokens.slice(0, maxLen - 2).forEach((t, i) => {
// Simple hash-based token ID (demo only)
data[i + 1] = Math.abs(t.split('').reduce((a, c) => a + c.charCodeAt(0), 0)) % 30000;
});
data[numTokens - 1] = 102; // [SEP]
} else if (spec.name.includes('mask')) {
for (let i = 0; i < numTokens; i++) data[i] = 1;
}
return edgeFlow.tensor(data, [1, maxLen], 'int64');
});
const outputs = await edgeFlow.runInference(state.model, inputs);
const outputData = outputs[0].toArray();
// Calculate sentiment score
const score = outputData.length >= 2
? Math.exp(outputData[1]) / (Math.exp(outputData[0]) + Math.exp(outputData[1]))
: outputData[0] > 0.5 ? outputData[0] : 1 - outputData[0];
// Cleanup
inputs.forEach(t => t.dispose());
outputs.forEach(t => t.dispose());
return {
label: score > 0.5 ? 'positive' : 'negative',
score,
};
},
};
/* ==========================================================================
3. UI Helpers
========================================================================== */
const ui = {
/**
* Get element by ID
*/
$(id) {
return document.getElementById(id);
},
/**
* Set output content
*/
setOutput(id, content, type = '') {
const el = this.$(id);
if (!el) return;
const className = type ? `class="${type}"` : '';
el.innerHTML = `${content} `;
},
/**
* Show loading state
*/
showLoading(id, message = 'Loading...') {
this.setOutput(id, ` ${message}`);
},
/**
* Show success message
*/
showSuccess(id, message) {
this.setOutput(id, `✓ ${message}`, 'success');
},
/**
* Show error message
*/
showError(id, error) {
const message = error instanceof Error ? error.message : String(error);
this.setOutput(id, `Error: ${message}`, 'error');
},
/**
* Render status list
*/
renderStatusList(id, items) {
const el = this.$(id);
if (!el) return;
el.innerHTML = items.map(({ label, value, status }) => `
${label}
${value}
`).join('');
},
/**
* Render metrics
*/
renderMetrics(id, metrics) {
const el = this.$(id);
if (!el) return;
el.innerHTML = metrics.map(({ value, label }) => `
`).join('');
el.classList.remove('hidden');
},
/**
* Update runtime status
*/
async updateRuntimeStatus() {
try {
const runtimes = await edgeFlow.getAvailableRuntimes();
this.renderStatusList('runtime-status', [
{ label: 'WebGPU', value: runtimes.get('webgpu') ? 'Ready' : 'N/A', status: runtimes.get('webgpu') ? 'success' : 'error' },
{ label: 'WebNN', value: runtimes.get('webnn') ? 'Ready' : 'N/A', status: runtimes.get('webnn') ? 'success' : 'error' },
{ label: 'WASM', value: runtimes.get('wasm') ? 'Ready' : 'N/A', status: runtimes.get('wasm') ? 'success' : 'error' },
]);
} catch {
this.renderStatusList('runtime-status', [
{ label: 'WebGPU', value: 'N/A', status: 'error' },
{ label: 'WebNN', value: 'N/A', status: 'error' },
{ label: 'WASM', value: 'N/A', status: 'error' },
]);
}
},
/**
* Update memory status
*/
updateMemoryStatus() {
try {
const stats = edgeFlow.getMemoryStats();
this.renderStatusList('memory-status', [
{ label: 'Allocated', value: utils.formatBytes(stats.allocated || 0) },
{ label: 'Peak', value: utils.formatBytes(stats.peak || 0) },
{ label: 'Tensors', value: String(stats.tensorCount || 0) },
]);
} catch {
this.renderStatusList('memory-status', [
{ label: 'Allocated', value: '0 B' },
{ label: 'Peak', value: '0 B' },
{ label: 'Tensors', value: '0' },
]);
}
},
/**
* Update monitor metrics
*/
updateMonitorMetrics(sample) {
this.renderMetrics('monitor-metrics', [
{ value: sample.inference.count, label: 'Inferences' },
{ value: sample.inference.avgTime.toFixed(1) + 'ms', label: 'Avg Time' },
{ value: sample.inference.throughput.toFixed(1), label: 'Ops/sec' },
{ value: utils.formatBytes(sample.memory.usedHeap), label: 'Memory' },
{ value: sample.system.fps || '-', label: 'FPS' },
]);
},
/**
* Initialize default outputs
*/
initOutputs() {
const defaults = {
'model-output': ['Click "Load Model" to download an ONNX model', 'info'],
'tensor-output': ['Click "Run Tests" to test tensor operations...', ''],
'text-output': ['Load model first, then classify text...', ''],
'feature-output': ['Enter text and extract features...', ''],
'quant-output': ['Test in-browser quantization...', ''],
'debugger-output': ['Inspect tensor values and statistics...', ''],
'benchmark-output': ['Benchmark tensor operations...', ''],
'scheduler-output': ['Test task scheduling with priorities...', ''],
'memory-output': ['Test memory allocation and cleanup...', ''],
'concurrency-output': ['Test concurrent inference...', ''],
};
for (const [id, [msg, type]] of Object.entries(defaults)) {
this.setOutput(id, msg, type);
}
// Initialize monitor metrics
this.renderMetrics('monitor-metrics', [
{ value: '0', label: 'Inferences' },
{ value: '0ms', label: 'Avg Time' },
{ value: '0', label: 'Ops/sec' },
{ value: '0 B', label: 'Memory' },
{ value: '-', label: 'FPS' },
]);
},
};
/* ==========================================================================
4. Core Features
========================================================================== */
const features = {
/**
* Load ONNX model
*/
async loadModel() {
const url = ui.$('model-url')?.value;
if (!url) {
ui.setOutput('model-output', 'Enter a model URL', 'warn');
return;
}
ui.showLoading('model-output', 'Loading model...');
try {
const start = performance.now();
state.model = await edgeFlow.loadModel(url, { runtime: 'wasm' });
const time = ((performance.now() - start) / 1000).toFixed(2);
const info = [
`✓ Model loaded in ${time}s `,
`Name: ${state.model.metadata.name}`,
`Size: ${utils.formatBytes(state.model.metadata.sizeBytes)}`,
`Inputs: ${state.model.metadata.inputs.map(i => i.name).join(', ')}`,
].join('\n');
ui.$('model-output').innerHTML = `${info} `;
ui.updateMemoryStatus();
} catch (e) {
ui.showError('model-output', e);
}
},
/**
* Test model inference
*/
async testModel() {
if (!state.model) {
ui.setOutput('model-output', 'Load model first', 'warn');
return;
}
ui.showLoading('model-output', 'Running inference...');
try {
const inputs = utils.createModelInputs(state.model);
const start = performance.now();
const outputs = await edgeFlow.runInference(state.model, inputs);
const time = (performance.now() - start).toFixed(2);
const data = outputs[0].toArray();
const info = [
`✓ Inference: ${time}ms `,
`Output: [${data.slice(0, 5).map(x => x.toFixed(4)).join(', ')}...]`,
].join('\n');
ui.$('model-output').innerHTML = `${info} `;
inputs.forEach(t => t.dispose());
outputs.forEach(t => t.dispose());
} catch (e) {
ui.showError('model-output', e);
}
},
/**
* Run tensor operation tests
*/
testTensors() {
try {
const a = edgeFlow.tensor([[1, 2], [3, 4]]);
const b = edgeFlow.tensor([[5, 6], [7, 8]]);
const sum = edgeFlow.add(a, b);
const rand = edgeFlow.random([10]);
const probs = edgeFlow.softmax(edgeFlow.tensor([1, 2, 3, 4]));
const info = [
`✓ All tensor tests passed `,
`• Created 2x2 tensor`,
`• Addition: [${sum.toArray()}]`,
`• Random: [${rand.toArray().slice(0, 5).map(x => x.toFixed(2))}...]`,
`• Softmax: [${probs.toArray().map(x => x.toFixed(3))}]`,
].join('\n');
ui.$('tensor-output').innerHTML = `${info} `;
[a, b, sum, rand, probs].forEach(t => t.dispose());
ui.updateMemoryStatus();
} catch (e) {
ui.showError('tensor-output', e);
}
},
/**
* Classify single text
*/
async classifyText() {
if (!state.model) {
ui.setOutput('text-output', 'Load model first', 'warn');
return;
}
const text = ui.$('text-input')?.value;
if (!text) return;
ui.showLoading('text-output', 'Classifying...');
try {
const result = await utils.inferText(text);
const emoji = result.label === 'positive' ? '😊' : '😞';
const pct = (result.score * 100).toFixed(1);
ui.$('text-output').innerHTML = `${emoji} ${result.label.toUpperCase()} (${pct}%) `;
} catch (e) {
ui.showError('text-output', e);
}
},
/**
* Batch classification
*/
async classifyBatch() {
if (!state.model) {
ui.setOutput('text-output', 'Load model first', 'warn');
return;
}
const texts = ['I love this!', 'This is terrible.', 'Amazing!', 'Worst ever.', 'Pretty good.'];
ui.showLoading('text-output', 'Processing batch...');
try {
const start = performance.now();
const results = await Promise.all(texts.map(t => utils.inferText(t)));
const time = (performance.now() - start).toFixed(0);
const lines = results.map((r, i) => {
const emoji = r.label === 'positive' ? '😊' : '😞';
return `${emoji} "${texts[i]}" → ${r.label}`;
});
lines.push('', `Total: ${time}ms `);
ui.$('text-output').innerHTML = `${lines.join('\n')} `;
} catch (e) {
ui.showError('text-output', e);
}
},
/**
* Extract features
*/
async extractFeatures() {
if (!state.model) {
ui.setOutput('feature-output', 'Load model first', 'warn');
return;
}
const text = ui.$('feature-input')?.value;
if (!text) return;
ui.showLoading('feature-output', 'Extracting...');
try {
const inputs = utils.createModelInputs(state.model);
const start = performance.now();
const outputs = await edgeFlow.runInference(state.model, inputs);
const time = (performance.now() - start).toFixed(2);
const embeddings = outputs[0].toArray();
const norm = Math.sqrt(embeddings.reduce((a, b) => a + b * b, 0));
const info = [
`✓ Features extracted in ${time}ms `,
`Dimension: ${embeddings.length}`,
`L2 Norm: ${norm.toFixed(4)}`,
`Sample: [${embeddings.slice(0, 5).map(x => x.toFixed(4)).join(', ')}...]`,
].join('\n');
ui.$('feature-output').innerHTML = `${info} `;
inputs.forEach(t => t.dispose());
outputs.forEach(t => t.dispose());
} catch (e) {
ui.showError('feature-output', e);
}
},
/**
* Quantization demo
*/
quantize() {
try {
const weights = edgeFlow.tensor([0.5, -0.3, 0.8, -0.1, 0.9, -0.7, 0.2, -0.4], [2, 4], 'float32');
const { tensor: quantized, scale, zeroPoint } = edgeFlow.quantizeTensor(weights, 'int8');
const dequantized = edgeFlow.dequantizeTensor(quantized, scale, zeroPoint, 'int8');
const original = weights.toArray();
const recovered = dequantized.toArray();
const maxError = Math.max(...original.map((v, i) => Math.abs(v - recovered[i])));
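// Sanity note (assuming the usual affine int8 scheme, q ≈ round(x / scale) + zeroPoint
// and x̂ = (q - zeroPoint) * scale): maxError is the worst per-element round-trip loss,
// which stays on the order of scale / 2 while values fit in the int8 range.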
const info = [
`✓ Int8 Quantization `,
`Original: [${original.map(v => v.toFixed(3)).join(', ')}]`,
`Quantized: [${quantized.toArray().join(', ')}]`,
`Dequantized: [${recovered.map(v => v.toFixed(3)).join(', ')}]`,
`Scale: ${scale.toFixed(6)}, Max Error: ${maxError.toFixed(6)}`,
].join('\n');
ui.$('quant-output').innerHTML = `${info} `;
[weights, quantized, dequantized].forEach(t => t.dispose());
} catch (e) {
ui.showError('quant-output', e);
}
},
/**
* Pruning demo
*/
prune() {
try {
const weights = edgeFlow.tensor([0.5, -0.1, 0.8, -0.05, 0.9, -0.02, 0.2, -0.4], [2, 4], 'float32');
const { tensor: pruned, sparsity } = edgeFlow.pruneTensor(weights, { ratio: 0.5 });
const info = [
`✓ Magnitude Pruning (50%) `,
`Original: [${weights.toArray().map(v => v.toFixed(2)).join(', ')}]`,
`Pruned: [${pruned.toArray().map(v => v.toFixed(2)).join(', ')}]`,
`Sparsity: ${(sparsity * 100).toFixed(1)}%`,
].join('\n');
ui.$('quant-output').innerHTML = `${info} `;
[weights, pruned].forEach(t => t.dispose());
} catch (e) {
ui.showError('quant-output', e);
}
},
/**
* Debugger demo
*/
debug() {
try {
const data = Array.from({ length: 100 }, () => Math.random() * 2 - 1);
const tensor = edgeFlow.tensor(data, [10, 10], 'float32');
const inspection = edgeFlow.inspectTensor(tensor, 'random_weights');
const histogram = edgeFlow.createAsciiHistogram(inspection.histogram, 25, 4);
const info = [
`Tensor: ${inspection.name} `,
`Shape: [${inspection.shape}], Size: ${inspection.size}`,
`Statistics: `,
` Min: ${inspection.stats.min.toFixed(4)}`,
` Max: ${inspection.stats.max.toFixed(4)}`,
` Mean: ${inspection.stats.mean.toFixed(4)}`,
` Std: ${inspection.stats.std.toFixed(4)}`,
'',
histogram,
].join('\n');
ui.$('debugger-output').innerHTML = `${info} `;
tensor.dispose();
} catch (e) {
ui.showError('debugger-output', e);
}
},
/**
* Benchmark demo
*/
async benchmark() {
ui.showLoading('benchmark-output', 'Running benchmark...');
try {
const result = await edgeFlow.runBenchmark(async () => {
const t = edgeFlow.tensor(Array.from({ length: 1000 }, () => Math.random()), [1000], 'float32');
const sum = t.toArray().reduce((a, b) => a + b, 0);
t.dispose();
return sum;
}, { warmupRuns: 2, runs: 5, name: 'Tensor Sum (1000)' });
const info = [
`Benchmark: ${result.name} `,
`Avg: ${result.avgTime.toFixed(2)}ms`,
`Min: ${result.minTime.toFixed(2)}ms`,
`Max: ${result.maxTime.toFixed(2)}ms`,
`Throughput: ${result.throughput.toFixed(0)} ops/sec`,
].join('\n');
ui.$('benchmark-output').innerHTML = `${info} `;
} catch (e) {
ui.showError('benchmark-output', e);
}
},
/**
* Scheduler test
*/
async testScheduler() {
ui.showLoading('scheduler-output', 'Testing scheduler...');
try {
const scheduler = edgeFlow.getScheduler();
const task1 = scheduler.schedule('model-a', async () => { await utils.sleep(100); return 'Task 1'; }, 'high');
const task2 = scheduler.schedule('model-b', async () => { await utils.sleep(50); return 'Task 2'; }, 'normal');
const task3 = scheduler.schedule('model-a', async () => { await utils.sleep(75); return 'Task 3'; }, 'low');
const [r1, r2, r3] = await Promise.all([task1.wait(), task2.wait(), task3.wait()]);
const info = [
`✓ Scheduler Test Passed `,
`• ${r1} (high priority)`,
`• ${r2} (normal priority)`,
`• ${r3} (low priority)`,
].join('\n');
ui.$('scheduler-output').innerHTML = `${info} `;
} catch (e) {
ui.showError('scheduler-output', e);
}
},
/**
* Memory allocation test
*/
allocateMemory() {
try {
const before = edgeFlow.getMemoryStats();
for (let i = 0; i < 10; i++) {
state.testTensors.push(edgeFlow.random([100, 100]));
}
const after = edgeFlow.getMemoryStats();
const info = [
`✓ Allocated 10 tensors (100x100) `,
`Before: ${utils.formatBytes(before.allocated || 0)}, ${before.tensorCount || 0} tensors`,
`After: ${utils.formatBytes(after.allocated || 0)}, ${after.tensorCount || 0} tensors`,
].join('\n');
ui.$('memory-output').innerHTML = `${info} `;
ui.updateMemoryStatus();
} catch (e) {
ui.showError('memory-output', e);
}
},
/**
* Memory cleanup
*/
cleanupMemory() {
state.testTensors.forEach(t => {
if (!t.isDisposed) t.dispose();
});
state.testTensors = [];
edgeFlow.gc();
ui.showSuccess('memory-output', 'Memory cleaned up');
ui.updateMemoryStatus();
},
/**
* Concurrency test
*/
async testConcurrency() {
if (!state.model) {
ui.setOutput('concurrency-output', 'Load model first', 'warn');
ui.$('concurrency-metrics')?.classList.add('hidden');
return;
}
ui.showLoading('concurrency-output', 'Running concurrent tasks...');
try {
const texts = ['Great!', 'Terrible!', 'Amazing!', 'Awful!', 'Good!', 'Bad!', 'Nice!', 'Horrible!'];
const start = performance.now();
const results = await Promise.all(texts.map(t => utils.inferText(t)));
const total = performance.now() - start;
const lines = [
`✓ Concurrent execution complete `,
...results.map((r, i) => `${r.label === 'positive' ? '😊' : '😞'} "${texts[i]}"`),
];
ui.$('concurrency-output').innerHTML = `${lines.join('\n')} `;
ui.renderMetrics('concurrency-metrics', [
{ value: total.toFixed(0) + 'ms', label: 'Total' },
{ value: String(texts.length), label: 'Tasks' },
{ value: (total / texts.length).toFixed(0) + 'ms', label: 'Avg' },
]);
} catch (e) {
ui.showError('concurrency-output', e);
}
},
/**
* Start performance monitor
*/
startMonitor() {
if (!state.monitor) {
state.monitor = new edgeFlow.PerformanceMonitor({
sampleInterval: config.monitorSampleInterval,
historySize: config.monitorHistorySize,
});
state.monitor.onSample(sample => ui.updateMonitorMetrics(sample));
}
state.monitor.start();
},
/**
* Stop monitor
*/
stopMonitor() {
if (state.monitor) {
state.monitor.stop();
}
},
/**
* Simulate inferences for monitor
*/
simulateInferences() {
if (!state.monitor) {
this.startMonitor();
}
for (let i = 0; i < 5; i++) {
setTimeout(() => {
state.monitor?.recordInference(30 + Math.random() * 70);
}, i * 100);
}
},
/**
* Open dashboard modal
*/
openDashboard() {
if (!state.monitor) {
this.startMonitor();
this.simulateInferences();
}
const modal = ui.$('dashboard-modal');
const frame = ui.$('dashboard-frame');
if (modal && frame) {
frame.srcdoc = edgeFlow.generateDashboardHTML(state.monitor);
modal.classList.add('active');
document.body.style.overflow = 'hidden';
}
},
/**
* Close dashboard modal
*/
closeDashboard() {
const modal = ui.$('dashboard-modal');
if (modal) {
modal.classList.remove('active');
document.body.style.overflow = '';
}
},
};
/* ==========================================================================
5. SAM Interactive Segmentation (Real Model)
========================================================================== */
const sam = {
/**
* Initialize SAM UI and start model loading
*/
async init() {
const fileInput = ui.$('sam-file-input');
const container = ui.$('sam-container');
if (fileInput) {
fileInput.addEventListener('change', (e) => this.handleFileSelect(e));
}
// Drag and drop
if (container) {
container.addEventListener('dragover', (e) => {
e.preventDefault();
container.classList.add('dragover');
});
container.addEventListener('dragleave', () => {
container.classList.remove('dragover');
});
container.addEventListener('drop', (e) => {
e.preventDefault();
container.classList.remove('dragover');
const file = e.dataTransfer?.files[0];
if (file && file.type.startsWith('image/')) {
this.loadImage(file);
}
});
}
// Start loading SAM models automatically
await this.loadModels();
},
/**
* Load SAM models with progress display
*/
async loadModels() {
const loader = ui.$('sam-loader');
const loaderText = ui.$('sam-loader-text');
const loaderDetail = ui.$('sam-loader-detail');
const progress = ui.$('sam-progress');
const samContainer = ui.$('sam-container');
try {
// Create pipeline
state.samPipeline = edgeFlow.createImageSegmentationPipeline();
// Load models with progress
await state.samPipeline.loadModels((progressInfo) => {
const { model, progress: pct, loaded, total } = progressInfo;
if (loaderText) {
loaderText.textContent = `Loading ${model}... (${utils.formatBytes(loaded)} / ${utils.formatBytes(total)})`;
}
if (loaderDetail) {
loaderDetail.textContent = `${pct}%`;
}
if (progress) {
progress.style.width = `${pct}%`;
}
});
state.samModelLoaded = true;
// Hide loader, show main UI
if (loader) loader.classList.add('hidden');
if (samContainer) samContainer.classList.remove('hidden');
// Enable buttons
ui.$('sam-sample-btn')?.removeAttribute('disabled');
ui.$('sam-clear-btn')?.removeAttribute('disabled');
ui.$('sam-download-btn')?.removeAttribute('disabled');
ui.setOutput('sam-output', '✓ SAM model loaded! Click to upload an image or use "Sample Image".', 'success');
} catch (error) {
console.error('SAM model loading failed:', error);
if (loaderText) {
loaderText.textContent = `Failed to load model: ${error.message}`;
loaderText.style.color = 'var(--error)';
}
if (loaderDetail) {
loaderDetail.textContent = 'Check console for details';
}
ui.showError('sam-output', error);
}
},
/**
* Handle file selection
*/
handleFileSelect(e) {
const file = e.target?.files?.[0];
if (file) {
this.loadImage(file);
}
},
/**
* Load image from file or URL
*/
async loadImage(source) {
if (!state.samModelLoaded) {
ui.setOutput('sam-output', 'Model not loaded yet. Please wait...', 'warn');
return;
}
ui.setOutput('sam-output', 'Loading image...', 'info');
try {
const img = new Image();
img.crossOrigin = 'anonymous';
await new Promise((resolve, reject) => {
img.onload = resolve;
img.onerror = reject;
if (typeof source === 'string') {
img.src = source;
} else {
img.src = URL.createObjectURL(source);
}
});
// Show workspace
ui.$('sam-upload')?.classList.add('hidden');
ui.$('sam-workspace')?.classList.remove('hidden');
// Setup canvases
const canvas = ui.$('sam-canvas');
const maskCanvas = ui.$('sam-mask-canvas');
if (canvas && maskCanvas) {
state.samCanvas = canvas;
state.samMaskCanvas = maskCanvas;
state.samCtx = canvas.getContext('2d');
state.samMaskCtx = maskCanvas.getContext('2d');
// Set canvas size
const container = ui.$('sam-workspace');
const containerWidth = container?.clientWidth || 400;
const containerHeight = container?.clientHeight || 250;
const scale = Math.min(
containerWidth / img.width,
containerHeight / img.height
);
canvas.width = img.width * scale;
canvas.height = img.height * scale;
maskCanvas.width = canvas.width;
maskCanvas.height = canvas.height;
// Draw image
state.samCtx.drawImage(img, 0, 0, canvas.width, canvas.height);
state.samImage = img;
state.samPoints = [];
// Setup click handler
canvas.onclick = (e) => this.handleClick(e, 1); // Left click = positive
canvas.oncontextmenu = (e) => {
e.preventDefault();
this.handleClick(e, 0); // Right click = negative
};
// Encode image with SAM encoder
ui.setOutput('sam-output', 'Encoding image with SAM...', 'info');
const encodeStart = performance.now();
await state.samPipeline.setImage(img);
const encodeTime = (performance.now() - encodeStart).toFixed(0);
ui.setOutput('sam-output', `✓ Image encoded in ${encodeTime}ms. Click to segment objects. Left-click = include, Right-click = exclude.`, 'success');
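// The expensive SAM image encoder runs only once here (setImage); later clicks reuse the
// cached embedding and typically only need the lightweight mask decoder, which is why
// per-click segmentation is much faster than this initial encode (see clearImage() in reset()).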
}
} catch (error) {
ui.showError('sam-output', error);
}
},
/**
* Load sample image
*/
async loadSampleImage() {
if (!state.samModelLoaded) {
ui.setOutput('sam-output', 'Model not loaded yet. Please wait...', 'warn');
return;
}
// Using a reliable public image URL
const sampleUrl = 'https://images.unsplash.com/photo-1587300003388-59208cc962cb?w=640';
await this.loadImage(sampleUrl);
},
/**
* Handle canvas click
*/
async handleClick(e, label) {
if (!state.samCanvas || !state.samPipeline || !state.samModelLoaded) return;
const rect = state.samCanvas.getBoundingClientRect();
const x = (e.clientX - rect.left) / rect.width;
const y = (e.clientY - rect.top) / rect.height;
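// Click coordinates are stored normalized to [0, 1] relative to the canvas, so
// drawPoints() can position indicators with CSS percentages at any canvas size.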
// Add point
state.samPoints.push({ x, y, label });
// Draw point indicator
this.drawPoints();
// Run segmentation
ui.setOutput('sam-output', 'Segmenting...', 'info');
try {
const startTime = performance.now();
const result = await state.samPipeline.segment({
points: state.samPoints,
});
const time = (performance.now() - startTime).toFixed(0);
// Draw mask
this.drawMask(result);
ui.setOutput('sam-output', `✓ Segmented in ${time}ms (score: ${result.score.toFixed(2)})`, 'success');
} catch (error) {
ui.showError('sam-output', error);
}
},
/**
* Draw points on canvas
*/
drawPoints() {
// Remove existing point indicators
document.querySelectorAll('.sam-point').forEach(el => el.remove());
const workspace = ui.$('sam-workspace');
if (!workspace || !state.samCanvas) return;
for (const point of state.samPoints) {
const indicator = document.createElement('div');
indicator.className = `sam-point ${point.label === 1 ? 'positive' : 'negative'}`;
indicator.style.left = `${point.x * 100}%`;
indicator.style.top = `${point.y * 100}%`;
workspace.appendChild(indicator);
}
},
/**
* Draw segmentation mask
*/
drawMask(result) {
if (!state.samMaskCtx || !state.samMaskCanvas) return;
const { mask, width, height } = result;
const canvas = state.samMaskCanvas;
// Create ImageData
const imageData = state.samMaskCtx.createImageData(canvas.width, canvas.height);
// Scale mask to canvas size
const scaleX = width / canvas.width;
const scaleY = height / canvas.height;
for (let y = 0; y < canvas.height; y++) {
for (let x = 0; x < canvas.width; x++) {
const srcX = Math.floor(x * scaleX);
const srcY = Math.floor(y * scaleY);
const srcIdx = srcY * width + srcX;
const dstIdx = (y * canvas.width + x) * 4;
if (mask[srcIdx] > 0) {
// Green overlay for segmented area
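// RGB (127, 169, 33) is the theme accent #7fa921; alpha 180/255 ≈ 70% opacity.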
imageData.data[dstIdx] = 127; // R
imageData.data[dstIdx + 1] = 169; // G
imageData.data[dstIdx + 2] = 33; // B
imageData.data[dstIdx + 3] = 180; // A
}
}
}
state.samMaskCtx.putImageData(imageData, 0, 0);
},
/**
* Clear segmentation
*/
clear() {
state.samPoints = [];
// Clear mask canvas
if (state.samMaskCtx && state.samMaskCanvas) {
state.samMaskCtx.clearRect(0, 0, state.samMaskCanvas.width, state.samMaskCanvas.height);
}
// Remove point indicators
document.querySelectorAll('.sam-point').forEach(el => el.remove());
ui.setOutput('sam-output', 'Cleared. Click to segment objects.', 'info');
},
/**
* Download mask as PNG
*/
downloadMask() {
if (!state.samMaskCanvas) {
ui.setOutput('sam-output', 'No mask to download', 'warn');
return;
}
// Create download link
const link = document.createElement('a');
link.download = 'segmentation-mask.png';
link.href = state.samMaskCanvas.toDataURL('image/png');
link.click();
},
/**
* Reset to upload state
*/
reset() {
state.samImage = null;
state.samPoints = [];
ui.$('sam-upload')?.classList.remove('hidden');
ui.$('sam-workspace')?.classList.add('hidden');
document.querySelectorAll('.sam-point').forEach(el => el.remove());
if (state.samMaskCtx && state.samMaskCanvas) {
state.samMaskCtx.clearRect(0, 0, state.samMaskCanvas.width, state.samMaskCanvas.height);
}
// Clear the pipeline's image embedding
if (state.samPipeline) {
state.samPipeline.clearImage();
}
ui.setOutput('sam-output', 'Click on image to segment objects. Left-click = include, Right-click = exclude.', 'info');
},
};
/* ==========================================================================
6. AI Chat (Real Model)
========================================================================== */
const chat = {
/**
* Initialize chat UI
*/
init() {
const input = ui.$('chat-input');
if (input) {
input.addEventListener('keydown', (e) => {
if (e.key === 'Enter' && !e.shiftKey && !state.chatGenerating) {
e.preventDefault();
this.send();
}
});
}
},
/**
* Load LLM model with progress display
*/
async loadModel() {
if (state.chatModelLoaded) {
ui.$('chat-container')?.classList.remove('hidden');
ui.$('llm-loader')?.classList.add('hidden');
return;
}
const loadBtn = ui.$('llm-load-btn');
const progressContainer = ui.$('llm-progress-container');
const progress = ui.$('llm-progress');
const loaderDetail = ui.$('llm-loader-detail');
try {
// Disable button and show progress
if (loadBtn) {
loadBtn.disabled = true;
loadBtn.textContent = 'Loading...';
}
if (progressContainer) progressContainer.classList.remove('hidden');
if (loaderDetail) loaderDetail.classList.remove('hidden');
this.updateStatus('loading', 'Downloading model...');
// Create pipeline
state.chatPipeline = edgeFlow.createTextGenerationPipeline();
state.chatPipeline.setChatTemplate('chatml');
// Load model with progress
await state.chatPipeline.loadModel((progressInfo) => {
const { stage, progress: pct } = progressInfo;
if (loadBtn) {
if (stage === 'tokenizer') {
loadBtn.textContent = 'Loading tokenizer...';
} else {
loadBtn.textContent = `Downloading... ${pct}%`;
}
}
if (loaderDetail) {
loaderDetail.classList.add('hidden');
}
if (progress) {
// Tokenizer is quick, model is the main download
const totalProgress = stage === 'tokenizer' ? pct * 0.05 : 5 + pct * 0.95;
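// Example: the model stage at pct = 50 maps to 5 + 50 * 0.95 = 52.5% of the bar.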
progress.style.width = `${totalProgress}%`;
}
});
state.chatModelLoaded = true;
// Hide loader, show chat UI
ui.$('llm-loader')?.classList.add('hidden');
ui.$('chat-container')?.classList.remove('hidden');
this.updateStatus('ready', 'Model loaded! Ready to chat');
} catch (error) {
console.error('LLM model loading failed:', error);
if (loadBtn) {
loadBtn.disabled = false;
loadBtn.textContent = 'Retry Load';
}
if (loaderDetail) {
loaderDetail.textContent = `Error: ${error.message}`;
loaderDetail.style.color = 'var(--error)';
}
this.updateStatus('error', `Failed: ${error.message}`);
}
},
/**
* Send message
*/
async send() {
if (!state.chatModelLoaded) {
this.updateStatus('error', 'Load model first by clicking "Load Model"');
return;
}
const input = ui.$('chat-input');
const message = input?.value?.trim();
if (!message || state.chatGenerating) return;
// Clear input
input.value = '';
// Hide welcome message
const welcome = ui.$('chat-messages')?.querySelector('.chat-welcome');
if (welcome) welcome.remove();
// Add user message
this.addMessage('user', message);
// Set generating state
state.chatGenerating = true;
this.updateStatus('loading', 'Generating...');
try {
// Add assistant message placeholder
const assistantMsg = this.addMessage('assistant', 'Thinking...', true);
// Generate response using real model
// Note: TinyLlama in WASM is slow, limit tokens for demo
let response = '';
let tokenCount = 0;
console.log('[Chat] Starting generation...');
const startTime = performance.now();
// Use streaming for real-time feedback
if (state.chatPipeline.chatStream) {
for await (const event of state.chatPipeline.chatStream(message, {
maxNewTokens: 32, // Limited for browser performance
temperature: 0.7,
topP: 0.9,
})) {
response = event.generatedText;
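// The stream is treated as cumulative: generatedText carries the full reply so far,
// so each event replaces the bubble text rather than appending to it.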
tokenCount++;
assistantMsg.textContent = response;
this.updateStatus('loading', `Generating... (${tokenCount} tokens)`);
// Scroll to bottom
const container = ui.$('chat-messages');
if (container) {
container.scrollTop = container.scrollHeight;
}
}
} else {
// Fallback to non-streaming
this.updateStatus('loading', 'Generating (this may take a while)...');
const result = await state.chatPipeline.chat(message, {
maxNewTokens: 32, // Limited for browser performance
temperature: 0.7,
topP: 0.9,
});
response = result.generatedText;
tokenCount = result.numTokens;
assistantMsg.textContent = response;
}
const elapsed = ((performance.now() - startTime) / 1000).toFixed(1);
console.log(`[Chat] Generated ${tokenCount} tokens in ${elapsed}s`);
// Remove typing indicator
assistantMsg.classList.remove('typing');
// Update history
state.chatHistory.push(
{ role: 'user', content: message },
{ role: 'assistant', content: response }
);
this.updateStatus('ready', 'Ready to chat');
} catch (error) {
this.updateStatus('error', `Error: ${error.message}`);
// Remove typing indicator
const typingMsg = ui.$('chat-messages')?.querySelector('.typing');
if (typingMsg) typingMsg.remove();
} finally {
state.chatGenerating = false;
}
// Scroll to bottom
const container = ui.$('chat-messages');
if (container) {
container.scrollTop = container.scrollHeight;
}
},
/**
* Add message to chat
*/
addMessage(role, content, isTyping = false) {
const container = ui.$('chat-messages');
if (!container) return null;
const msg = document.createElement('div');
msg.className = `chat-message ${role}${isTyping ? ' typing' : ''}`;
msg.textContent = content;
container.appendChild(msg);
container.scrollTop = container.scrollHeight;
return msg;
},
/**
* Update status indicator
*/
updateStatus(status, text) {
const dot = ui.$('chat-status')?.querySelector('.chat-status-dot');
const textEl = ui.$('chat-status-text');
if (dot) {
dot.className = `chat-status-dot ${status === 'loading' ? 'loading' : status === 'error' ? 'error' : ''}`;
}
if (textEl) {
textEl.textContent = text;
}
},
/**
* Clear chat history
*/
clear() {
state.chatHistory = [];
// Clear conversation in pipeline
if (state.chatPipeline) {
state.chatPipeline.clearConversation();
}
const container = ui.$('chat-messages');
if (container) {
container.innerHTML = `
🤖
Hi! I'm TinyLlama running entirely in your browser.
Ask me anything!
`;
}
this.updateStatus('ready', 'Ready to chat');
},
};
/* ==========================================================================
7. Demo Class (Public API)
========================================================================== */
/**
* Demo public API - exposed to window for onclick handlers
*/
window.Demo = {
// Model
loadModel: () => features.loadModel(),
testModel: () => features.testModel(),
// SAM Interactive Segmentation
loadSampleImage: () => sam.loadSampleImage(),
clearSegmentation: () => sam.clear(),
downloadMask: () => sam.downloadMask(),
// AI Chat
loadLLM: () => chat.loadModel(),
sendChat: () => chat.send(),
clearChat: () => chat.clear(),
// Core
testTensors: () => features.testTensors(),
classifyText: () => features.classifyText(),
classifyBatch: () => features.classifyBatch(),
extractFeatures: () => features.extractFeatures(),
// Tools
quantize: () => features.quantize(),
prune: () => features.prune(),
debug: () => features.debug(),
benchmark: () => features.benchmark(),
// System
testScheduler: () => features.testScheduler(),
allocateMemory: () => features.allocateMemory(),
cleanupMemory: () => features.cleanupMemory(),
testConcurrency: () => features.testConcurrency(),
// Monitor
startMonitor: () => features.startMonitor(),
stopMonitor: () => features.stopMonitor(),
simulateInferences: () => features.simulateInferences(),
openDashboard: () => features.openDashboard(),
closeDashboard: () => features.closeDashboard(),
};
/* ==========================================================================
8. Initialization
========================================================================== */
/**
* Initialize demo on DOM ready
*/
async function init() {
// Initialize UI
ui.initOutputs();
await ui.updateRuntimeStatus();
ui.updateMemoryStatus();
// Initialize Chat UI (but don't load model yet)
chat.init();
// Initialize SAM and start loading models automatically
await sam.init();
// Setup modal close handlers
const modal = ui.$('dashboard-modal');
if (modal) {
modal.addEventListener('click', (e) => {
if (e.target === modal) {
features.closeDashboard();
}
});
}
// ESC key closes modal
document.addEventListener('keydown', (e) => {
if (e.key === 'Escape') {
features.closeDashboard();
}
});
console.log('✓ edgeFlow.js Demo initialized');
}
// Wait for DOM
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', init);
} else {
init();
}
================================================
FILE: demo/index.html
================================================
edgeFlow.js - Interactive Demo
Sample Image
Clear
Download
Loading SAM model... Please wait.
TinyLlama is a 1.1B parameter language model.
Download size: ~714MB (may take several minutes)
Load Model
0%
🤖
Hi! I'm TinyLlama running entirely in your browser.
Ask me anything!
Send
Click "Load Model" to start
Start
Simulate
Dashboard
Stop
================================================
FILE: demo/server.js
================================================
/**
* Simple development server for testing edgeFlow.js
*
* Usage: node demo/server.js
*/
import { createServer } from 'http';
import { readFile } from 'fs/promises';
import { extname, join } from 'path';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
const ROOT = join(__dirname, '..');
const MIME_TYPES = {
'.html': 'text/html',
'.js': 'application/javascript',
'.mjs': 'application/javascript',
'.css': 'text/css',
'.json': 'application/json',
'.png': 'image/png',
'.jpg': 'image/jpeg',
'.svg': 'image/svg+xml',
'.wasm': 'application/wasm',
};
const PORT = process.env.PORT || 3000;
const server = createServer(async (req, res) => {
let url = req.url || '/';
// Default to demo/index.html
if (url === '/') {
url = '/demo/index.html';
}
const filePath = join(ROOT, url);
const ext = extname(filePath);
const mimeType = MIME_TYPES[ext] || 'application/octet-stream';
try {
const content = await readFile(filePath);
// Add CORS and security headers for WebGPU/WASM
res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
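// COOP + COEP together make the page cross-origin isolated, which SharedArrayBuffer-based
// multi-threaded WASM (and some WebGPU features) require.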
res.setHeader('Content-Type', mimeType);
res.setHeader('Access-Control-Allow-Origin', '*');
res.writeHead(200);
res.end(content);
} catch (error) {
if (error.code === 'ENOENT') {
res.writeHead(404);
res.end(`File not found: ${url}`);
} else {
res.writeHead(500);
res.end(`Server error: ${error.message}`);
}
}
});
server.listen(PORT, () => {
console.log(`
╔══════════════════════════════════════════════════════╗
║ ║
║ ⚡ edgeFlow.js Development Server ║
║ ║
║ Local: http://localhost:${PORT} ║
║ ║
║ Press Ctrl+C to stop ║
║ ║
╚══════════════════════════════════════════════════════╝
`);
});
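// Usage sketch: the port can be overridden through the PORT env var read above, e.g.
//   node demo/server.js            # http://localhost:3000
//   PORT=8080 node demo/server.js  # http://localhost:8080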
================================================
FILE: demo/styles.css
================================================
/**
* edgeFlow.js Demo - Spotify-inspired Theme
*
* Design: Spotify color palette + liquid glass
* - Deep blacks and grays
* - Signature green accent
* - Clean, bold typography
* - Subtle glass effects
*/
/* ==========================================================================
1. Variables & Reset
========================================================================== */
:root {
/* Avocado-green palette */
--color-accent: #7fa921;
--color-accent-light: #8fbc2a;
--color-accent-dim: rgba(127, 169, 33, 0.12);
--color-dark: #5a5755;
--color-light: #e2e1e6;
/* Background - light style */
--bg-base: #d8d7dc;
--bg-elevated: var(--color-light);
--bg-highlight: #eaeaed;
/* Glass - light glass effect */
--glass-bg: rgba(255, 255, 255, 0.6);
--glass-bg-hover: rgba(255, 255, 255, 0.8);
--glass-border: rgba(255, 255, 255, 0.8);
--glass-border-hover: rgba(255, 255, 255, 0.95);
--glass-highlight: rgba(255, 255, 255, 0.5);
/* Text - dark text */
--text-primary: var(--color-dark);
--text-secondary: #6e6b69;
--text-muted: #8a8886;
/* Accent variations */
--accent: var(--color-accent);
--accent-hover: var(--color-accent-light);
--accent-dim: var(--color-accent-dim);
/* Status */
--success: var(--color-accent);
--warning: #d4a520;
--error: #c45c4a;
/* Spacing */
--space-xs: 0.25rem;
--space-sm: 0.5rem;
--space-md: 0.75rem;
--space-lg: 1rem;
--space-xl: 1.5rem;
/* Border Radius */
--radius-sm: 4px;
--radius-md: 8px;
--radius-lg: 12px;
--radius-xl: 16px;
--radius-full: 9999px;
/* Fonts */
--font-sans: 'Circular', -apple-system, BlinkMacSystemFont, 'Helvetica Neue', sans-serif;
--font-mono: 'Fira Code', 'SF Mono', monospace;
/* Glass blur */
--blur-glass: 40px;
}
*,
*::before,
*::after {
margin: 0;
padding: 0;
box-sizing: border-box;
}
html {
scroll-behavior: smooth;
}
body {
font-family: var(--font-sans);
background: linear-gradient(145deg, #d0cfd4 0%, var(--bg-base) 50%, #cccbd0 100%);
color: var(--text-primary);
min-height: 100vh;
line-height: 1.6;
overflow-x: hidden;
}
/* Subtle gradient overlay */
body::before {
content: '';
position: fixed;
top: 0;
left: 0;
right: 0;
height: 500px;
background: linear-gradient(180deg,
rgba(127, 169, 33, 0.1) 0%,
transparent 100%);
pointer-events: none;
z-index: 0;
}
/* ==========================================================================
2. Layout
========================================================================== */
.container {
max-width: 1440px;
margin: 0 auto;
padding: var(--space-xl);
position: relative;
z-index: 1;
}
.bento-grid {
display: grid;
grid-template-columns: repeat(12, 1fr);
gap: var(--space-lg);
margin-bottom: var(--space-lg);
}
.span-3 { grid-column: span 3; }
.span-4 { grid-column: span 4; }
.span-5 { grid-column: span 5; }
.span-6 { grid-column: span 6; }
.span-7 { grid-column: span 7; }
.span-8 { grid-column: span 8; }
.span-12 { grid-column: span 12; }
/* ==========================================================================
3. Header
========================================================================== */
header {
text-align: center;
padding: 3rem 2rem;
margin-bottom: 2rem;
position: relative;
/* Green gradient background */
background: linear-gradient(135deg, var(--color-accent) 0%, #6a9020 100%);
border-radius: var(--radius-xl);
overflow: hidden;
box-shadow: 0 10px 40px -10px rgba(127, 169, 33, 0.4);
}
/* White glow */
header::before {
content: '';
position: absolute;
top: -30%;
left: 50%;
transform: translateX(-50%);
width: 500px;
height: 250px;
background: radial-gradient(ellipse, rgba(255, 255, 255, 0.3) 0%, transparent 70%);
pointer-events: none;
}
h1 {
font-size: 3rem;
font-weight: 700;
letter-spacing: -0.04em;
margin-bottom: var(--space-sm);
position: relative;
}
h1 span {
color: #ffffff;
text-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
}
/* White underline */
h1::after {
content: '';
display: block;
width: 60px;
height: 4px;
background: rgba(255, 255, 255, 0.8);
border-radius: 2px;
margin: var(--space-md) auto 0;
}
.subtitle {
color: rgba(255, 255, 255, 0.85);
font-size: 1rem;
font-weight: 400;
position: relative;
}
/* ==========================================================================
4. Cards - Spotify Style with Glass
========================================================================== */
.bento-card {
position: relative;
padding: 1.25rem;
/* Light glass card */
background: var(--glass-bg);
backdrop-filter: blur(var(--blur-glass)) saturate(180%);
-webkit-backdrop-filter: blur(var(--blur-glass)) saturate(180%);
border: 1px solid var(--glass-border);
border-radius: var(--radius-lg);
box-shadow:
0 4px 24px -8px rgba(0, 0, 0, 0.08),
inset 0 1px 0 0 rgba(255, 255, 255, 0.8);
transition: all 0.3s ease;
}
.bento-card:hover {
background: var(--glass-bg-hover);
border-color: var(--glass-border-hover);
transform: translateY(-2px);
box-shadow:
0 8px 32px -8px rgba(0, 0, 0, 0.12),
inset 0 1px 0 0 rgba(255, 255, 255, 0.9);
}
/* Card Header */
.card-header {
display: flex;
align-items: center;
gap: var(--space-md);
margin-bottom: var(--space-lg);
}
.card-icon {
width: 40px;
height: 40px;
border-radius: var(--radius-md);
display: flex;
align-items: center;
justify-content: center;
font-size: 1.25rem;
flex-shrink: 0;
/* Uniform yellow background */
background: #f0c850;
}
.card-icon.pink,
.card-icon.green,
.card-icon.orange {
background: #f0c850;
}
.card-title {
font-size: 1rem;
font-weight: 700;
color: var(--text-primary);
letter-spacing: -0.01em;
}
.card-desc {
font-size: 0.8125rem;
color: var(--text-muted);
margin-top: 2px;
}
/* ==========================================================================
5. Components
========================================================================== */
/* Buttons */
button {
position: relative;
padding: 0.75rem 2rem;
font-family: inherit;
font-size: 0.875rem;
font-weight: 700;
letter-spacing: 0.1em;
text-transform: uppercase;
cursor: pointer;
border-radius: var(--radius-full);
transition: all 0.2s ease;
/* Primary accent button */
background: var(--accent);
border: none;
color: #ffffff;
box-shadow: 0 4px 12px -4px rgba(127, 169, 33, 0.4);
}
button:hover {
background: var(--accent-hover);
transform: scale(1.04);
box-shadow: 0 6px 16px -4px rgba(127, 169, 33, 0.5);
}
button:active {
transform: scale(1);
}
button:disabled {
opacity: 0.4;
cursor: not-allowed;
transform: none;
}
.btn-secondary {
background: rgba(255, 255, 255, 0.6);
border: 1px solid rgba(66, 63, 61, 0.2);
color: var(--color-dark);
box-shadow: none;
}
.btn-secondary:hover {
background: rgba(255, 255, 255, 0.9);
border-color: rgba(66, 63, 61, 0.3);
transform: scale(1.04);
box-shadow: 0 4px 12px -4px rgba(0, 0, 0, 0.1);
}
.btn-sm {
padding: 0.5rem 1rem;
font-size: 0.75rem;
}
.btn-group {
display: flex;
gap: var(--space-sm);
flex-wrap: wrap;
}
/* Inputs */
input,
textarea {
width: 100%;
padding: 0.75rem 1rem;
font-family: inherit;
font-size: 0.875rem;
border-radius: var(--radius-md);
transition: all 0.2s;
background: rgba(255, 255, 255, 0.7);
border: 1px solid rgba(66, 63, 61, 0.15);
color: var(--text-primary);
}
input:focus,
textarea:focus {
outline: none;
border-color: var(--accent);
background: rgba(255, 255, 255, 0.9);
box-shadow: 0 0 0 3px rgba(127, 169, 33, 0.1);
}
input::placeholder,
textarea::placeholder {
color: var(--text-muted);
}
textarea {
min-height: 80px;
resize: vertical;
}
/* Status List */
.status-list {
display: flex;
flex-direction: column;
gap: var(--space-sm);
}
.status-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: var(--space-sm) var(--space-md);
font-size: 0.875rem;
background: rgba(255, 255, 255, 0.5);
border-radius: var(--radius-sm);
}
.status-badge {
padding: var(--space-xs) var(--space-md);
border-radius: var(--radius-full);
font-size: 0.6875rem;
font-weight: 700;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.status-success {
background: var(--accent-dim);
color: var(--accent);
}
.status-warning {
background: rgba(245, 158, 11, 0.15);
color: var(--warning);
}
.status-error {
background: rgba(233, 20, 41, 0.15);
color: var(--error);
}
.status-pending {
background: rgba(255, 255, 255, 0.05);
color: var(--text-muted);
}
/* ==========================================================================
6. Output & Metrics
========================================================================== */
.output {
padding: 1rem;
font-family: var(--font-mono);
font-size: 0.75rem;
overflow-x: auto;
max-height: 200px;
overflow-y: auto;
line-height: 1.7;
background: var(--color-dark);
border-radius: var(--radius-md);
color: var(--color-light);
}
.output pre {
white-space: pre-wrap;
word-break: break-word;
margin: 0;
}
.output .success { color: var(--accent); }
.output .error { color: #e8806e; }
.output .info { color: var(--color-light); }
.output .warn { color: #e8c860; }
/* Metrics */
.metrics {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(90px, 1fr));
gap: var(--space-md);
}
.metric {
text-align: center;
padding: var(--space-md);
background: rgba(255, 255, 255, 0.5);
border-radius: var(--radius-md);
transition: background 0.2s;
}
.metric:hover {
background: rgba(255, 255, 255, 0.7);
}
.metric-value {
font-size: 1.5rem;
font-weight: 700;
color: var(--accent);
font-variant-numeric: tabular-nums;
}
.metric-label {
font-size: 0.6875rem;
color: var(--text-muted);
margin-top: var(--space-xs);
text-transform: uppercase;
letter-spacing: 0.1em;
font-weight: 700;
}
/* ==========================================================================
7. Modal
========================================================================== */
.modal-overlay {
position: fixed;
inset: 0;
z-index: 1000;
display: none;
align-items: center;
justify-content: center;
padding: 2rem;
background: rgba(66, 63, 61, 0.6);
backdrop-filter: blur(8px);
}
.modal-overlay.active {
display: flex;
animation: fadeIn 0.2s ease;
}
.modal {
position: relative;
width: 100%;
max-width: 1200px;
height: 90vh;
overflow: hidden;
background: var(--color-light);
border-radius: var(--radius-xl);
box-shadow: 0 25px 80px -20px rgba(0, 0, 0, 0.3);
animation: slideUp 0.3s ease;
}
.modal-header {
display: flex;
align-items: center;
justify-content: space-between;
padding: var(--space-lg) var(--space-xl);
background: rgba(255, 255, 255, 0.5);
border-bottom: 1px solid rgba(66, 63, 61, 0.1);
}
.modal-title {
font-size: 1rem;
font-weight: 700;
color: var(--text-primary);
}
.modal-close {
width: 32px;
height: 32px;
padding: 0;
display: flex;
align-items: center;
justify-content: center;
font-size: 1.25rem;
line-height: 1;
cursor: pointer;
background: rgba(196, 92, 74, 0.1);
border: none;
border-radius: var(--radius-full);
color: var(--error);
transition: all 0.2s;
}
.modal-close:hover {
background: rgba(196, 92, 74, 0.2);
color: var(--error);
transform: scale(1.1);
}
.modal-frame {
width: 100%;
height: calc(100% - 60px);
border: none;
background: var(--color-dark);
}
/* ==========================================================================
8. Footer
========================================================================== */
footer {
text-align: center;
padding: 2rem;
color: var(--text-muted);
font-size: 0.8125rem;
}
footer a {
color: var(--text-secondary);
text-decoration: none;
transition: color 0.2s;
}
footer a:hover {
color: var(--accent);
}
/* ==========================================================================
9. Utilities
========================================================================== */
.hidden { display: none !important; }
.mt-1 { margin-top: var(--space-sm); }
.mt-2 { margin-top: var(--space-md); }
.mt-3 { margin-top: var(--space-lg); }
.mb-1 { margin-bottom: var(--space-sm); }
.mb-2 { margin-bottom: var(--space-md); }
/* Loader */
.loader {
display: inline-block;
width: 14px;
height: 14px;
border: 2px solid rgba(127, 169, 33, 0.3);
border-top-color: var(--accent);
border-radius: 50%;
animation: spin 0.8s linear infinite;
vertical-align: middle;
margin-right: var(--space-sm);
}
/* ==========================================================================
10. Animations
========================================================================== */
@keyframes spin {
to { transform: rotate(360deg); }
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
@keyframes slideUp {
from { opacity: 0; transform: translateY(20px); }
to { opacity: 1; transform: translateY(0); }
}
/* Scrollbar */
::-webkit-scrollbar {
width: 8px;
height: 8px;
}
::-webkit-scrollbar-track {
background: rgba(0, 0, 0, 0.05);
border-radius: 4px;
}
::-webkit-scrollbar-thumb {
background: rgba(66, 63, 61, 0.3);
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background: rgba(66, 63, 61, 0.5);
}
/* ==========================================================================
11. Responsive
========================================================================== */
@media (max-width: 1024px) {
.span-3,
.span-4,
.span-5 { grid-column: span 6; }
.span-7,
.span-8 { grid-column: span 12; }
}
@media (max-width: 768px) {
.container { padding: var(--space-lg); }
h1 { font-size: 2rem; }
header { padding: 2rem 1.5rem; }
.bento-grid { grid-template-columns: 1fr; }
.span-3,
.span-4,
.span-5,
.span-6,
.span-7,
.span-8,
.span-12 { grid-column: span 1; }
button {
padding: 0.625rem 1.5rem;
}
.modal { height: 95vh; }
.modal-overlay { padding: var(--space-lg); }
}
/* ==========================================================================
12. SAM Interactive Segmentation
========================================================================== */
.sam-container {
position: relative;
width: 100%;
min-height: 250px;
border-radius: var(--radius-md);
overflow: hidden;
background: var(--color-dark);
}
.sam-upload {
position: absolute;
inset: 0;
display: flex;
align-items: center;
justify-content: center;
cursor: pointer;
border: 2px dashed rgba(255, 255, 255, 0.3);
border-radius: var(--radius-md);
transition: all 0.3s;
}
.sam-upload:hover {
border-color: var(--accent);
background: rgba(127, 169, 33, 0.1);
}
.sam-upload-content {
display: flex;
flex-direction: column;
align-items: center;
gap: var(--space-sm);
color: var(--color-light);
opacity: 0.7;
}
.sam-upload-icon {
font-size: 2.5rem;
}
.sam-workspace {
position: relative;
width: 100%;
height: 250px;
}
.sam-workspace canvas {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
object-fit: contain;
}
#sam-canvas {
z-index: 1;
}
#sam-mask-canvas {
z-index: 2;
pointer-events: none;
opacity: 0.5;
}
/* Click indicator */
.sam-point {
position: absolute;
width: 16px;
height: 16px;
border-radius: 50%;
transform: translate(-50%, -50%);
z-index: 3;
pointer-events: none;
animation: pointPulse 0.3s ease-out;
}
.sam-point.positive {
background: var(--accent);
box-shadow: 0 0 0 3px rgba(127, 169, 33, 0.3);
}
.sam-point.negative {
background: var(--error);
box-shadow: 0 0 0 3px rgba(196, 92, 74, 0.3);
}
@keyframes pointPulse {
0% { transform: translate(-50%, -50%) scale(0); opacity: 0; }
50% { transform: translate(-50%, -50%) scale(1.2); }
100% { transform: translate(-50%, -50%) scale(1); opacity: 1; }
}
/* ==========================================================================
13. AI Chat
========================================================================== */
.chat-container {
display: flex;
flex-direction: column;
height: 400px;
min-height: 300px;
background: var(--color-dark);
border-radius: var(--radius-md);
overflow: hidden;
}
.chat-messages {
flex: 1;
overflow-y: auto;
padding: var(--space-md);
display: flex;
flex-direction: column;
gap: var(--space-sm);
}
.chat-welcome {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100%;
text-align: center;
color: var(--color-light);
opacity: 0.7;
}
.chat-welcome-icon {
font-size: 2.5rem;
margin-bottom: var(--space-sm);
}
.chat-welcome p {
margin: var(--space-xs) 0;
font-size: 0.875rem;
}
.chat-welcome-hint {
opacity: 0.6;
font-size: 0.75rem !important;
}
.chat-message {
max-width: 85%;
padding: var(--space-sm) var(--space-md);
border-radius: var(--radius-md);
font-size: 0.875rem;
line-height: 1.5;
animation: messageSlide 0.2s ease-out;
}
@keyframes messageSlide {
from { opacity: 0; transform: translateY(10px); }
to { opacity: 1; transform: translateY(0); }
}
.chat-message.user {
align-self: flex-end;
background: var(--accent);
color: white;
border-bottom-right-radius: 4px;
}
.chat-message.assistant {
align-self: flex-start;
background: rgba(255, 255, 255, 0.1);
color: var(--color-light);
border-bottom-left-radius: 4px;
}
.chat-message.assistant.typing::after {
content: '▋';
animation: blink 0.7s infinite;
}
@keyframes blink {
0%, 100% { opacity: 1; }
50% { opacity: 0; }
}
.chat-input-container {
display: flex;
gap: var(--space-sm);
padding: var(--space-sm);
background: rgba(0, 0, 0, 0.2);
}
.chat-input {
flex: 1;
padding: var(--space-sm) var(--space-md);
background: rgba(255, 255, 255, 0.1);
border: 1px solid rgba(255, 255, 255, 0.1);
border-radius: var(--radius-full);
color: var(--color-light);
font-size: 0.875rem;
}
.chat-input:focus {
outline: none;
border-color: var(--accent);
background: rgba(255, 255, 255, 0.15);
}
.chat-input::placeholder {
color: rgba(255, 255, 255, 0.4);
}
.chat-input-container button {
padding: var(--space-sm) var(--space-lg);
font-size: 0.75rem;
}
.chat-status {
display: flex;
align-items: center;
gap: var(--space-sm);
padding: var(--space-sm) 0;
font-size: 0.75rem;
color: var(--text-muted);
}
.chat-status-dot {
width: 8px;
height: 8px;
border-radius: 50%;
background: var(--accent);
animation: pulse 2s infinite;
}
@keyframes pulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.5; }
}
.chat-status-dot.loading {
background: var(--warning);
animation: pulse 0.5s infinite;
}
.chat-status-dot.error {
background: var(--error);
animation: none;
}
/* ==========================================================================
14. Model Loader
========================================================================== */
.model-loader {
display: flex;
align-items: center;
justify-content: center;
min-height: 250px;
background: var(--color-dark);
border-radius: var(--radius-md);
padding: var(--space-xl);
}
.loader-content {
text-align: center;
max-width: 300px;
}
.loader-spinner {
width: 48px;
height: 48px;
margin: 0 auto var(--space-lg);
border: 3px solid rgba(127, 169, 33, 0.2);
border-top-color: var(--accent);
border-radius: 50%;
animation: spin 1s linear infinite;
}
.loader-text {
color: var(--color-light);
font-size: 0.875rem;
margin-bottom: var(--space-sm);
}
.loader-detail {
color: var(--text-muted);
font-size: 0.75rem;
font-family: var(--font-mono);
}
.loader-info {
color: var(--color-light);
font-size: 0.875rem;
margin-bottom: var(--space-lg);
line-height: 1.6;
}
.loader-info p {
margin: var(--space-xs) 0;
}
.loader-warning {
color: var(--warning) !important;
font-size: 0.75rem !important;
opacity: 0.9;
}
.loader-btn {
margin-bottom: var(--space-lg);
}
/* Progress Bar */
.progress-bar {
width: 100%;
height: 8px;
background: rgba(255, 255, 255, 0.1);
border-radius: var(--radius-full);
overflow: hidden;
margin: var(--space-md) 0;
}
.progress-fill {
height: 100%;
background: linear-gradient(90deg, var(--accent), var(--accent-hover));
border-radius: var(--radius-full);
width: 0%;
transition: width 0.3s ease;
}
.progress-fill.downloading {
background: linear-gradient(90deg, var(--accent), var(--accent-hover));
animation: progressPulse 1.5s ease-in-out infinite;
}
@keyframes progressPulse {
0%, 100% { opacity: 1; }
50% { opacity: 0.7; }
}
/* Model loader states */
.model-loader.loading .loader-spinner {
display: block;
}
.model-loader.ready {
display: none;
}
/* Success state for SAM */
.sam-ready .model-loader {
display: none;
}
.sam-ready .sam-container {
display: block !important;
}
================================================
FILE: dist/backends/index.d.ts
================================================
/**
* edgeFlow.js - Backend Exports
*/
export { WebGPURuntime, createWebGPURuntime } from './webgpu.js';
export { WebNNRuntime, createWebNNRuntime } from './webnn.js';
export { WASMRuntime, createWASMRuntime } from './wasm.js';
export { ONNXRuntime, createONNXRuntime, isOnnxAvailable } from './onnx.js';
export { TransformersAdapterRuntime, useTransformersBackend, getTransformersAdapter, type TransformersAdapterOptions, type TransformersPipelineFactory, } from './transformers-adapter.js';
export type { Runtime, RuntimeType, RuntimeCapabilities } from '../core/types.js';
/**
* Register all available backends.
*
* Always registers the ONNX Runtime factory synchronously so there is no
* async race between registration and the first pipeline() call.
* `ONNXRuntime.isAvailable()` is called lazily by RuntimeManager when it
* selects a backend, so if onnxruntime-web is not installed the runtime is
* simply skipped at that point.
*/
export declare function registerAllBackends(): void;
//# sourceMappingURL=index.d.ts.map
================================================
FILE: dist/backends/index.js
================================================
/**
* edgeFlow.js - Backend Exports
*/
// WebGPU Backend (planned - skeleton only)
export { WebGPURuntime, createWebGPURuntime } from './webgpu.js';
// WebNN Backend (planned - skeleton only)
export { WebNNRuntime, createWebNNRuntime } from './webnn.js';
// WASM Backend (basic tensor ops)
export { WASMRuntime, createWASMRuntime } from './wasm.js';
// ONNX Runtime Backend (real model inference)
export { ONNXRuntime, createONNXRuntime, isOnnxAvailable } from './onnx.js';
// transformers.js Adapter Backend
export { TransformersAdapterRuntime, useTransformersBackend, getTransformersAdapter, } from './transformers-adapter.js';
import { registerRuntime } from '../core/runtime.js';
import { createONNXRuntime } from './onnx.js';
/**
* Register all available backends.
*
* Always registers the ONNX Runtime factory synchronously so there is no
* async race between registration and the first pipeline() call.
* `ONNXRuntime.isAvailable()` is called lazily by RuntimeManager when it
* selects a backend, so if onnxruntime-web is not installed the runtime is
* simply skipped at that point.
*/
export function registerAllBackends() {
registerRuntime('wasm', createONNXRuntime);
}
/**
* Auto-register backends on module load (synchronous — no race condition).
*/
registerAllBackends();
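// Consumer-side sketch (not part of this module): an additional backend could be wired
// through the same registry, assuming the WebGPU skeleton is exposed as a RuntimeType:
//
//   import { registerRuntime } from '../core/runtime.js';
//   import { createWebGPURuntime } from './webgpu.js';
//   registerRuntime('webgpu', createWebGPURuntime);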
//# sourceMappingURL=index.js.map
================================================
FILE: dist/backends/onnx.d.ts
================================================
/**
* edgeFlow.js - ONNX Runtime Backend
*
* Uses onnxruntime-web for real ONNX model inference.
* onnxruntime-web is an optional peer dependency loaded dynamically.
*/
import { Runtime, RuntimeType, RuntimeCapabilities, LoadedModel, ModelLoadOptions, Tensor } from '../core/types.js';
/**
* Check whether onnxruntime-web is importable.
*/
export declare function isOnnxAvailable(): Promise<boolean>;
/**
* ONNXRuntime - Real ONNX model inference using onnxruntime-web
*/
export declare class ONNXRuntime implements Runtime {
readonly name: RuntimeType;
private initialized;
private executionProvider;
get capabilities(): RuntimeCapabilities;
/**
* Check if ONNX Runtime is available (peer dependency installed)
*/
isAvailable(): Promise<boolean>;
/**
* Initialize the ONNX runtime
*/
initialize(): Promise<void>;
/**
* Load a model from ArrayBuffer
*/
loadModel(modelData: ArrayBuffer, options?: ModelLoadOptions): Promise<LoadedModel>;
/**
* Run inference
*/
run(model: LoadedModel, inputs: Tensor[]): Promise<Tensor[]>;
/**
* Run inference with named inputs
*/
runNamed(model: LoadedModel, namedInputs: Map<string, Tensor>): Promise<Tensor[]>;
/**
* Unload a model
*/
private unloadModel;
/**
* Dispose the runtime
*/
dispose(): void;
}
/**
* Create ONNX runtime factory
*/
export declare function createONNXRuntime(): Runtime;
//# sourceMappingURL=onnx.d.ts.map
================================================
FILE: dist/backends/onnx.js
================================================
/**
* edgeFlow.js - ONNX Runtime Backend
*
* Uses onnxruntime-web for real ONNX model inference.
* onnxruntime-web is an optional peer dependency loaded dynamically.
*/
import { EdgeFlowError, ErrorCodes, } from '../core/types.js';
import { LoadedModelImpl } from '../core/runtime.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { getMemoryManager } from '../core/memory.js';
// Lazy-loaded onnxruntime-web module
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let ort = null;
async function getOrt() {
if (ort)
return ort;
try {
// Import the WASM-only sub-path so Vite rewrites the bare specifier
// to ort.wasm.bundle.min.mjs. This avoids loading the JSEP/WebGPU
// worker module (jsep.mjs) that ort.bundle.min.mjs eagerly fetches
// whenever navigator.gpu exists — which causes a 404 in dev servers
// that restrict ES module imports from /public.
ort = await import('onnxruntime-web/wasm');
return ort;
}
catch {
return null;
}
}
/**
* Check whether onnxruntime-web is importable.
*/
export async function isOnnxAvailable() {
return (await getOrt()) != null;
}
const sessionStore = new Map();
// ============================================================================
// ONNX Runtime Implementation
// ============================================================================
/**
* ONNXRuntime - Real ONNX model inference using onnxruntime-web
*/
export class ONNXRuntime {
name = 'wasm'; // Register as wasm since it's the fallback
initialized = false;
executionProvider = 'wasm';
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: this.executionProvider === 'webgpu',
dynamicShapes: true,
maxBatchSize: 32,
availableMemory: 512 * 1024 * 1024, // 512MB
};
}
/**
* Check if ONNX Runtime is available (peer dependency installed)
*/
async isAvailable() {
return isOnnxAvailable();
}
/**
* Initialize the ONNX runtime
*/
async initialize() {
if (this.initialized)
return;
const ortModule = await getOrt();
if (!ortModule) {
throw new EdgeFlowError('onnxruntime-web is not installed. Install it with: npm install onnxruntime-web', ErrorCodes.RUNTIME_NOT_AVAILABLE);
}
// Configure WASM backend for browser use.
// numThreads=1 disables multi-threading so ort only needs the plain
// .wasm binary — the worker .mjs file is never requested, which avoids
// Vite's restriction on importing files from /public as ES modules.
// Consumers should copy onnxruntime-web/dist/*.wasm to public/ort/.
if (typeof window !== 'undefined' && ortModule.env?.wasm) {
ortModule.env.wasm.wasmPaths = '/ort/';
ortModule.env.wasm.numThreads = 1;
}
this.initialized = true;
}
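// Deployment note (restating the assumption above): with wasmPaths = '/ort/', the host
// app must serve the onnxruntime-web binaries from that path, e.g. by copying them at
// build time: cp node_modules/onnxruntime-web/dist/*.wasm public/ort/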
/**
* Load a model from ArrayBuffer
*/
async loadModel(modelData, options = {}) {
if (!this.initialized) {
await this.initialize();
}
try {
const ortModule = await getOrt();
if (!ortModule) {
throw new Error('onnxruntime-web is not installed');
}
// WASM-only execution provider — WebGPU acceleration can be added
// later via the dedicated WebGPURuntime backend.
const sessionOptions = {
executionProviders: ['wasm'],
graphOptimizationLevel: 'all',
};
const modelBytes = new Uint8Array(modelData);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const session = await ortModule.InferenceSession.create(modelBytes, sessionOptions);
// Get input/output names
const inputNames = session.inputNames;
const outputNames = session.outputNames;
// Generate model ID
const modelId = `onnx_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
// Store session
sessionStore.set(modelId, {
session,
inputNames: [...inputNames],
outputNames: [...outputNames],
});
// Create metadata
const metadata = {
name: options.metadata?.name ?? 'onnx-model',
version: '1.0.0',
inputs: inputNames.map((name) => ({
name,
dtype: 'float32',
shape: [-1], // Dynamic shape
})),
outputs: outputNames.map((name) => ({
name,
dtype: 'float32',
shape: [-1],
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? 'float32',
format: 'onnx',
};
// Create model instance
const model = new LoadedModelImpl(metadata, 'wasm', () => this.unloadModel(modelId));
// Override the ID to match our stored session
Object.defineProperty(model, 'id', { value: modelId, writable: false });
// Track in memory manager
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
catch (error) {
throw new EdgeFlowError(`Failed to load ONNX model: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.MODEL_LOAD_FAILED, { error });
}
}
/**
* Run inference
*/
async run(model, inputs) {
const sessionData = sessionStore.get(model.id);
if (!sessionData) {
throw new EdgeFlowError(`ONNX session not found for model ${model.id}`, ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const { session, inputNames, outputNames } = sessionData;
try {
const ortModule = await getOrt();
const feeds = {};
for (let i = 0; i < Math.min(inputs.length, inputNames.length); i++) {
const inputName = inputNames[i];
const inputTensor = inputs[i];
if (inputName && inputTensor) {
const dtype = inputTensor.dtype;
let ortTensor;
if (dtype === 'int64') {
const data = inputTensor.data;
ortTensor = new ortModule.Tensor('int64', data, inputTensor.shape);
}
else if (dtype === 'int32') {
const data = inputTensor.data;
ortTensor = new ortModule.Tensor('int32', data, inputTensor.shape);
}
else {
const data = inputTensor.toFloat32Array();
ortTensor = new ortModule.Tensor('float32', data, inputTensor.shape);
}
feeds[inputName] = ortTensor;
}
}
const results = await session.run(feeds);
// Convert outputs to EdgeFlowTensor
const outputs = [];
for (const outputName of outputNames) {
const ortTensor = results[outputName];
if (ortTensor) {
const data = ortTensor.data;
const shape = Array.from(ortTensor.dims).map(d => Number(d));
outputs.push(new EdgeFlowTensor(new Float32Array(data), shape, 'float32'));
}
}
return outputs;
}
catch (error) {
throw new EdgeFlowError(`ONNX inference failed: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.INFERENCE_FAILED, { modelId: model.id, error });
}
}
/**
* Run inference with named inputs
*/
async runNamed(model, namedInputs) {
const sessionData = sessionStore.get(model.id);
if (!sessionData) {
throw new EdgeFlowError(`ONNX session not found for model ${model.id}`, ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const { session, inputNames, outputNames } = sessionData;
try {
const ortModule = await getOrt();
const feeds = {};
for (const [inputName, inputTensor] of namedInputs) {
const tensor = inputTensor;
const dtype = tensor.dtype;
let ortTensor;
if (dtype === 'int64') {
const data = tensor.data;
ortTensor = new ortModule.Tensor('int64', data, tensor.shape);
}
else if (dtype === 'int32') {
const data = tensor.data;
ortTensor = new ortModule.Tensor('int32', data, tensor.shape);
}
else {
const data = tensor.toFloat32Array();
ortTensor = new ortModule.Tensor('float32', data, tensor.shape);
}
feeds[inputName] = ortTensor;
}
const results = await session.run(feeds);
// Convert outputs to EdgeFlowTensor
const outputs = [];
for (const outputName of outputNames) {
const ortTensor = results[outputName];
if (ortTensor) {
const data = ortTensor.data;
const shape = Array.from(ortTensor.dims).map(d => Number(d));
outputs.push(new EdgeFlowTensor(new Float32Array(data), shape, 'float32'));
}
}
return outputs;
}
catch (error) {
throw new EdgeFlowError(`ONNX inference failed: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.INFERENCE_FAILED, { modelId: model.id, expectedInputs: inputNames, providedInputs: Array.from(namedInputs.keys()), error });
}
}
/**
* Unload a model
*/
async unloadModel(modelId) {
const sessionData = sessionStore.get(modelId);
if (sessionData) {
            // The session itself is released by GC once we drop our reference.
sessionStore.delete(modelId);
}
}
/**
* Dispose the runtime
*/
dispose() {
// Clear all sessions
sessionStore.clear();
this.initialized = false;
}
}
/**
* Create ONNX runtime factory
*/
export function createONNXRuntime() {
return new ONNXRuntime();
}
//# sourceMappingURL=onnx.js.map
================================================
FILE: dist/backends/transformers-adapter.d.ts
================================================
/**
* edgeFlow.js - transformers.js Adapter Backend
*
* Wraps transformers.js (by Hugging Face) as an inference backend, giving
* users access to 1000+ HuggingFace models while adding edgeFlow.js's
* orchestration layer (scheduling, caching, memory management, workers).
*
* @example
* ```typescript
 * import { pipeline, useTransformersBackend } from 'edgeflowjs';
 * import { pipeline as tfPipeline } from '@xenova/transformers';
 *
 * // Register the adapter with the transformers.js pipeline factory
 * useTransformersBackend({ pipelineFactory: tfPipeline });
*
* // Now use edgeFlow.js pipeline API — inference delegates to transformers.js
* const classifier = await pipeline('text-classification', {
* model: 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
* });
*
* // edgeFlow.js handles scheduling, batching, memory, caching
* const results = await classifier.runBatch(thousandsOfTexts);
* ```
*/
import { Runtime, RuntimeType, RuntimeCapabilities, LoadedModel, ModelLoadOptions, Tensor } from '../core/types.js';
/**
* Minimal interface for a transformers.js pipeline instance.
* We avoid importing @xenova/transformers directly so edgeFlow.js
* does not add it as a hard dependency.
*/
interface TransformersPipelineInstance {
    (input: unknown, options?: unknown): Promise<unknown>;
    dispose?: () => Promise<void> | void;
}
/**
* A factory that creates a transformers.js pipeline.
* Users pass this so we don't hard-depend on the library.
*/
export type TransformersPipelineFactory = (task: string, model?: string, options?: Record<string, unknown>) => Promise<TransformersPipelineInstance>;
/**
* Options for configuring the transformers.js adapter.
*/
export interface TransformersAdapterOptions {
/** The pipeline factory from transformers.js (e.g. the `pipeline` function) */
pipelineFactory: TransformersPipelineFactory;
/** Default device ('webgpu' | 'wasm' | 'cpu') — passed to transformers.js */
device?: string;
/** Default dtype ('fp32' | 'fp16' | 'q8' | 'q4') */
dtype?: string;
/** Cache directory (browser IndexedDB path) */
cacheDir?: string;
}
export declare class TransformersAdapterRuntime implements Runtime {
readonly name: RuntimeType;
get capabilities(): RuntimeCapabilities;
    isAvailable(): Promise<boolean>;
    initialize(): Promise<void>;
    loadModel(modelData: ArrayBuffer, options?: ModelLoadOptions): Promise<LoadedModel>;
/**
* Load a transformers.js pipeline by task + model name
* (called by the higher-level adapter pipeline, not via the
* standard loadModel path).
*/
    loadPipeline(task: string, model: string, pipelineOptions?: Record<string, unknown>): Promise<string>;
/**
* Run inference by passing the raw input to the transformers.js pipeline.
     * Array results are flattened into a single numeric EdgeFlowTensor; non-array
     * results yield a placeholder tensor. transformers.js returns task-specific
     * objects rather than raw tensors, so prefer `runDirect` for structured output.
*/
    run(model: LoadedModel, inputs: Tensor[]): Promise<Tensor[]>;
/**
* High-level: run the transformers.js pipeline directly with arbitrary input.
* Returns the raw result object (not a tensor).
*/
    runDirect(modelId: string, input: unknown, options?: Record<string, unknown>): Promise<unknown>;
dispose(): void;
}
/**
* Register the transformers.js adapter as the default inference backend.
*
* @example
* ```typescript
* import { pipeline } from '@xenova/transformers';
* import { useTransformersBackend } from 'edgeflowjs';
*
* useTransformersBackend({
* pipelineFactory: pipeline,
* device: 'webgpu',
* dtype: 'fp16',
* });
* ```
*/
export declare function useTransformersBackend(options: TransformersAdapterOptions): void;
/**
* Get the adapter runtime instance (for advanced use).
*/
export declare function getTransformersAdapter(): TransformersAdapterRuntime | null;
export {};
//# sourceMappingURL=transformers-adapter.d.ts.map
================================================
FILE: dist/backends/transformers-adapter.js
================================================
/**
* edgeFlow.js - transformers.js Adapter Backend
*
* Wraps transformers.js (by Hugging Face) as an inference backend, giving
* users access to 1000+ HuggingFace models while adding edgeFlow.js's
* orchestration layer (scheduling, caching, memory management, workers).
*
* @example
* ```typescript
 * import { pipeline, useTransformersBackend } from 'edgeflowjs';
 * import { pipeline as tfPipeline } from '@xenova/transformers';
 *
 * // Register the adapter with the transformers.js pipeline factory
 * useTransformersBackend({ pipelineFactory: tfPipeline });
*
* // Now use edgeFlow.js pipeline API — inference delegates to transformers.js
* const classifier = await pipeline('text-classification', {
* model: 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
* });
*
* // edgeFlow.js handles scheduling, batching, memory, caching
* const results = await classifier.runBatch(thousandsOfTexts);
* ```
*/
import { EdgeFlowError, ErrorCodes, } from '../core/types.js';
import { LoadedModelImpl } from '../core/runtime.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { getMemoryManager } from '../core/memory.js';
import { registerRuntime } from '../core/runtime.js';
// ---------------------------------------------------------------------------
// Session store: maps model IDs to transformers.js pipeline instances
// ---------------------------------------------------------------------------
const sessionStore = new Map();
let adapterOptions = null;
// ---------------------------------------------------------------------------
// Runtime implementation
// ---------------------------------------------------------------------------
export class TransformersAdapterRuntime {
name = 'wasm'; // registers under the wasm slot
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: true,
dynamicShapes: true,
maxBatchSize: 128,
availableMemory: 1024 * 1024 * 1024,
};
}
async isAvailable() {
return adapterOptions?.pipelineFactory != null;
}
async initialize() {
if (!adapterOptions?.pipelineFactory) {
throw new EdgeFlowError('TransformersAdapterRuntime requires a pipelineFactory. ' +
'Call useTransformersBackend({ pipelineFactory }) first.', ErrorCodes.RUNTIME_INIT_FAILED);
}
}
async loadModel(modelData, options = {}) {
// modelData is unused — transformers.js downloads its own models.
// Instead the model identifier comes via metadata.name or the URL.
const modelName = options.metadata?.name ?? 'default';
const metadata = {
name: modelName,
version: '1.0.0',
inputs: [{ name: 'input', dtype: 'float32', shape: [-1] }],
outputs: [{ name: 'output', dtype: 'float32', shape: [-1] }],
sizeBytes: modelData.byteLength || 0,
quantization: options.quantization ?? 'float32',
format: 'onnx',
};
const modelId = `tjs_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 6)}`;
const model = new LoadedModelImpl(metadata, this.name, () => {
const session = sessionStore.get(modelId);
if (session?.instance.dispose) {
session.instance.dispose();
}
sessionStore.delete(modelId);
});
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Load a transformers.js pipeline by task + model name
* (called by the higher-level adapter pipeline, not via the
* standard loadModel path).
*/
async loadPipeline(task, model, pipelineOptions) {
if (!adapterOptions?.pipelineFactory) {
throw new EdgeFlowError('Adapter not initialised', ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
const opts = { ...pipelineOptions };
if (adapterOptions.device)
opts['device'] = adapterOptions.device;
if (adapterOptions.dtype)
opts['dtype'] = adapterOptions.dtype;
const instance = await adapterOptions.pipelineFactory(task, model, opts);
const modelId = `tjs_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 6)}`;
sessionStore.set(modelId, { instance, task, model });
return modelId;
}
/**
* Run inference by passing the raw input to the transformers.js pipeline.
     * Array results are flattened into a single numeric EdgeFlowTensor; non-array
     * results yield a placeholder tensor. transformers.js returns task-specific
     * objects rather than raw tensors, so prefer `runDirect` for structured output.
*/
async run(model, inputs) {
const session = sessionStore.get(model.id);
if (!session) {
throw new EdgeFlowError(`No transformers.js session for model ${model.id}`, ErrorCodes.MODEL_NOT_LOADED);
}
// Reconstruct input from tensor (simple: use the float data as-is)
const inputData = inputs[0]?.toFloat32Array() ?? new Float32Array(0);
const result = await session.instance(inputData);
// Wrap the result in a tensor — downstream pipelines can interpret it
const resultArray = Array.isArray(result)
? new Float32Array(result.flat(Infinity))
: new Float32Array([0]);
return [new EdgeFlowTensor(resultArray, [resultArray.length], 'float32')];
}
/**
* High-level: run the transformers.js pipeline directly with arbitrary input.
* Returns the raw result object (not a tensor).
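     *
     * @example
     * ```typescript
     * // Illustrative sketch; assumes useTransformersBackend() was called first
     * // and uses an example model name.
     * const adapter = getTransformersAdapter()!;
     * const id = await adapter.loadPipeline('text-classification', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english');
     * const result = await adapter.runDirect(id, 'I love this library!');
     * ```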
*/
async runDirect(modelId, input, options) {
const session = sessionStore.get(modelId);
if (!session) {
throw new EdgeFlowError(`No transformers.js session for model ${modelId}`, ErrorCodes.MODEL_NOT_LOADED);
}
return session.instance(input, options);
}
dispose() {
for (const [id, session] of sessionStore) {
if (session.instance.dispose) {
session.instance.dispose();
}
sessionStore.delete(id);
}
}
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
let adapterRuntime = null;
/**
* Register the transformers.js adapter as the default inference backend.
*
* @example
* ```typescript
* import { pipeline } from '@xenova/transformers';
* import { useTransformersBackend } from 'edgeflowjs';
*
* useTransformersBackend({
* pipelineFactory: pipeline,
* device: 'webgpu',
* dtype: 'fp16',
* });
* ```
*/
export function useTransformersBackend(options) {
adapterOptions = options;
adapterRuntime = new TransformersAdapterRuntime();
registerRuntime('wasm', () => adapterRuntime);
}
/**
* Get the adapter runtime instance (for advanced use).
*/
export function getTransformersAdapter() {
return adapterRuntime;
}
//# sourceMappingURL=transformers-adapter.js.map
================================================
FILE: dist/backends/wasm.d.ts
================================================
/**
* edgeFlow.js - WebAssembly Backend
*
* Pure WASM runtime for universal browser support.
* Features:
* - Universal compatibility
* - SIMD acceleration when available
* - Memory-efficient execution
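 *
 * @example
 * ```typescript
 * // Minimal sketch; the runtime is normally selected automatically by the core.
 * const rt = createWASMRuntime();
 * if (await rt.isAvailable()) {
 *   await rt.initialize();
 * }
 * ```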
*/
import { Runtime, RuntimeType, RuntimeCapabilities, LoadedModel, ModelLoadOptions, Tensor } from '../core/types.js';
/**
* WASMRuntime - Pure WebAssembly inference runtime
*/
export declare class WASMRuntime implements Runtime {
readonly name: RuntimeType;
private module;
private simdSupported;
private models;
private initialized;
get capabilities(): RuntimeCapabilities;
/**
* Check if WASM is available
*/
    isAvailable(): Promise<boolean>;
/**
* Initialize the WASM runtime
*/
    initialize(): Promise<void>;
/**
* Check SIMD support
*/
private checkSIMDSupport;
/**
* Create JavaScript fallback for WASM operations
*/
private createJSFallback;
/**
* Load a model
*/
    loadModel(modelData: ArrayBuffer, options?: ModelLoadOptions): Promise<LoadedModel>;
/**
* Run inference
*/
    run(model: LoadedModel, inputs: Tensor[]): Promise<Tensor[]>;
/**
* Execute model
*/
private executeModel;
/**
* Parse model configuration
*/
private parseModelConfig;
/**
* Load weights into WASM memory
*/
private loadWeights;
/**
* Unload a model
*/
private unloadModel;
/**
* Ensure runtime is initialized
*/
private ensureInitialized;
/**
* Check if SIMD is supported
*/
hasSIMDSupport(): boolean;
/**
* Dispose the runtime
*/
dispose(): void;
}
/**
* Create WASM runtime factory
*/
export declare function createWASMRuntime(): Runtime;
//# sourceMappingURL=wasm.d.ts.map
================================================
FILE: dist/backends/wasm.js
================================================
/**
* edgeFlow.js - WebAssembly Backend
*
* Pure WASM runtime for universal browser support.
* Features:
* - Universal compatibility
* - SIMD acceleration when available
* - Memory-efficient execution
*/
import { EdgeFlowError, ErrorCodes, } from '../core/types.js';
import { LoadedModelImpl } from '../core/runtime.js';
import { EdgeFlowTensor, softmax as tensorSoftmax, relu as tensorRelu, sigmoid as tensorSigmoid } from '../core/tensor.js';
import { getMemoryManager } from '../core/memory.js';
// ============================================================================
// WASM Runtime Implementation
// ============================================================================
/**
* WASMRuntime - Pure WebAssembly inference runtime
*/
export class WASMRuntime {
name = 'wasm';
module = null;
simdSupported = false;
models = new Map();
initialized = false;
get capabilities() {
return {
concurrency: false, // WASM is single-threaded by default
quantization: true,
float16: false,
dynamicShapes: true,
maxBatchSize: 16,
availableMemory: 128 * 1024 * 1024, // 128MB default
};
}
/**
* Check if WASM is available
*/
async isAvailable() {
if (typeof WebAssembly === 'undefined')
return false;
try {
// Check if we can instantiate a minimal WASM module
const bytes = new Uint8Array([
0x00, 0x61, 0x73, 0x6d, // Magic number
0x01, 0x00, 0x00, 0x00, // Version
]);
await WebAssembly.instantiate(bytes);
return true;
}
catch {
return false;
}
}
/**
* Initialize the WASM runtime
*/
async initialize() {
if (this.initialized)
return;
// Check SIMD support
this.simdSupported = await this.checkSIMDSupport();
// Create memory pool
const memory = new WebAssembly.Memory({
initial: 256, // 16MB initial
maximum: 2048, // 128MB maximum
});
// Compile and instantiate the WASM module
// In production, this would load an actual WASM binary
// For now, we use a pure JS fallback
this.module = {
memory,
exports: this.createJSFallback(memory),
};
this.initialized = true;
}
/**
* Check SIMD support
*/
async checkSIMDSupport() {
try {
// SIMD detection via feature detection
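            // The bytes below encode a minimal module whose single function produces
            // a v128 constant (0xfd 0x0c is the v128.const opcode); instantiation
            // only succeeds on engines that support the WASM SIMD proposal.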
const simdTest = new Uint8Array([
0x00, 0x61, 0x73, 0x6d, 0x01, 0x00, 0x00, 0x00,
0x01, 0x05, 0x01, 0x60, 0x00, 0x01, 0x7b, 0x03,
0x02, 0x01, 0x00, 0x0a, 0x0a, 0x01, 0x08, 0x00,
0xfd, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x0b
]);
await WebAssembly.instantiate(simdTest);
return true;
}
catch {
return false;
}
}
/**
* Create JavaScript fallback for WASM operations
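     *
     * All pointer arguments are byte offsets into the shared WebAssembly.Memory
     * buffer; each helper divides them by 4 to index the backing Float32Array view.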
*/
createJSFallback(memory) {
let nextPtr = 0;
const allocations = new Map();
return {
malloc: (size) => {
const ptr = nextPtr;
nextPtr += size;
allocations.set(ptr, size);
return ptr;
},
free: (ptr) => {
allocations.delete(ptr);
},
matmul_f32: (aPtr, aRows, aCols, bPtr, _bRows, bCols, outPtr) => {
const view = new Float32Array(memory.buffer);
const aOffset = aPtr / 4;
const bOffset = bPtr / 4;
const outOffset = outPtr / 4;
for (let i = 0; i < aRows; i++) {
for (let j = 0; j < bCols; j++) {
let sum = 0;
for (let k = 0; k < aCols; k++) {
sum += (view[aOffset + i * aCols + k] ?? 0) * (view[bOffset + k * bCols + j] ?? 0);
}
view[outOffset + i * bCols + j] = sum;
}
}
},
add_f32: (aPtr, bPtr, outPtr, size) => {
const view = new Float32Array(memory.buffer);
const aOffset = aPtr / 4;
const bOffset = bPtr / 4;
const outOffset = outPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = (view[aOffset + i] ?? 0) + (view[bOffset + i] ?? 0);
}
},
mul_f32: (aPtr, bPtr, outPtr, size) => {
const view = new Float32Array(memory.buffer);
const aOffset = aPtr / 4;
const bOffset = bPtr / 4;
const outOffset = outPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = (view[aOffset + i] ?? 0) * (view[bOffset + i] ?? 0);
}
},
relu_f32: (inputPtr, outputPtr, size) => {
const view = new Float32Array(memory.buffer);
const inOffset = inputPtr / 4;
const outOffset = outputPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = Math.max(0, view[inOffset + i] ?? 0);
}
},
sigmoid_f32: (inputPtr, outputPtr, size) => {
const view = new Float32Array(memory.buffer);
const inOffset = inputPtr / 4;
const outOffset = outputPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = 1 / (1 + Math.exp(-(view[inOffset + i] ?? 0)));
}
},
softmax_f32: (inputPtr, outputPtr, size) => {
const view = new Float32Array(memory.buffer);
const inOffset = inputPtr / 4;
const outOffset = outputPtr / 4;
// Find max for numerical stability
let max = -Infinity;
for (let i = 0; i < size; i++) {
if ((view[inOffset + i] ?? 0) > max)
max = view[inOffset + i] ?? 0;
}
// Compute exp and sum
let sum = 0;
for (let i = 0; i < size; i++) {
view[outOffset + i] = Math.exp((view[inOffset + i] ?? 0) - max);
sum += view[outOffset + i] ?? 0;
}
// Normalize
for (let i = 0; i < size; i++) {
view[outOffset + i] = (view[outOffset + i] ?? 0) / sum;
}
},
};
}
/**
* Load a model
*/
async loadModel(modelData, options = {}) {
this.ensureInitialized();
// Parse model configuration
const config = this.parseModelConfig(modelData);
// Extract and store weights
const wasmData = {
weights: new Map(),
config,
executionOrder: config.layers.map(l => l.name),
};
// Load weights into memory
await this.loadWeights(modelData, wasmData);
const modelId = `wasm_${Date.now().toString(36)}`;
this.models.set(modelId, wasmData);
// Create metadata
const metadata = {
name: config.name || options.metadata?.name || 'unknown',
version: config.version || '1.0.0',
inputs: config.inputs.map(i => ({
name: i.name,
dtype: i.dtype,
shape: i.shape,
})),
outputs: config.outputs.map(o => ({
name: o.name,
dtype: o.dtype,
shape: o.shape,
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? 'float32',
format: 'edgeflow',
};
// Create model instance
const model = new LoadedModelImpl(metadata, 'wasm', () => this.unloadModel(modelId));
// Track in memory manager
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Run inference
*/
async run(model, inputs) {
this.ensureInitialized();
// Execute model layers
return this.executeModel(inputs, model.metadata);
}
/**
* Execute model
*/
async executeModel(inputs, metadata) {
const outputs = [];
for (const outputSpec of metadata.outputs) {
const outputSize = outputSpec.shape.reduce((a, b) => a * b, 1);
// Process based on output requirements
// This is a simplified implementation
let outputTensor;
if (inputs.length > 0 && inputs[0]) {
const inputTensor = inputs[0];
// Apply transformations based on layer types
// For demo, apply softmax to classification outputs
if (outputSpec.name.includes('logits') || outputSpec.name.includes('class')) {
outputTensor = tensorSoftmax(inputTensor);
}
else if (outputSpec.name.includes('relu')) {
outputTensor = tensorRelu(inputTensor);
}
else if (outputSpec.name.includes('sigmoid')) {
outputTensor = tensorSigmoid(inputTensor);
}
else {
// Identity or feature extraction
const outputData = new Float32Array(outputSize);
const inputData = inputTensor.toFloat32Array();
for (let i = 0; i < Math.min(outputSize, inputData.length); i++) {
outputData[i] = inputData[i] ?? 0;
}
outputTensor = new EdgeFlowTensor(outputData, outputSpec.shape, 'float32');
}
}
else {
outputTensor = new EdgeFlowTensor(new Float32Array(outputSize), outputSpec.shape, 'float32');
}
outputs.push(outputTensor);
}
return outputs;
}
/**
* Parse model configuration
*/
parseModelConfig(data) {
try {
const decoder = new TextDecoder();
const text = decoder.decode(new Uint8Array(data, 0, Math.min(2048, data.byteLength)));
if (text.trim().startsWith('{')) {
let jsonEnd = text.indexOf('\n---\n');
if (jsonEnd === -1) {
// Try to parse as pure JSON
try {
return JSON.parse(text);
}
catch {
jsonEnd = data.byteLength;
}
}
const jsonStr = decoder.decode(new Uint8Array(data, 0, jsonEnd));
return JSON.parse(jsonStr);
}
}
catch {
// Not JSON format
}
return {
name: 'unknown',
version: '1.0.0',
layers: [],
inputs: [{ name: 'input', shape: [-1, 768], dtype: 'float32' }],
outputs: [{ name: 'output', shape: [-1, 768], dtype: 'float32' }],
};
}
/**
* Load weights into WASM memory
*/
async loadWeights(_modelData, _wasmData) {
// In a full implementation, extract and load weights
// This is a placeholder
}
/**
* Unload a model
*/
unloadModel(modelId) {
const modelData = this.models.get(modelId);
if (modelData && this.module) {
// Free weight buffers
for (const weight of modelData.weights.values()) {
this.module.exports.free(weight.ptr);
}
}
this.models.delete(modelId);
}
/**
* Ensure runtime is initialized
*/
ensureInitialized() {
if (!this.initialized || !this.module) {
throw new EdgeFlowError('WASM runtime is not initialized', ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
}
/**
* Check if SIMD is supported
*/
hasSIMDSupport() {
return this.simdSupported;
}
/**
* Dispose the runtime
*/
dispose() {
// Free all model weights
for (const modelId of this.models.keys()) {
this.unloadModel(modelId);
}
this.module = null;
this.initialized = false;
}
}
/**
* Create WASM runtime factory
*/
export function createWASMRuntime() {
return new WASMRuntime();
}
//# sourceMappingURL=wasm.js.map
================================================
FILE: dist/backends/webgpu.d.ts
================================================
/**
* edgeFlow.js - WebGPU Backend
*
* **Status: Planned** - This is a skeleton implementation that initializes
* WebGPU and creates compute pipelines but does not perform real model
* inference. For GPU-accelerated inference, use the ONNX Runtime backend
* which supports WebGPU via its execution providers.
*
* This backend is intended for future custom WebGPU compute shader
* implementations that bypass ONNX Runtime for specialized ops.
*/
import { Runtime, RuntimeType, RuntimeCapabilities, LoadedModel, ModelLoadOptions, Tensor } from '../core/types.js';
declare global {
interface Navigator {
gpu?: GPU;
}
interface GPU {
        requestAdapter(options?: GPURequestAdapterOptions): Promise<GPUAdapter | null>;
}
interface GPURequestAdapterOptions {
powerPreference?: 'low-power' | 'high-performance';
}
interface GPUAdapter {
        requestDevice(descriptor?: GPUDeviceDescriptor): Promise<GPUDevice>;
}
interface GPUDeviceDescriptor {
requiredFeatures?: string[];
        requiredLimits?: Record<string, number>;
}
interface GPUDevice {
limits: GPULimits;
        lost: Promise<GPUDeviceLostInfo>;
createBuffer(descriptor: GPUBufferDescriptor): GPUBuffer;
createShaderModule(descriptor: GPUShaderModuleDescriptor): GPUShaderModule;
createBindGroupLayout(descriptor: GPUBindGroupLayoutDescriptor): GPUBindGroupLayout;
createPipelineLayout(descriptor: GPUPipelineLayoutDescriptor): GPUPipelineLayout;
createComputePipeline(descriptor: GPUComputePipelineDescriptor): GPUComputePipeline;
destroy(): void;
}
interface GPULimits {
maxBufferSize: number;
}
interface GPUDeviceLostInfo {
message: string;
reason: string;
}
interface GPUBuffer {
destroy(): void;
}
interface GPUShaderModule {
}
interface GPUBindGroupLayout {
}
interface GPUPipelineLayout {
}
interface GPUComputePipeline {
}
interface GPUBufferDescriptor {
size: number;
usage: number;
}
interface GPUShaderModuleDescriptor {
code: string;
}
interface GPUBindGroupLayoutDescriptor {
entries: GPUBindGroupLayoutEntry[];
}
interface GPUBindGroupLayoutEntry {
binding: number;
visibility: number;
buffer?: {
type: string;
};
}
interface GPUPipelineLayoutDescriptor {
bindGroupLayouts: GPUBindGroupLayout[];
}
interface GPUComputePipelineDescriptor {
layout: GPUPipelineLayout;
compute: {
module: GPUShaderModule;
entryPoint: string;
};
}
}
/**
* WebGPURuntime - GPU-accelerated inference runtime
*/
export declare class WebGPURuntime implements Runtime {
readonly name: RuntimeType;
private adapter;
private device;
private models;
private initialized;
get capabilities(): RuntimeCapabilities;
/**
* Check if WebGPU is available
*/
    isAvailable(): Promise<boolean>;
/**
* Initialize the WebGPU runtime
*/
    initialize(): Promise<void>;
/**
* Load a model
*/
    loadModel(modelData: ArrayBuffer, options?: ModelLoadOptions): Promise<LoadedModel>;
/**
* Run inference
*/
    run(model: LoadedModel, inputs: Tensor[]): Promise<Tensor[]>;
/**
* Execute model (simplified implementation)
*/
private executeModel;
/**
* Parse model data
*/
private parseModelData;
/**
* Upload weights to GPU
*/
private uploadWeights;
/**
* Create compute pipelines
*/
private createPipelines;
/**
* Unload a model
*/
private unloadModel;
/**
* Ensure runtime is initialized
*/
private ensureInitialized;
/**
* Dispose the runtime
*/
dispose(): void;
}
/**
* Create WebGPU runtime factory
*/
export declare function createWebGPURuntime(): Runtime;
//# sourceMappingURL=webgpu.d.ts.map
================================================
FILE: dist/backends/webgpu.js
================================================
/**
* edgeFlow.js - WebGPU Backend
*
* **Status: Planned** - This is a skeleton implementation that initializes
* WebGPU and creates compute pipelines but does not perform real model
* inference. For GPU-accelerated inference, use the ONNX Runtime backend
* which supports WebGPU via its execution providers.
*
* This backend is intended for future custom WebGPU compute shader
* implementations that bypass ONNX Runtime for specialized ops.
*/
import { EdgeFlowError, ErrorCodes, } from '../core/types.js';
import { LoadedModelImpl } from '../core/runtime.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { getMemoryManager } from '../core/memory.js';
// WebGPU constants
const GPUBufferUsage = {
STORAGE: 0x0080,
COPY_SRC: 0x0004,
COPY_DST: 0x0008,
MAP_READ: 0x0001,
};
const GPUShaderStage = {
COMPUTE: 0x0004,
};
// ============================================================================
// WebGPU Runtime Implementation
// ============================================================================
/**
* WebGPURuntime - GPU-accelerated inference runtime
*/
export class WebGPURuntime {
name = 'webgpu';
adapter = null;
device = null;
models = new Map();
initialized = false;
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: true,
dynamicShapes: false,
maxBatchSize: 64,
availableMemory: this.device?.limits.maxBufferSize ?? 256 * 1024 * 1024,
};
}
/**
* Check if WebGPU is available
*/
async isAvailable() {
if (typeof navigator === 'undefined')
return false;
if (!navigator.gpu)
return false;
try {
const adapter = await navigator.gpu.requestAdapter();
return adapter !== null;
}
catch {
return false;
}
}
/**
* Initialize the WebGPU runtime
*/
async initialize() {
if (this.initialized)
return;
if (!navigator.gpu) {
throw new EdgeFlowError('WebGPU is not supported in this browser', ErrorCodes.RUNTIME_NOT_AVAILABLE);
}
// Request adapter
this.adapter = await navigator.gpu.requestAdapter({
powerPreference: 'high-performance',
});
if (!this.adapter) {
throw new EdgeFlowError('Failed to get WebGPU adapter', ErrorCodes.RUNTIME_INIT_FAILED);
}
// Request device
this.device = await this.adapter.requestDevice({
requiredFeatures: [],
requiredLimits: {},
});
// Handle device loss
this.device.lost.then((info) => {
console.error('WebGPU device was lost:', info.message);
this.initialized = false;
this.device = null;
});
this.initialized = true;
}
/**
* Load a model
*/
async loadModel(modelData, options = {}) {
this.ensureInitialized();
// Parse model data
const config = this.parseModelData(modelData);
// Create shader modules and pipelines
const webgpuData = {
shaders: new Map(),
pipelines: new Map(),
weights: new Map(),
bindGroupLayouts: [],
config,
};
// Extract and upload weights
await this.uploadWeights(modelData, webgpuData);
// Create compute pipelines for each layer
await this.createPipelines(webgpuData);
// Generate model ID
const modelId = `webgpu_${Date.now().toString(36)}`;
this.models.set(modelId, webgpuData);
// Create metadata
const metadata = {
name: config.name || options.metadata?.name || 'unknown',
version: config.version,
inputs: config.inputs.map(i => ({
name: i.name,
dtype: i.dtype,
shape: i.shape,
})),
outputs: config.outputs.map(o => ({
name: o.name,
dtype: o.dtype,
shape: o.shape,
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? 'float32',
format: 'edgeflow',
};
// Create model instance
const model = new LoadedModelImpl(metadata, 'webgpu', () => this.unloadModel(modelId));
// Track in memory manager
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Run inference
*/
async run(model, inputs) {
this.ensureInitialized();
// For now, use a simple fallback implementation
// In a full implementation, this would execute the compute pipelines
return this.executeModel(inputs, model.metadata);
}
/**
* Execute model (simplified implementation)
*/
async executeModel(inputs, metadata) {
// This is a simplified implementation
// A full implementation would:
// 1. Upload input tensors to GPU buffers
// 2. Execute compute pipelines in topological order
// 3. Read back output tensors
const device = this.device;
const outputs = [];
for (const outputSpec of metadata.outputs) {
// Create output buffer
const outputSize = outputSpec.shape.reduce((a, b) => a * b, 1);
const outputBuffer = device.createBuffer({
size: outputSize * 4, // float32
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
});
// Create staging buffer for readback
const stagingBuffer = device.createBuffer({
size: outputSize * 4,
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
});
// For now, return zeros (placeholder)
// In production, execute actual compute pipelines
const outputData = new Float32Array(outputSize);
// Simulate some computation based on inputs
if (inputs.length > 0 && inputs[0]) {
const inputData = inputs[0].toFloat32Array();
for (let i = 0; i < Math.min(outputSize, inputData.length); i++) {
outputData[i] = (inputData[i] ?? 0);
}
}
outputs.push(new EdgeFlowTensor(outputData, outputSpec.shape, 'float32'));
// Cleanup
outputBuffer.destroy();
stagingBuffer.destroy();
}
return outputs;
}
/**
* Parse model data
*/
parseModelData(data) {
// Try to parse as JSON first (for our custom format)
try {
const decoder = new TextDecoder();
const text = decoder.decode(new Uint8Array(data, 0, Math.min(1024, data.byteLength)));
// Check if it starts with JSON
if (text.trim().startsWith('{')) {
// Find the JSON header end
let jsonEnd = text.indexOf('\n---\n');
if (jsonEnd === -1)
jsonEnd = data.byteLength;
const jsonStr = decoder.decode(new Uint8Array(data, 0, jsonEnd));
return JSON.parse(jsonStr);
}
}
catch {
// Not JSON format
}
// Return default config for unknown formats
return {
name: 'unknown',
version: '1.0.0',
layers: [],
inputs: [{ name: 'input', shape: [-1, 768], dtype: 'float32' }],
outputs: [{ name: 'output', shape: [-1, 768], dtype: 'float32' }],
};
}
/**
* Upload weights to GPU
*/
async uploadWeights(_data, modelData) {
const device = this.device;
// In a full implementation, parse weight data from the model file
// and upload to GPU buffers
// Placeholder: create empty weight buffer
const weightsBuffer = device.createBuffer({
size: 1024,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
});
modelData.weights.set('default', weightsBuffer);
}
/**
* Create compute pipelines
*/
async createPipelines(modelData) {
const device = this.device;
// Create a general-purpose compute shader
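        // (an identity/copy kernel: each invocation copies one f32 element from
        // input to output; a placeholder until real layer shaders exist)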
const shaderCode = /* wgsl */ `
      @group(0) @binding(0) var<storage, read> input: array<f32>;
      @group(0) @binding(1) var<storage, read_write> output: array<f32>;
      @compute @workgroup_size(64)
      fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
let idx = gid.x;
if (idx < arrayLength(&input)) {
output[idx] = input[idx];
}
}
`;
const shaderModule = device.createShaderModule({
code: shaderCode,
});
modelData.shaders.set('default', shaderModule);
// Create bind group layout
const bindGroupLayout = device.createBindGroupLayout({
entries: [
{
binding: 0,
visibility: GPUShaderStage.COMPUTE,
buffer: { type: 'read-only-storage' },
},
{
binding: 1,
visibility: GPUShaderStage.COMPUTE,
buffer: { type: 'storage' },
},
],
});
modelData.bindGroupLayouts.push(bindGroupLayout);
// Create pipeline layout
const pipelineLayout = device.createPipelineLayout({
bindGroupLayouts: [bindGroupLayout],
});
// Create compute pipeline
const pipeline = device.createComputePipeline({
layout: pipelineLayout,
compute: {
module: shaderModule,
entryPoint: 'main',
},
});
modelData.pipelines.set('default', pipeline);
}
/**
* Unload a model
*/
unloadModel(modelId) {
const modelData = this.models.get(modelId);
if (modelData) {
// Destroy GPU buffers
for (const buffer of modelData.weights.values()) {
buffer.destroy();
}
this.models.delete(modelId);
}
}
/**
* Ensure runtime is initialized
*/
ensureInitialized() {
if (!this.initialized || !this.device) {
throw new EdgeFlowError('WebGPU runtime is not initialized', ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
}
/**
* Dispose the runtime
*/
dispose() {
// Unload all models
for (const modelId of this.models.keys()) {
this.unloadModel(modelId);
}
// Destroy device
if (this.device) {
this.device.destroy();
this.device = null;
}
this.adapter = null;
this.initialized = false;
}
}
/**
* Create WebGPU runtime factory
*/
export function createWebGPURuntime() {
return new WebGPURuntime();
}
//# sourceMappingURL=webgpu.js.map
================================================
FILE: dist/backends/webnn.d.ts
================================================
/**
* edgeFlow.js - WebNN Backend
*
* **Status: Planned** - This is a skeleton implementation that initializes
* a WebNN context but does not perform real model inference or graph building.
* For hardware-accelerated inference, use the ONNX Runtime backend which
* supports WebNN via its execution providers when available.
*
* This backend is intended for future native WebNN graph building support.
*/
import { Runtime, RuntimeType, RuntimeCapabilities, LoadedModel, ModelLoadOptions, Tensor } from '../core/types.js';
/**
* WebNN context type
*/
type MLContextType = 'default' | 'gpu' | 'cpu' | 'npu';
/**
* WebNN operand descriptor
*/
interface MLOperandDescriptor {
dataType: 'float32' | 'float16' | 'int32' | 'uint32' | 'int8' | 'uint8';
dimensions: number[];
}
/**
* WebNN context options
*/
interface MLContextOptions {
deviceType?: MLContextType;
powerPreference?: 'default' | 'high-performance' | 'low-power';
}
declare global {
interface Navigator {
ml?: {
            createContext(options?: MLContextOptions): Promise<MLContext>;
};
}
interface MLContext {
        compute(graph: MLGraph, inputs: Record<string, ArrayBufferView>, outputs: Record<string, ArrayBufferView>): Promise<Record<string, ArrayBufferView>>;
}
interface MLGraph {
}
interface MLGraphBuilder {
input(name: string, desc: MLOperandDescriptor): MLOperand;
constant(desc: MLOperandDescriptor, data: ArrayBufferView): MLOperand;
        build(outputs: Record<string, MLOperand>): Promise<MLGraph>;
add(a: MLOperand, b: MLOperand): MLOperand;
sub(a: MLOperand, b: MLOperand): MLOperand;
mul(a: MLOperand, b: MLOperand): MLOperand;
div(a: MLOperand, b: MLOperand): MLOperand;
matmul(a: MLOperand, b: MLOperand): MLOperand;
relu(x: MLOperand): MLOperand;
sigmoid(x: MLOperand): MLOperand;
tanh(x: MLOperand): MLOperand;
softmax(x: MLOperand): MLOperand;
reshape(x: MLOperand, newShape: number[]): MLOperand;
transpose(x: MLOperand, permutation?: number[]): MLOperand;
}
interface MLOperand {
}
}
/**
* WebNNRuntime - Browser-native neural network runtime
*/
export declare class WebNNRuntime implements Runtime {
readonly name: RuntimeType;
private context;
private models;
private initialized;
private deviceType;
get capabilities(): RuntimeCapabilities;
/**
* Check if WebNN is available
*/
    isAvailable(): Promise<boolean>;
/**
* Initialize the WebNN runtime
*/
    initialize(): Promise<void>;
/**
* Load a model
*/
    loadModel(modelData: ArrayBuffer, options?: ModelLoadOptions): Promise<LoadedModel>;
/**
* Run inference
*/
    run(model: LoadedModel, inputs: Tensor[]): Promise<Tensor[]>;
/**
* Execute model (simplified implementation)
*/
private executeModel;
/**
* Parse model configuration
*/
private parseModelConfig;
/**
* Unload a model
*/
private unloadModel;
/**
* Ensure runtime is initialized
*/
private ensureInitialized;
/**
* Get device type
*/
getDeviceType(): MLContextType;
/**
* Dispose the runtime
*/
dispose(): void;
}
/**
* Create WebNN runtime factory
*/
export declare function createWebNNRuntime(): Runtime;
export {};
//# sourceMappingURL=webnn.d.ts.map
================================================
FILE: dist/backends/webnn.js
================================================
/**
* edgeFlow.js - WebNN Backend
*
* **Status: Planned** - This is a skeleton implementation that initializes
* a WebNN context but does not perform real model inference or graph building.
* For hardware-accelerated inference, use the ONNX Runtime backend which
* supports WebNN via its execution providers when available.
*
* This backend is intended for future native WebNN graph building support.
*/
import { EdgeFlowError, ErrorCodes, } from '../core/types.js';
import { LoadedModelImpl } from '../core/runtime.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { getMemoryManager } from '../core/memory.js';
// ============================================================================
// WebNN Runtime Implementation
// ============================================================================
/**
* WebNNRuntime - Browser-native neural network runtime
*/
export class WebNNRuntime {
name = 'webnn';
context = null;
models = new Map();
initialized = false;
deviceType = 'default';
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: true,
dynamicShapes: false,
maxBatchSize: 32,
availableMemory: 256 * 1024 * 1024, // Estimated
};
}
/**
* Check if WebNN is available
*/
async isAvailable() {
if (typeof navigator === 'undefined')
return false;
if (!navigator.ml)
return false;
try {
const context = await navigator.ml.createContext({ deviceType: 'default' });
return context !== null;
}
catch {
return false;
}
}
/**
* Initialize the WebNN runtime
*/
async initialize() {
if (this.initialized)
return;
if (!navigator.ml) {
throw new EdgeFlowError('WebNN is not supported in this browser', ErrorCodes.RUNTIME_NOT_AVAILABLE);
}
// Try to get GPU context first, fallback to CPU
try {
this.context = await navigator.ml.createContext({
deviceType: 'gpu',
powerPreference: 'high-performance',
});
this.deviceType = 'gpu';
}
catch {
try {
this.context = await navigator.ml.createContext({ deviceType: 'cpu' });
this.deviceType = 'cpu';
}
catch (error) {
throw new EdgeFlowError(`Failed to create WebNN context: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.RUNTIME_INIT_FAILED);
}
}
this.initialized = true;
}
/**
* Load a model
*/
async loadModel(modelData, options = {}) {
this.ensureInitialized();
// Parse model configuration
const config = this.parseModelConfig(modelData);
// Note: Full WebNN implementation would build the graph here
// This is a placeholder that creates minimal metadata
const modelId = `webnn_${Date.now().toString(36)}`;
// Create metadata
const metadata = {
name: config.name || options.metadata?.name || 'unknown',
version: config.version || '1.0.0',
inputs: config.inputs.map(i => ({
name: i.name,
dtype: i.dtype,
shape: i.shape,
})),
outputs: config.outputs.map(o => ({
name: o.name,
dtype: o.dtype,
shape: o.shape,
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? 'float32',
format: 'edgeflow',
};
// Create model instance
const model = new LoadedModelImpl(metadata, 'webnn', () => this.unloadModel(modelId));
// Track in memory manager
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Run inference
*/
async run(model, inputs) {
this.ensureInitialized();
// Simplified implementation - in production, would use compiled graph
return this.executeModel(inputs, model.metadata);
}
/**
* Execute model (simplified implementation)
*/
async executeModel(inputs, metadata) {
const outputs = [];
// For each expected output
for (const outputSpec of metadata.outputs) {
const outputSize = outputSpec.shape.reduce((a, b) => a * b, 1);
const outputData = new Float32Array(outputSize);
// Simple passthrough for demo (real impl would use WebNN compute)
if (inputs.length > 0 && inputs[0]) {
const inputData = inputs[0].toFloat32Array();
for (let i = 0; i < Math.min(outputSize, inputData.length); i++) {
outputData[i] = inputData[i] ?? 0;
}
}
outputs.push(new EdgeFlowTensor(outputData, outputSpec.shape, 'float32'));
}
return outputs;
}
/**
* Parse model configuration
*/
parseModelConfig(data) {
try {
const decoder = new TextDecoder();
const text = decoder.decode(new Uint8Array(data, 0, Math.min(1024, data.byteLength)));
if (text.trim().startsWith('{')) {
let jsonEnd = text.indexOf('\n---\n');
if (jsonEnd === -1)
jsonEnd = data.byteLength;
const jsonStr = decoder.decode(new Uint8Array(data, 0, jsonEnd));
return JSON.parse(jsonStr);
}
}
catch {
// Not JSON format
}
return {
name: 'unknown',
version: '1.0.0',
inputs: [{ name: 'input', shape: [-1, 768], dtype: 'float32' }],
outputs: [{ name: 'output', shape: [-1, 768], dtype: 'float32' }],
};
}
/**
* Unload a model
*/
unloadModel(modelId) {
this.models.delete(modelId);
}
/**
* Ensure runtime is initialized
*/
ensureInitialized() {
if (!this.initialized || !this.context) {
throw new EdgeFlowError('WebNN runtime is not initialized', ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
}
/**
* Get device type
*/
getDeviceType() {
return this.deviceType;
}
/**
* Dispose the runtime
*/
dispose() {
this.models.clear();
this.context = null;
this.initialized = false;
}
}
/**
* Create WebNN runtime factory
*/
export function createWebNNRuntime() {
return new WebNNRuntime();
}
//# sourceMappingURL=webnn.js.map
================================================
FILE: dist/core/composer.d.ts
================================================
/**
* edgeFlow.js - Pipeline Composer
*
* Chain multiple pipelines together to build complex multi-model workflows.
* Each stage's output is transformed and fed as input to the next stage.
*
* @example
* ```typescript
* import { compose } from 'edgeflowjs';
*
* const speechTranslator = compose([
* { task: 'automatic-speech-recognition' },
* { task: 'translation', options: { srcLang: 'en', tgtLang: 'zh' } },
* ]);
*
* const result = await speechTranslator.run(audioBlob);
* // result.stages = [asrResult, translationResult]
* // result.output = final translation text
* ```
*/
import { type PipelineFactoryOptions } from '../pipelines/index.js';
import type { PipelineTask } from './types.js';
/**
* A single stage in a composed pipeline.
*/
export interface CompositionStage {
/** The pipeline task to run */
task: PipelineTask | (string & {});
/** Model override for this stage */
model?: string;
/** Extra options forwarded to `pipeline()` */
options?: PipelineFactoryOptions;
/**
* Optional transform applied to the previous stage's output before it is
* passed as input to this stage. If omitted, the raw output is forwarded.
*/
transform?: (previousOutput: unknown) => unknown;
/**
* Options forwarded to the pipeline's `run()` call.
*/
    runOptions?: Record<string, unknown>;
}
/**
* Result from running a composed pipeline.
*/
export interface CompositionResult {
/** The final output from the last stage */
output: unknown;
/** Intermediate results for every stage (index-aligned with stages) */
stages: unknown[];
/** Total wall-clock time in milliseconds */
totalTime: number;
/** Per-stage timing */
stageTimes: number[];
}
/**
* A composed (chained) pipeline.
*/
export interface ComposedPipeline {
/** Execute the full chain with the given initial input */
    run(input: unknown): Promise<CompositionResult>;
/** Dispose all underlying pipeline instances */
dispose(): void;
/** Number of stages */
readonly length: number;
}
/**
* Compose multiple pipeline stages into a single sequential chain.
*
* The output of each stage is fed as the input to the next stage. Use the
* optional `transform` hook in a stage to reshape data between stages.
*
* All pipelines are lazily initialised on the first `run()` call and cached
* for subsequent calls.
*
* @param stages - Ordered list of pipeline stages
* @returns A composed pipeline that can be run end-to-end
*
* @example
* ```typescript
* const ocrPipeline = compose([
* { task: 'image-to-text' },
* {
* task: 'text-classification',
* transform: (ocrResult: any) => ocrResult.text,
* },
* ]);
*
* const { output, stages, totalTime } = await ocrPipeline.run(imageElement);
* ```
*/
export declare function compose(stages: CompositionStage[]): ComposedPipeline;
/**
* Run stages in parallel (fan-out) and collect all results.
*
* Unlike `compose` (which is sequential), `parallel` runs every stage
* independently with the same input and returns an array of results.
*
* @example
* ```typescript
* const analyzer = parallel([
* { task: 'text-classification' },
* { task: 'feature-extraction' },
* { task: 'zero-shot-classification',
* transform: (text) => ({ text, candidateLabels: ['news', 'sports'] }) },
* ]);
*
* const results = await analyzer.run('Breaking: team wins championship');
* ```
*/
export declare function parallel(stages: CompositionStage[]): {
run(input: unknown): Promise<{
outputs: unknown[];
totalTime: number;
}>;
dispose(): void;
};
//# sourceMappingURL=composer.d.ts.map
================================================
FILE: dist/core/composer.js
================================================
/**
* edgeFlow.js - Pipeline Composer
*
* Chain multiple pipelines together to build complex multi-model workflows.
* Each stage's output is transformed and fed as input to the next stage.
*
* @example
* ```typescript
* import { compose } from 'edgeflowjs';
*
* const speechTranslator = compose([
* { task: 'automatic-speech-recognition' },
* { task: 'translation', options: { srcLang: 'en', tgtLang: 'zh' } },
* ]);
*
* const result = await speechTranslator.run(audioBlob);
* // result.stages = [asrResult, translationResult]
* // result.output = final translation text
* ```
*/
import { pipeline } from '../pipelines/index.js';
// ---------------------------------------------------------------------------
// Implementation
// ---------------------------------------------------------------------------
/**
* Compose multiple pipeline stages into a single sequential chain.
*
* The output of each stage is fed as the input to the next stage. Use the
* optional `transform` hook in a stage to reshape data between stages.
*
* All pipelines are lazily initialised on the first `run()` call and cached
* for subsequent calls.
*
* @param stages - Ordered list of pipeline stages
* @returns A composed pipeline that can be run end-to-end
*
* @example
* ```typescript
* const ocrPipeline = compose([
* { task: 'image-to-text' },
* {
* task: 'text-classification',
* transform: (ocrResult: any) => ocrResult.text,
* },
* ]);
*
* const { output, stages, totalTime } = await ocrPipeline.run(imageElement);
* ```
*/
export function compose(stages) {
if (stages.length === 0) {
throw new Error('[edgeFlow.js] compose() requires at least one stage');
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let pipelineInstances = null;
async function ensureInitialised() {
if (pipelineInstances)
return pipelineInstances;
pipelineInstances = await Promise.all(stages.map((stage) => pipeline(stage.task, {
model: stage.model,
...stage.options,
})));
return pipelineInstances;
}
return {
get length() {
return stages.length;
},
async run(input) {
const instances = await ensureInitialised();
const stageResults = [];
const stageTimes = [];
let current = input;
const wallStart = performance.now();
for (let i = 0; i < stages.length; i++) {
const stage = stages[i];
const inst = instances[i];
// Apply transform from previous stage output if provided
if (stage.transform) {
current = stage.transform(current);
}
const t0 = performance.now();
// eslint-disable-next-line @typescript-eslint/no-explicit-any
current = await inst.run(current, stage.runOptions);
stageTimes.push(performance.now() - t0);
stageResults.push(current);
}
return {
output: current,
stages: stageResults,
totalTime: performance.now() - wallStart,
stageTimes,
};
},
dispose() {
if (pipelineInstances) {
for (const inst of pipelineInstances) {
if (inst && typeof inst.dispose === 'function') {
inst.dispose();
}
}
pipelineInstances = null;
}
},
};
}
/**
* Run stages in parallel (fan-out) and collect all results.
*
* Unlike `compose` (which is sequential), `parallel` runs every stage
* independently with the same input and returns an array of results.
*
* @example
* ```typescript
* const analyzer = parallel([
* { task: 'text-classification' },
* { task: 'feature-extraction' },
* { task: 'zero-shot-classification',
* transform: (text) => ({ text, candidateLabels: ['news', 'sports'] }) },
* ]);
*
* const results = await analyzer.run('Breaking: team wins championship');
* ```
*/
export function parallel(stages) {
if (stages.length === 0) {
throw new Error('[edgeFlow.js] parallel() requires at least one stage');
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let pipelineInstances = null;
async function ensureInitialised() {
if (pipelineInstances)
return pipelineInstances;
pipelineInstances = await Promise.all(stages.map((s) => pipeline(s.task, {
model: s.model,
...s.options,
})));
return pipelineInstances;
}
return {
async run(input) {
const instances = await ensureInitialised();
const t0 = performance.now();
const outputs = await Promise.all(stages.map((stage, i) => {
const stageInput = stage.transform ? stage.transform(input) : input;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
return instances[i].run(stageInput, stage.runOptions);
}));
return { outputs, totalTime: performance.now() - t0 };
},
dispose() {
if (pipelineInstances) {
for (const inst of pipelineInstances) {
if (inst && typeof inst.dispose === 'function') {
inst.dispose();
}
}
pipelineInstances = null;
}
},
};
}
//# sourceMappingURL=composer.js.map
================================================
FILE: dist/core/device-profiler.d.ts
================================================
/**
* edgeFlow.js - Device Profiler
*
* Automatically profiles the current device and recommends optimal model
* variants (quantization level, batch size, execution provider).
*
* @example
* ```typescript
* import { getDeviceProfile, recommendQuantization } from 'edgeflowjs';
*
* const profile = await getDeviceProfile();
* console.log(profile.tier); // 'high' | 'medium' | 'low'
*
* const quant = recommendQuantization(profile);
 * console.log(quant); // 'float16' | 'int8'
* ```
*/
import type { QuantizationType } from './types.js';
/**
* Device capability tier.
*/
export type DeviceTier = 'high' | 'medium' | 'low';
/**
* Profiled device information.
*/
export interface DeviceProfile {
/** Capability tier */
tier: DeviceTier;
/** Number of logical CPU cores */
cores: number;
/** Device memory in GiB (navigator.deviceMemory, may be null) */
memoryGiB: number | null;
/** Whether WebGPU is available */
webgpu: boolean;
/** Whether WebNN is available */
webnn: boolean;
/** Recommended max batch size */
recommendedBatchSize: number;
/** Recommended concurrency limit */
recommendedConcurrency: number;
/** Whether the device is mobile */
mobile: boolean;
/** Raw GPU adapter info (if WebGPU available) */
gpuInfo?: string;
}
/**
* Model variant recommendation.
*/
export interface ModelRecommendation {
/** Recommended quantization */
quantization: QuantizationType;
/** Recommended execution provider */
executionProvider: 'webgpu' | 'wasm';
/** Recommended batch size */
batchSize: number;
/** Whether to enable worker-based inference */
useWorker: boolean;
}
/**
* Profile the current device. Results are cached after the first call.
*/
export declare function getDeviceProfile(): Promise<DeviceProfile>;
/**
* Recommend the best quantization level for the current device.
*/
export declare function recommendQuantization(profile: DeviceProfile): QuantizationType;
/**
* Get full model variant recommendations for the current device.
*/
export declare function recommendModelVariant(): Promise<ModelRecommendation>;
/**
* Reset the cached profile (useful for testing).
*/
export declare function resetDeviceProfile(): void;
//# sourceMappingURL=device-profiler.d.ts.map
================================================
FILE: dist/core/device-profiler.js
================================================
/**
* edgeFlow.js - Device Profiler
*
* Automatically profiles the current device and recommends optimal model
* variants (quantization level, batch size, execution provider).
*
* @example
* ```typescript
* import { getDeviceProfile, recommendQuantization } from 'edgeflowjs';
*
* const profile = await getDeviceProfile();
* console.log(profile.tier); // 'high' | 'medium' | 'low'
*
* const quant = recommendQuantization(profile);
 * console.log(quant); // 'float16' | 'int8'
* ```
*/
// ---------------------------------------------------------------------------
// Profiling
// ---------------------------------------------------------------------------
let cachedProfile = null;
/**
* Profile the current device. Results are cached after the first call.
*/
export async function getDeviceProfile() {
if (cachedProfile)
return cachedProfile;
const cores = typeof navigator !== 'undefined'
? navigator.hardwareConcurrency ?? 2
: 2;
const memoryGiB = typeof navigator !== 'undefined' && 'deviceMemory' in navigator
? navigator.deviceMemory ?? null
: null;
const mobile = typeof navigator !== 'undefined'
? /Android|iPhone|iPad|iPod|Mobile/i.test(navigator.userAgent)
: false;
let webgpu = false;
let gpuInfo;
if (typeof navigator !== 'undefined' && 'gpu' in navigator) {
try {
const adapter = await navigator.gpu.requestAdapter();
webgpu = adapter != null;
if (adapter && typeof adapter === 'object') {
try {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const info = adapter['info'];
if (info) {
gpuInfo = `${info['vendor'] ?? ''} ${info['architecture'] ?? ''}`.trim() || undefined;
}
}
catch {
// info not available
}
}
}
catch {
// WebGPU not available
}
}
let webnn = false;
if (typeof navigator !== 'undefined' && 'ml' in navigator) {
try {
const ml = navigator.ml;
if (ml) {
const ctx = await ml.createContext();
webnn = ctx != null;
}
}
catch {
// WebNN not available
}
}
// Determine tier
let tier;
if (webgpu && cores >= 8 && (memoryGiB === null || memoryGiB >= 8)) {
tier = 'high';
}
else if (cores >= 4 && (memoryGiB === null || memoryGiB >= 4)) {
tier = 'medium';
}
else {
tier = 'low';
}
// Mobile devices get capped even if specs look good
if (mobile && tier === 'high') {
tier = 'medium';
}
const recommendedBatchSize = tier === 'high' ? 32 : tier === 'medium' ? 8 : 1;
const recommendedConcurrency = tier === 'high' ? 4 : tier === 'medium' ? 2 : 1;
cachedProfile = {
tier,
cores,
memoryGiB,
webgpu,
webnn,
recommendedBatchSize,
recommendedConcurrency,
mobile,
gpuInfo,
};
return cachedProfile;
}
/**
* Recommend the best quantization level for the current device.
*/
export function recommendQuantization(profile) {
if (profile.tier === 'high' && profile.webgpu)
return 'float16';
if (profile.tier === 'medium')
return 'int8';
return 'int8'; // low-tier: int8 as well (the most aggressive level currently emitted)
}
/**
* Get full model variant recommendations for the current device.
*/
export async function recommendModelVariant() {
const profile = await getDeviceProfile();
return {
quantization: recommendQuantization(profile),
executionProvider: profile.webgpu ? 'webgpu' : 'wasm',
batchSize: profile.recommendedBatchSize,
useWorker: profile.cores >= 4,
};
}
/**
* Reset the cached profile (useful for testing).
*/
export function resetDeviceProfile() {
cachedProfile = null;
}
//# sourceMappingURL=device-profiler.js.map
================================================
FILE: dist/core/index.d.ts
================================================
/**
* edgeFlow.js - Core Module Exports
*/
export * from './types.js';
export { EdgeFlowTensor, tensor, zeros, ones, full, random, randn, arange, linspace, eye, add, sub, mul, div, matmul, softmax, relu, sigmoid, tanh, sum, mean, argmax, concat, } from './tensor.js';
export { InferenceScheduler, getScheduler, setScheduler, configureScheduler, } from './scheduler.js';
export { MemoryManager, MemoryScope, ModelCache, withMemoryScope, withMemoryScopeSync, getMemoryManager, getMemoryStats, release, gc, } from './memory.js';
export { RuntimeManager, LoadedModelImpl, loadModel, loadModelFromBuffer, runInference, runBatchInference, getRuntimeManager, registerRuntime, getBestRuntime, getAvailableRuntimes, } from './runtime.js';
export { registerPlugin, getPluginPipeline, getPluginMiddleware, listPlugins, unregisterPlugin, type EdgeFlowPlugin, type PluginPipelineEntry, type PluginBackendEntry, type PluginMiddleware, } from './plugin.js';
export { getDeviceProfile, recommendQuantization, recommendModelVariant, resetDeviceProfile, type DeviceProfile, type DeviceTier, type ModelRecommendation, } from './device-profiler.js';
export { compose, parallel, type CompositionStage, type CompositionResult, type ComposedPipeline, } from './composer.js';
export { InferenceWorker, WorkerPool, getWorkerPool, runInWorker, isWorkerSupported, serializeTensor, deserializeTensor, type WorkerMessage, type WorkerMessageType, type LoadModelRequest, type InferenceRequest, type SerializedTensor, type WorkerPoolOptions, } from './worker.js';
//# sourceMappingURL=index.d.ts.map
================================================
FILE: dist/core/index.js
================================================
/**
* edgeFlow.js - Core Module Exports
*/
// Types
export * from './types.js';
// Tensor
export { EdgeFlowTensor, tensor, zeros, ones, full, random, randn, arange, linspace, eye, add, sub, mul, div, matmul, softmax, relu, sigmoid, tanh, sum, mean, argmax, concat, } from './tensor.js';
// Scheduler
export { InferenceScheduler, getScheduler, setScheduler, configureScheduler, } from './scheduler.js';
// Memory
export { MemoryManager, MemoryScope, ModelCache, withMemoryScope, withMemoryScopeSync, getMemoryManager, getMemoryStats, release, gc, } from './memory.js';
// Runtime
export { RuntimeManager, LoadedModelImpl, loadModel, loadModelFromBuffer, runInference, runBatchInference, getRuntimeManager, registerRuntime, getBestRuntime, getAvailableRuntimes, } from './runtime.js';
// Plugin System
export { registerPlugin, getPluginPipeline, getPluginMiddleware, listPlugins, unregisterPlugin, } from './plugin.js';
// Device Profiler
export { getDeviceProfile, recommendQuantization, recommendModelVariant, resetDeviceProfile, } from './device-profiler.js';
// Composer
export { compose, parallel, } from './composer.js';
// Worker
export { InferenceWorker, WorkerPool, getWorkerPool, runInWorker, isWorkerSupported, serializeTensor, deserializeTensor, } from './worker.js';
//# sourceMappingURL=index.js.map
================================================
FILE: dist/core/memory.d.ts
================================================
/**
* edgeFlow.js - Memory Management
*
* Efficient memory management for tensors and models.
* Features:
* - Memory pooling
* - Automatic garbage collection
* - Memory tracking and statistics
* - Leak detection
*/
import { Tensor, LoadedModel, MemoryStats, MemoryPoolConfig, EventType, EventListener } from './types.js';
/**
* Tracked resource info
*/
interface TrackedResource {
id: string;
type: 'tensor' | 'model';
size: number;
createdAt: number;
stackTrace?: string;
}
/**
* MemoryManager - Central memory management
*
* Provides:
* - Resource tracking
* - Memory statistics
* - Garbage collection coordination
* - Memory warning events
*/
export declare class MemoryManager {
private static instance;
private readonly config;
private readonly resources;
private readonly disposers;
private readonly listeners;
private allocated;
private peak;
private gcScheduled;
private disposed;
private constructor();
/**
* Get singleton instance
*/
static getInstance(): MemoryManager;
/**
* Configure the memory manager
*/
static configure(config: MemoryPoolConfig): void;
/**
* Track a tensor
*/
track(tensor: Tensor, disposer?: () => void): void;
/**
* Track a model
*/
trackModel(model: LoadedModel, disposer?: () => void): void;
/**
* Untrack a resource
*/
untrack(id: string): void;
/**
* Release a resource
*/
release(resourceOrId: Tensor | LoadedModel | string): void;
/**
* Estimate tensor memory size
*/
private estimateTensorSize;
/**
* Get bytes per element for a data type
*/
private getBytesPerElement;
/**
* Capture stack trace for debugging
*/
private captureStackTrace;
/**
* Check if memory threshold is exceeded
*/
private checkMemoryThreshold;
/**
* Garbage collection helper.
*
* Identifies stale resources and optionally evicts them.
* @param evict - If true, actually dispose stale resources (default: false)
* @param maxAge - Resources older than this (ms) are considered stale (default: 5 min)
*/
gc(evict?: boolean, maxAge?: number): void;
/**
* Query actual browser memory usage via performance.measureUserAgentSpecificMemory()
* (Chrome 89+, requires cross-origin isolation). Returns null if unavailable.
*/
measureBrowserMemory(): Promise<{
bytes: number;
breakdown: Array<{
bytes: number;
types: string[];
}>;
} | null>;
/**
* Get the device's total memory hint (navigator.deviceMemory).
* Returns null if unavailable. Value is in GiB, rounded (e.g. 4, 8).
*/
getDeviceMemory(): number | null;
/**
* Get memory statistics
*/
getStats(): MemoryStats;
/**
* Get detailed resource list (for debugging)
*/
getResourceDetails(): TrackedResource[];
/**
* Check for potential memory leaks
*/
detectLeaks(maxAge?: number): TrackedResource[];
/**
* Add event listener
*/
on(event: EventType, listener: EventListener): void;
/**
* Remove event listener
*/
off(event: EventType, listener: EventListener): void;
/**
* Emit event
*/
private emit;
/**
* Reset statistics
*/
resetStats(): void;
/**
* Dispose all resources
*/
disposeAll(): void;
/**
* Dispose the manager
*/
dispose(): void;
}
/**
* Memory scope for automatic resource cleanup
*
* Usage:
* ```typescript
* const result = await withMemoryScope(async (scope) => {
* const tensor1 = scope.track(createTensor(...));
* const tensor2 = scope.track(createTensor(...));
* // Process tensors
* return computeResult(tensor1, tensor2);
* });
* // tensor1 and tensor2 are automatically disposed
* ```
*/
export declare class MemoryScope {
private resources;
private children;
private parent;
constructor(parent?: MemoryScope);
/**
* Track a resource in this scope
*/
track<T extends {
dispose: () => void;
}>(resource: T): T;
/**
* Create a child scope
*/
createChild(): MemoryScope;
/**
* Keep a resource (don't dispose it when scope ends)
*/
keep<T extends {
dispose: () => void;
}>(resource: T): T;
/**
* Dispose all resources in this scope
*/
dispose(): void;
}
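/**
* Usage sketch for `keep()`: a resource created inside a scope survives the scope's
* disposal when explicitly kept. The tensor values here are arbitrary.
*
* @example
* ```typescript
* import { withMemoryScope, tensor } from 'edgeflowjs';
*
* const kept = await withMemoryScope(async (scope) => {
* const scratch = scope.track(tensor([1, 2, 3, 4], [2, 2]));
* const result = scope.track(tensor([scratch.get(0, 0), scratch.get(1, 1)], [2]));
* // `result` must outlive the scope, so remove it from the dispose list.
* return scope.keep(result);
* });
* // `scratch` was disposed when the scope ended; `kept` is still usable.
* console.log(kept.toArray());
* ```
*/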
/**
* Execute a function with automatic memory cleanup
*/
export declare function withMemoryScope<T>(fn: (scope: MemoryScope) => Promise<T>): Promise<T>;
/**
* Synchronous version of withMemoryScope
*/
export declare function withMemoryScopeSync<T>(fn: (scope: MemoryScope) => T): T;
/**
* LRU Cache for loaded models
*/
export declare class ModelCache {
private readonly maxSize;
private readonly maxModels;
private readonly cache;
private currentSize;
constructor(options?: {
maxSize?: number;
maxModels?: number;
});
/**
* Get a model from cache
*/
get(key: string): LoadedModel | undefined;
/**
* Add a model to cache
*/
set(key: string, model: LoadedModel): void;
/**
* Remove a model from cache
*/
delete(key: string): boolean;
/**
* Check if model is in cache
*/
has(key: string): boolean;
/**
* Evict least recently used model
*/
private evictLRU;
/**
* Clear the cache
*/
clear(): void;
/**
* Get cache statistics
*/
getStats(): {
size: number;
count: number;
maxSize: number;
maxModels: number;
};
}
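/**
* Usage sketch for ModelCache: a small LRU keyed by model URL. The URL is a
* placeholder; `loadModel` comes from the runtime module.
*
* @example
* ```typescript
* import { ModelCache, loadModel } from 'edgeflowjs';
*
* const cache = new ModelCache({ maxModels: 2, maxSize: 128 * 1024 * 1024 });
*
* async function getModel(url: string) {
* const cached = cache.get(url);
* if (cached) return cached;
* const model = await loadModel(url);
* cache.set(url, model);
* return model;
* }
*
* // Repeated calls reuse the cached instance until the LRU policy evicts it.
* const a = await getModel('https://example.com/classifier.onnx');
* const b = await getModel('https://example.com/classifier.onnx');
* console.log(a === b, cache.getStats());
* ```
*/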
/**
* Get memory manager instance
*/
export declare function getMemoryManager(): MemoryManager;
/**
* Get memory statistics
*/
export declare function getMemoryStats(): MemoryStats;
/**
* Release a resource
*/
export declare function release(resource: Tensor | LoadedModel): void;
/**
* Force garbage collection hint
*/
export declare function gc(): void;
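/**
* Usage sketch: observing memory pressure from application code. The 'memory:warning'
* event is emitted when tracked allocations cross the configured gcThreshold.
*
* @example
* ```typescript
* import { getMemoryManager, getMemoryStats, gc } from 'edgeflowjs';
*
* const manager = getMemoryManager();
* manager.on('memory:warning', (event) => {
* console.warn('High memory usage', event.data);
* gc(); // hint: scan for stale resources
* });
*
* const stats = getMemoryStats();
* console.log(`${stats.tensorCount} tensors, ${stats.allocated} bytes (peak ${stats.peak})`);
*
* // Report resources older than the default 10 minutes as potential leaks.
* for (const leak of manager.detectLeaks()) {
* console.warn('Possible leak:', leak.id, leak.type, leak.size);
* }
* ```
*/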
export {};
//# sourceMappingURL=memory.d.ts.map
================================================
FILE: dist/core/memory.js
================================================
/**
* edgeFlow.js - Memory Management
*
* Efficient memory management for tensors and models.
* Features:
* - Memory pooling
* - Automatic garbage collection
* - Memory tracking and statistics
* - Leak detection
*/
/**
* Default memory pool configuration
*/
const DEFAULT_POOL_CONFIG = {
initialSize: 64 * 1024 * 1024, // 64MB
maxSize: 512 * 1024 * 1024, // 512MB
growthFactor: 1.5,
autoGC: true,
gcThreshold: 0.8, // 80%
};
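/*
* Sketch: overriding the defaults above before any resources are tracked. Fields are
* optional and merge over DEFAULT_POOL_CONFIG.
*
* MemoryManager.configure({
* maxSize: 1024 * 1024 * 1024, // allow up to 1 GiB of tracked allocations
* gcThreshold: 0.9, // emit 'memory:warning' later than the default 80%
* });
*/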
// ============================================================================
// Memory Manager
// ============================================================================
/**
* MemoryManager - Central memory management
*
* Provides:
* - Resource tracking
* - Memory statistics
* - Garbage collection coordination
* - Memory warning events
*/
export class MemoryManager {
static instance = null;
config;
resources = new Map();
disposers = new Map();
listeners = new Map();
allocated = 0;
peak = 0;
gcScheduled = false;
disposed = false;
constructor(config = {}) {
this.config = { ...DEFAULT_POOL_CONFIG, ...config };
}
/**
* Get singleton instance
*/
static getInstance() {
if (!MemoryManager.instance) {
MemoryManager.instance = new MemoryManager();
}
return MemoryManager.instance;
}
/**
* Configure the memory manager
*/
static configure(config) {
if (MemoryManager.instance) {
console.warn('MemoryManager already initialized, configuration may not apply');
}
MemoryManager.instance = new MemoryManager(config);
}
/**
* Track a tensor
*/
track(tensor, disposer) {
if (this.disposed)
return;
const size = this.estimateTensorSize(tensor);
this.resources.set(tensor.id, {
id: tensor.id,
type: 'tensor',
size,
createdAt: Date.now(),
stackTrace: this.captureStackTrace(),
});
if (disposer) {
this.disposers.set(tensor.id, disposer);
}
this.allocated += size;
this.peak = Math.max(this.peak, this.allocated);
this.checkMemoryThreshold();
}
/**
* Track a model
*/
trackModel(model, disposer) {
if (this.disposed)
return;
const size = model.metadata.sizeBytes;
this.resources.set(model.id, {
id: model.id,
type: 'model',
size,
createdAt: Date.now(),
stackTrace: this.captureStackTrace(),
});
if (disposer) {
this.disposers.set(model.id, disposer);
}
this.allocated += size;
this.peak = Math.max(this.peak, this.allocated);
this.checkMemoryThreshold();
}
/**
* Untrack a resource
*/
untrack(id) {
const resource = this.resources.get(id);
if (resource) {
this.allocated -= resource.size;
this.resources.delete(id);
this.disposers.delete(id);
}
}
/**
* Release a resource
*/
release(resourceOrId) {
const id = typeof resourceOrId === 'string' ? resourceOrId : resourceOrId.id;
const disposer = this.disposers.get(id);
if (disposer) {
try {
disposer();
}
catch (error) {
console.error('Error disposing resource:', error);
}
}
this.untrack(id);
}
/**
* Estimate tensor memory size
*/
estimateTensorSize(tensor) {
const bytesPerElement = this.getBytesPerElement(tensor.dtype);
return tensor.size * bytesPerElement;
}
/**
* Get bytes per element for a data type
*/
getBytesPerElement(dtype) {
switch (dtype) {
case 'float32':
return 4;
case 'float16':
return 2;
case 'int32':
return 4;
case 'int64':
return 8;
case 'uint8':
case 'int8':
case 'bool':
return 1;
default:
return 4;
}
}
/**
* Capture stack trace for debugging
*/
captureStackTrace() {
if (typeof Error.captureStackTrace === 'function') {
const obj = {};
Error.captureStackTrace(obj, this.captureStackTrace);
return obj.stack;
}
return new Error().stack;
}
/**
* Check if memory threshold is exceeded
*/
checkMemoryThreshold() {
if (!this.config.autoGC)
return;
const usage = this.allocated / this.config.maxSize;
if (usage >= this.config.gcThreshold && !this.gcScheduled) {
this.gcScheduled = true;
this.emit('memory:warning', {
allocated: this.allocated,
maxSize: this.config.maxSize,
usage,
});
// Schedule GC on next tick
setTimeout(() => {
this.gc();
this.gcScheduled = false;
}, 0);
}
}
/**
* Garbage collection helper.
*
* Identifies stale resources and optionally evicts them.
* @param evict - If true, actually dispose stale resources (default: false)
* @param maxAge - Resources older than this (ms) are considered stale (default: 5 min)
*/
gc(evict = false, maxAge = 5 * 60 * 1000) {
this.emit('memory:gc', { before: this.allocated });
const now = Date.now();
const staleIds = [];
for (const [id, resource] of this.resources) {
if (now - resource.createdAt > maxAge) {
staleIds.push(id);
}
}
if (evict) {
for (const id of staleIds) {
this.release(id);
}
}
this.emit('memory:gc', {
after: this.allocated,
evicted: evict ? staleIds.length : 0,
potentialCleanup: staleIds.length,
});
}
/**
* Query actual browser memory usage via performance.measureUserAgentSpecificMemory()
* (Chrome 89+, requires cross-origin isolation). Returns null if unavailable.
*/
async measureBrowserMemory() {
try {
if (typeof performance !== 'undefined' &&
'measureUserAgentSpecificMemory' in performance) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const result = await performance.measureUserAgentSpecificMemory();
return result;
}
}
catch {
// Not available or not cross-origin isolated
}
return null;
}
/**
* Get the device's total memory hint (navigator.deviceMemory).
* Returns null if unavailable. Value is in GiB, rounded (e.g. 4, 8).
*/
getDeviceMemory() {
try {
if (typeof navigator !== 'undefined' && 'deviceMemory' in navigator) {
return navigator.deviceMemory ?? null;
}
}
catch {
// Not available
}
return null;
}
/**
* Get memory statistics
*/
getStats() {
let tensorCount = 0;
let modelCount = 0;
for (const resource of this.resources.values()) {
if (resource.type === 'tensor') {
tensorCount++;
}
else {
modelCount++;
}
}
return {
allocated: this.allocated,
used: this.allocated, // In JS, allocated = used
peak: this.peak,
tensorCount,
modelCount,
};
}
/**
* Get detailed resource list (for debugging)
*/
getResourceDetails() {
return Array.from(this.resources.values());
}
/**
* Check for potential memory leaks
*/
detectLeaks(maxAge = 10 * 60 * 1000) {
const now = Date.now();
const potentialLeaks = [];
for (const resource of this.resources.values()) {
if (now - resource.createdAt > maxAge) {
potentialLeaks.push(resource);
}
}
return potentialLeaks;
}
/**
* Add event listener
*/
on(event, listener) {
let listeners = this.listeners.get(event);
if (!listeners) {
listeners = new Set();
this.listeners.set(event, listeners);
}
listeners.add(listener);
}
/**
* Remove event listener
*/
off(event, listener) {
const listeners = this.listeners.get(event);
if (listeners) {
listeners.delete(listener);
}
}
/**
* Emit event
*/
emit(type, data) {
const event = {
type,
timestamp: Date.now(),
data,
};
const listeners = this.listeners.get(type);
if (listeners) {
for (const listener of listeners) {
try {
listener(event);
}
catch (error) {
console.error('Error in event listener:', error);
}
}
}
}
/**
* Reset statistics
*/
resetStats() {
this.peak = this.allocated;
}
/**
* Dispose all resources
*/
disposeAll() {
for (const id of this.resources.keys()) {
this.release(id);
}
}
/**
* Dispose the manager
*/
dispose() {
this.disposeAll();
this.disposed = true;
this.listeners.clear();
MemoryManager.instance = null;
}
}
// ============================================================================
// Memory Scope (RAII-like pattern)
// ============================================================================
/**
* Memory scope for automatic resource cleanup
*
* Usage:
* ```typescript
* const result = await withMemoryScope(async (scope) => {
* const tensor1 = scope.track(createTensor(...));
* const tensor2 = scope.track(createTensor(...));
* // Process tensors
* return computeResult(tensor1, tensor2);
* });
* // tensor1 and tensor2 are automatically disposed
* ```
*/
export class MemoryScope {
resources = [];
children = [];
parent = null;
constructor(parent) {
if (parent) {
this.parent = parent;
parent.children.push(this);
}
}
/**
* Track a resource in this scope
*/
track(resource) {
this.resources.push(resource);
return resource;
}
/**
* Create a child scope
*/
createChild() {
return new MemoryScope(this);
}
/**
* Keep a resource (don't dispose it when scope ends)
*/
keep(resource) {
const index = this.resources.indexOf(resource);
if (index !== -1) {
this.resources.splice(index, 1);
}
return resource;
}
/**
* Dispose all resources in this scope
*/
dispose() {
// Dispose children first
for (const child of this.children) {
child.dispose();
}
this.children = [];
// Dispose resources in reverse order
for (let i = this.resources.length - 1; i >= 0; i--) {
try {
this.resources[i]?.dispose();
}
catch (error) {
console.error('Error disposing resource in scope:', error);
}
}
this.resources = [];
// Remove from parent
if (this.parent) {
const index = this.parent.children.indexOf(this);
if (index !== -1) {
this.parent.children.splice(index, 1);
}
this.parent = null;
}
}
}
/**
* Execute a function with automatic memory cleanup
*/
export async function withMemoryScope(fn) {
const scope = new MemoryScope();
try {
return await fn(scope);
}
finally {
scope.dispose();
}
}
/**
* Synchronous version of withMemoryScope
*/
export function withMemoryScopeSync(fn) {
const scope = new MemoryScope();
try {
return fn(scope);
}
finally {
scope.dispose();
}
}
// ============================================================================
// LRU Cache for Models
// ============================================================================
/**
* LRU Cache for loaded models
*/
export class ModelCache {
maxSize;
maxModels;
cache = new Map();
currentSize = 0;
constructor(options = {}) {
this.maxSize = options.maxSize ?? 256 * 1024 * 1024; // 256MB default
this.maxModels = options.maxModels ?? 5;
}
/**
* Get a model from cache
*/
get(key) {
const entry = this.cache.get(key);
if (entry) {
entry.lastAccess = Date.now();
return entry.model;
}
return undefined;
}
/**
* Add a model to cache
*/
set(key, model) {
const size = model.metadata.sizeBytes;
// Check if we need to evict
while ((this.currentSize + size > this.maxSize || this.cache.size >= this.maxModels) &&
this.cache.size > 0) {
this.evictLRU();
}
// Add to cache
this.cache.set(key, {
model,
size,
lastAccess: Date.now(),
});
this.currentSize += size;
}
/**
* Remove a model from cache
*/
delete(key) {
const entry = this.cache.get(key);
if (entry) {
entry.model.dispose();
this.currentSize -= entry.size;
this.cache.delete(key);
return true;
}
return false;
}
/**
* Check if model is in cache
*/
has(key) {
return this.cache.has(key);
}
/**
* Evict least recently used model
*/
evictLRU() {
let oldestKey = null;
let oldestTime = Infinity;
for (const [key, entry] of this.cache) {
if (entry.lastAccess < oldestTime) {
oldestTime = entry.lastAccess;
oldestKey = key;
}
}
if (oldestKey) {
this.delete(oldestKey);
}
}
/**
* Clear the cache
*/
clear() {
for (const entry of this.cache.values()) {
entry.model.dispose();
}
this.cache.clear();
this.currentSize = 0;
}
/**
* Get cache statistics
*/
getStats() {
return {
size: this.currentSize,
count: this.cache.size,
maxSize: this.maxSize,
maxModels: this.maxModels,
};
}
}
// ============================================================================
// Convenience Functions
// ============================================================================
/**
* Get memory manager instance
*/
export function getMemoryManager() {
return MemoryManager.getInstance();
}
/**
* Get memory statistics
*/
export function getMemoryStats() {
return MemoryManager.getInstance().getStats();
}
/**
* Release a resource
*/
export function release(resource) {
MemoryManager.getInstance().release(resource);
}
/**
* Force garbage collection hint
*/
export function gc() {
MemoryManager.getInstance().gc();
}
//# sourceMappingURL=memory.js.map
================================================
FILE: dist/core/plugin.d.ts
================================================
/**
* edgeFlow.js - Plugin System
*
* Register custom pipelines, backends, and middleware via plugins.
*
* @example
* ```typescript
* import { registerPlugin } from 'edgeflowjs';
*
* registerPlugin({
* name: 'edgeflow-plugin-whisper',
* version: '1.0.0',
* pipelines: {
* 'whisper-transcribe': {
* factory: (config) => new WhisperPipeline(config),
* },
* },
* });
*
* // Now available via pipeline('whisper-transcribe')
* ```
*/
import type { PipelineConfig, Runtime } from './types.js';
/**
* A pipeline factory registered by a plugin.
*/
export interface PluginPipelineEntry {
/** Factory that creates a pipeline instance */
factory: (config: PipelineConfig) => any;
/** Optional description */
description?: string;
}
/**
* A backend registered by a plugin.
*/
export interface PluginBackendEntry {
/** Factory that creates a runtime instance */
factory: () => Runtime;
/** Optional description */
description?: string;
}
/**
* Middleware that runs before/after inference.
*/
export interface PluginMiddleware {
/** Unique name */
name: string;
/** Called before inference with (model, inputs). Return modified inputs. */
before?: (ctx: {
modelId: string;
inputs: any;
}) => any | Promise<any>;
/** Called after inference with (model, outputs). Return modified outputs. */
after?: (ctx: {
modelId: string;
outputs: any;
}) => any | Promise<any>;
}
/**
* Plugin definition.
*/
export interface EdgeFlowPlugin {
/** Unique plugin name (e.g. 'edgeflow-plugin-whisper') */
name: string;
/** Plugin version (semver) */
version: string;
/** Pipelines contributed by this plugin */
pipelines?: Record<string, PluginPipelineEntry>;
/** Backends contributed by this plugin */
backends?: Record<string, PluginBackendEntry>;
/** Middleware contributed by this plugin */
middleware?: PluginMiddleware[];
/** Called once when the plugin is registered */
setup?: () => void | Promise<void>;
}
/**
* Register a plugin. Pipelines and backends are made available immediately.
*/
export declare function registerPlugin(plugin: EdgeFlowPlugin): Promise<void>;
/**
* Look up a pipeline factory registered by any plugin.
* Returns undefined if no plugin provides this task.
*/
export declare function getPluginPipeline(task: string): PluginPipelineEntry | undefined;
/**
* Get all registered middleware.
*/
export declare function getPluginMiddleware(): ReadonlyArray<PluginMiddleware>;
/**
* List all registered plugins.
*/
export declare function listPlugins(): Array<{
name: string;
version: string;
}>;
/**
* Unregister a plugin by name.
*/
export declare function unregisterPlugin(name: string): boolean;
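/**
* Usage sketch for middleware: a timing plugin that wraps every inference. The plugin
* name and timing logic are illustrative only.
*
* @example
* ```typescript
* import { registerPlugin } from 'edgeflowjs';
*
* const started = new Map<string, number>();
*
* await registerPlugin({
* name: 'edgeflow-plugin-timing',
* version: '0.1.0',
* middleware: [{
* name: 'inference-timer',
* before: ({ modelId, inputs }) => {
* started.set(modelId, performance.now());
* return inputs; // pass inputs through unchanged
* },
* after: ({ modelId, outputs }) => {
* const t0 = started.get(modelId);
* if (t0 !== undefined) console.log(`${modelId} took ${performance.now() - t0} ms`);
* return outputs;
* },
* }],
* });
* ```
*/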
//# sourceMappingURL=plugin.d.ts.map
================================================
FILE: dist/core/plugin.js
================================================
/**
* edgeFlow.js - Plugin System
*
* Register custom pipelines, backends, and middleware via plugins.
*
* @example
* ```typescript
* import { registerPlugin } from 'edgeflowjs';
*
* registerPlugin({
* name: 'edgeflow-plugin-whisper',
* version: '1.0.0',
* pipelines: {
* 'whisper-transcribe': {
* factory: (config) => new WhisperPipeline(config),
* },
* },
* });
*
* // Now available via pipeline('whisper-transcribe')
* ```
*/
import { registerRuntime } from './runtime.js';
// ---------------------------------------------------------------------------
// Registry
// ---------------------------------------------------------------------------
const registeredPlugins = new Map();
const pluginPipelines = new Map();
const pluginMiddleware = [];
/**
* Register a plugin. Pipelines and backends are made available immediately.
*/
export async function registerPlugin(plugin) {
if (registeredPlugins.has(plugin.name)) {
console.warn(`[edgeFlow.js] Plugin "${plugin.name}" is already registered — skipping.`);
return;
}
// Run setup hook
if (plugin.setup) {
await plugin.setup();
}
// Register pipelines
if (plugin.pipelines) {
for (const [task, entry] of Object.entries(plugin.pipelines)) {
pluginPipelines.set(task, entry);
}
}
// Register backends
if (plugin.backends) {
for (const [name, entry] of Object.entries(plugin.backends)) {
registerRuntime(name, entry.factory);
}
}
// Register middleware
if (plugin.middleware) {
pluginMiddleware.push(...plugin.middleware);
}
registeredPlugins.set(plugin.name, plugin);
}
/**
* Look up a pipeline factory registered by any plugin.
* Returns undefined if no plugin provides this task.
*/
export function getPluginPipeline(task) {
return pluginPipelines.get(task);
}
/**
* Get all registered middleware.
*/
export function getPluginMiddleware() {
return pluginMiddleware;
}
/**
* List all registered plugins.
*/
export function listPlugins() {
return Array.from(registeredPlugins.values()).map(p => ({
name: p.name,
version: p.version,
}));
}
/**
* Unregister a plugin by name.
*/
export function unregisterPlugin(name) {
const plugin = registeredPlugins.get(name);
if (!plugin)
return false;
// Remove pipelines
if (plugin.pipelines) {
for (const task of Object.keys(plugin.pipelines)) {
pluginPipelines.delete(task);
}
}
// Remove middleware
if (plugin.middleware) {
for (const mw of plugin.middleware) {
const idx = pluginMiddleware.indexOf(mw);
if (idx !== -1)
pluginMiddleware.splice(idx, 1);
}
}
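// Note: backends contributed by the plugin remain registered, since the runtime
// registry does not currently expose an unregister API for factories.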
registeredPlugins.delete(name);
return true;
}
//# sourceMappingURL=plugin.js.map
================================================
FILE: dist/core/runtime.d.ts
================================================
/**
* edgeFlow.js - Runtime Management
*
* Manages runtime backends and automatic selection.
* Provides unified interface for different compute backends.
*/
import { Runtime, RuntimeType, RuntimeCapabilities, LoadedModel, ModelLoadOptions, ModelMetadata, Tensor, EventType, EventListener } from './types.js';
/**
* RuntimeManager - Manages runtime selection and lifecycle
*
* Features:
* - Automatic best runtime selection
* - Runtime registration
* - Capability detection
* - Fallback handling
*/
export declare class RuntimeManager {
private static instance;
private readonly listeners;
private defaultRuntime;
private constructor();
/**
* Get singleton instance
*/
static getInstance(): RuntimeManager;
/**
* Register a runtime factory
*/
register(type: RuntimeType, factory: () => Runtime): void;
/**
* Get a runtime instance
*/
getRuntime(type?: RuntimeType): Promise<Runtime>;
/**
* Get the best available runtime
*/
getBestRuntime(): Promise<Runtime>;
/**
* Check which runtimes are available
*/
detectAvailableRuntimes(): Promise<Map<RuntimeType, boolean>>;
/**
* Get capabilities of a runtime
*/
getCapabilities(type: RuntimeType): Promise<RuntimeCapabilities>;
/**
* Set default runtime
*/
setDefaultRuntime(type: RuntimeType): void;
/**
* Get default runtime type
*/
getDefaultRuntimeType(): RuntimeType;
/**
* Dispose a specific runtime
*/
disposeRuntime(type: RuntimeType): void;
/**
* Dispose all runtimes
*/
disposeAll(): void;
/**
* Add event listener
*/
on(event: EventType, listener: EventListener): void;
/**
* Remove event listener
*/
off(event: EventType, listener: EventListener): void;
/**
* Emit event
*/
private emit;
}
/**
* LoadedModelImpl - Implementation of LoadedModel interface
*/
export declare class LoadedModelImpl implements LoadedModel {
readonly id: string;
readonly metadata: ModelMetadata;
readonly runtime: RuntimeType;
private _isLoaded;
private readonly _dispose;
constructor(metadata: ModelMetadata, runtime: RuntimeType, dispose: () => void);
get isLoaded(): boolean;
dispose(): void;
}
/**
* Load model from URL with advanced loading support
* (caching, sharding, resume download)
*/
export declare function loadModel(url: string, options?: ModelLoadOptions & {
runtime?: RuntimeType;
cache?: boolean;
resumable?: boolean;
chunkSize?: number;
forceDownload?: boolean;
}): Promise<LoadedModel>;
/**
* Load model from ArrayBuffer
*/
export declare function loadModelFromBuffer(data: ArrayBuffer, options?: ModelLoadOptions & {
runtime?: RuntimeType;
}): Promise<LoadedModel>;
/**
* Run inference on a model
*/
export declare function runInference(model: LoadedModel, inputs: Tensor[]): Promise<Tensor[]>;
/**
* Run inference with named inputs
*/
export declare function runInferenceNamed(model: LoadedModel, namedInputs: Map<string, Tensor>): Promise<Tensor[]>;
/**
* Run inference with batch processing
*/
export declare function runBatchInference(model: LoadedModel, batches: Tensor[][]): Promise<Tensor[][]>;
/**
* Get runtime manager instance
*/
export declare function getRuntimeManager(): RuntimeManager;
/**
* Register a runtime
*/
export declare function registerRuntime(type: RuntimeType, factory: () => Runtime): void;
/**
* Get the best available runtime
*/
export declare function getBestRuntime(): Promise<Runtime>;
/**
* Check available runtimes
*/
export declare function getAvailableRuntimes(): Promise<Map<RuntimeType, boolean>>;
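/**
* Usage sketch: choosing a runtime explicitly and running a model end to end. The
* model URL is a placeholder and the input shape is arbitrary.
*
* @example
* ```typescript
* import { getAvailableRuntimes, loadModel, runInference, tensor } from 'edgeflowjs';
*
* const available = await getAvailableRuntimes();
* const runtime = available.get('webgpu') ? 'webgpu' : 'wasm';
*
* const model = await loadModel('https://example.com/classifier.onnx', {
* runtime,
* cache: true, // persist the download for future sessions
* resumable: true, // resume interrupted downloads
* });
*
* const input = tensor(new Float32Array(224 * 224 * 3), [1, 3, 224, 224]);
* const [output] = await runInference(model, [input]);
* console.log(output.shape);
*
* input.dispose();
* model.dispose();
* ```
*/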
//# sourceMappingURL=runtime.d.ts.map
================================================
FILE: dist/core/runtime.js
================================================
/**
* edgeFlow.js - Runtime Management
*
* Manages runtime backends and automatic selection.
* Provides unified interface for different compute backends.
*/
import { EdgeFlowError, ErrorCodes, } from './types.js';
import { getScheduler } from './scheduler.js';
import { getMemoryManager } from './memory.js';
// ============================================================================
// Runtime Registry
// ============================================================================
/**
* Registered runtime factories
*/
const runtimeFactories = new Map();
/**
* Cached runtime instances
*/
const runtimeInstances = new Map();
/**
* Runtime priority order (higher priority first)
*/
const RUNTIME_PRIORITY = ['webgpu', 'webnn', 'wasm'];
// ============================================================================
// Runtime Manager
// ============================================================================
/**
* RuntimeManager - Manages runtime selection and lifecycle
*
* Features:
* - Automatic best runtime selection
* - Runtime registration
* - Capability detection
* - Fallback handling
*/
export class RuntimeManager {
static instance = null;
listeners = new Map();
defaultRuntime = 'auto';
constructor() { }
/**
* Get singleton instance
*/
static getInstance() {
if (!RuntimeManager.instance) {
RuntimeManager.instance = new RuntimeManager();
}
return RuntimeManager.instance;
}
/**
* Register a runtime factory
*/
register(type, factory) {
runtimeFactories.set(type, factory);
}
/**
* Get a runtime instance
*/
async getRuntime(type = 'auto') {
if (type === 'auto') {
return this.getBestRuntime();
}
// Check if already instantiated
let runtime = runtimeInstances.get(type);
if (runtime) {
return runtime;
}
// Create new instance
const factory = runtimeFactories.get(type);
if (!factory) {
throw new EdgeFlowError(`Runtime '${type}' is not registered`, ErrorCodes.RUNTIME_NOT_AVAILABLE, { runtime: type });
}
runtime = factory();
// Check availability
const available = await runtime.isAvailable();
if (!available) {
throw new EdgeFlowError(`Runtime '${type}' is not available in this environment`, ErrorCodes.RUNTIME_NOT_AVAILABLE, { runtime: type });
}
// Initialize
try {
await runtime.initialize();
}
catch (error) {
throw new EdgeFlowError(`Failed to initialize runtime '${type}': ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.RUNTIME_INIT_FAILED, { runtime: type, error });
}
runtimeInstances.set(type, runtime);
this.emit('runtime:ready', { runtime: type });
return runtime;
}
/**
* Get the best available runtime
*/
async getBestRuntime() {
for (const type of RUNTIME_PRIORITY) {
try {
// Check if already available
const existing = runtimeInstances.get(type);
if (existing) {
return existing;
}
// Try to create and initialize
const factory = runtimeFactories.get(type);
if (!factory)
continue;
const runtime = factory();
const available = await runtime.isAvailable();
if (available) {
await runtime.initialize();
runtimeInstances.set(type, runtime);
this.emit('runtime:ready', { runtime: type });
return runtime;
}
}
catch {
// Try next runtime
continue;
}
}
throw new EdgeFlowError('No runtime available. Please ensure WebGPU, WebNN, or WASM is supported.', ErrorCodes.RUNTIME_NOT_AVAILABLE, { triedRuntimes: RUNTIME_PRIORITY });
}
/**
* Check which runtimes are available
*/
async detectAvailableRuntimes() {
const results = new Map();
for (const type of RUNTIME_PRIORITY) {
const factory = runtimeFactories.get(type);
if (!factory) {
results.set(type, false);
continue;
}
try {
const runtime = factory();
results.set(type, await runtime.isAvailable());
}
catch {
results.set(type, false);
}
}
return results;
}
/**
* Get capabilities of a runtime
*/
async getCapabilities(type) {
const runtime = await this.getRuntime(type);
return runtime.capabilities;
}
/**
* Set default runtime
*/
setDefaultRuntime(type) {
this.defaultRuntime = type;
}
/**
* Get default runtime type
*/
getDefaultRuntimeType() {
return this.defaultRuntime;
}
/**
* Dispose a specific runtime
*/
disposeRuntime(type) {
const runtime = runtimeInstances.get(type);
if (runtime) {
runtime.dispose();
runtimeInstances.delete(type);
}
}
/**
* Dispose all runtimes
*/
disposeAll() {
for (const [type, runtime] of runtimeInstances) {
runtime.dispose();
runtimeInstances.delete(type);
}
}
/**
* Add event listener
*/
on(event, listener) {
let listeners = this.listeners.get(event);
if (!listeners) {
listeners = new Set();
this.listeners.set(event, listeners);
}
listeners.add(listener);
}
/**
* Remove event listener
*/
off(event, listener) {
const listeners = this.listeners.get(event);
if (listeners) {
listeners.delete(listener);
}
}
/**
* Emit event
*/
emit(type, data) {
const event = {
type,
timestamp: Date.now(),
data,
};
const listeners = this.listeners.get(type);
if (listeners) {
for (const listener of listeners) {
try {
listener(event);
}
catch (error) {
console.error('Error in event listener:', error);
}
}
}
}
}
// ============================================================================
// Model Loader
// ============================================================================
/**
* Model instance counter
*/
let modelIdCounter = 0;
/**
* Generate unique model ID
*/
function generateModelId() {
return `model_${++modelIdCounter}_${Date.now().toString(36)}`;
}
/**
* LoadedModelImpl - Implementation of LoadedModel interface
*/
export class LoadedModelImpl {
id;
metadata;
runtime;
_isLoaded = true;
_dispose;
constructor(metadata, runtime, dispose) {
this.id = generateModelId();
this.metadata = metadata;
this.runtime = runtime;
this._dispose = dispose;
}
get isLoaded() {
return this._isLoaded;
}
dispose() {
if (this._isLoaded) {
this._isLoaded = false;
this._dispose();
getMemoryManager().untrack(this.id);
}
}
}
// ============================================================================
// Model Loading Functions
// ============================================================================
/**
* Load model from URL with advanced loading support
* (caching, sharding, resume download)
*/
export async function loadModel(url, options = {}) {
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(options.runtime ?? 'auto');
// Import model loader dynamically to avoid circular dependencies
const { loadModelData } = await import('../utils/model-loader.js');
// Use advanced model loader with caching and resume support
const modelData = await loadModelData(url, {
cache: options.cache ?? true,
resumable: options.resumable ?? true,
chunkSize: options.chunkSize,
forceDownload: options.forceDownload,
onProgress: options.onProgress ? (progress) => {
options.onProgress(progress.percent / 100);
} : undefined,
});
// Load into runtime
const model = await runtime.loadModel(modelData, options);
return model;
}
/**
* Load model from ArrayBuffer
*/
export async function loadModelFromBuffer(data, options = {}) {
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(options.runtime ?? 'auto');
return runtime.loadModel(data, options);
}
// ============================================================================
// Inference Functions
// ============================================================================
/**
* Run inference on a model
*/
export async function runInference(model, inputs) {
if (!model.isLoaded) {
throw new EdgeFlowError('Model has been disposed', ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(model.runtime);
// Use scheduler for execution
const scheduler = getScheduler();
const task = scheduler.schedule(model.id, () => runtime.run(model, inputs));
return task.wait();
}
/**
* Run inference with named inputs
*/
export async function runInferenceNamed(model, namedInputs) {
if (!model.isLoaded) {
throw new EdgeFlowError('Model has been disposed', ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(model.runtime);
// Check if runtime supports named inputs
if (!('runNamed' in runtime)) {
throw new EdgeFlowError('Runtime does not support named inputs', ErrorCodes.INFERENCE_FAILED, { modelId: model.id });
}
// Use scheduler for execution
const scheduler = getScheduler();
const task = scheduler.schedule(model.id, () => runtime.runNamed(model, namedInputs));
return task.wait();
}
/**
* Run inference with batch processing
*/
export async function runBatchInference(model, batches) {
const scheduler = getScheduler();
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(model.runtime);
// Schedule all batches
const tasks = batches.map(inputs => scheduler.schedule(model.id, () => runtime.run(model, inputs)));
// Wait for all to complete
return Promise.all(tasks.map(task => task.wait()));
}
// ============================================================================
// Convenience Functions
// ============================================================================
/**
* Get runtime manager instance
*/
export function getRuntimeManager() {
return RuntimeManager.getInstance();
}
/**
* Register a runtime
*/
export function registerRuntime(type, factory) {
RuntimeManager.getInstance().register(type, factory);
}
/**
* Get the best available runtime
*/
export async function getBestRuntime() {
return RuntimeManager.getInstance().getBestRuntime();
}
/**
* Check available runtimes
*/
export async function getAvailableRuntimes() {
return RuntimeManager.getInstance().detectAvailableRuntimes();
}
//# sourceMappingURL=runtime.js.map
================================================
FILE: dist/core/scheduler.d.ts
================================================
/**
* edgeFlow.js - Inference Scheduler
*
* Task scheduler for managing concurrent inference execution.
* Supports priority queues, model-level isolation, and batch processing.
*/
import { InferenceTask, TaskPriority, SchedulerOptions, EventType, EventListener } from './types.js';
/**
* InferenceScheduler - Manages concurrent task execution
*
* Features:
* - Priority-based task scheduling
* - Model-level concurrency control
* - Optional batch processing
* - Task cancellation
* - Event emission
*/
export declare class InferenceScheduler {
private readonly options;
private readonly queues;
private readonly runningTasks;
private readonly allTasks;
private readonly batchers;
private readonly listeners;
private readonly circuits;
private globalRunningCount;
private isProcessing;
private disposed;
constructor(options?: SchedulerOptions);
/**
* Get circuit breaker state for a model, creating default if absent
*/
private getCircuit;
/**
* Check if the circuit for a model allows new tasks
*/
private isCircuitOpen;
/**
* Record a success for circuit breaker
*/
private circuitSuccess;
/**
* Record a failure for circuit breaker
*/
private circuitFailure;
/**
* Get or create queue for a model
*/
private getQueue;
/**
* Get or create running set for a model
*/
private getRunningSet;
/**
* Check if we can start a new task for a model
*/
private canStartTask;
/**
* Process pending tasks
*/
private processQueue;
/**
* Schedule a task for execution
*/
schedule<T>(modelId: string, executor: () => Promise<T>, priority?: TaskPriority): InferenceTask<T>;
/**
* Schedule with timeout
*/
scheduleWithTimeout<T>(modelId: string, executor: () => Promise<T>, timeout?: number, priority?: TaskPriority): InferenceTask<T>;
/**
* Schedule multiple tasks and wait for all
*/
scheduleAll<T>(tasks: Array<{
modelId: string;
executor: () => Promise<T>;
priority?: TaskPriority;
}>): Promise<T[]>;
/**
* Get task by ID
*/
getTask(taskId: string): InferenceTask | undefined;
/**
* Cancel a task
*/
cancelTask(taskId: string): boolean;
/**
* Cancel all tasks for a model
*/
cancelAllForModel(modelId: string): number;
/**
* Get statistics
*/
getStats(): {
totalTasks: number;
pendingTasks: number;
runningTasks: number;
completedTasks: number;
failedTasks: number;
cancelledTasks: number;
queuedByModel: Record<string, number>;
};
/**
* Add event listener
*/
on(event: EventType, listener: EventListener): void;
/**
* Remove event listener
*/
off(event: EventType, listener: EventListener): void;
/**
* Emit event
*/
private emit;
/**
* Clear completed/failed/cancelled tasks from history
*/
clearHistory(): void;
/**
* Dispose the scheduler
*/
dispose(): void;
}
/**
* Get the global scheduler instance
*/
export declare function getScheduler(): InferenceScheduler;
/**
* Set the global scheduler instance
*/
export declare function setScheduler(scheduler: InferenceScheduler): void;
/**
* Configure the global scheduler
*/
export declare function configureScheduler(options: SchedulerOptions): void;
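/**
* Usage sketch: tightening concurrency, enabling retries and the circuit breaker,
* then scheduling work at different priorities. The executors below are stand-ins
* for real inference calls.
*
* @example
* ```typescript
* import { configureScheduler, getScheduler } from 'edgeflowjs';
*
* configureScheduler({
* maxConcurrentTasks: 2,
* maxConcurrentPerModel: 1,
* maxRetries: 2, // retry failed tasks with exponential backoff
* circuitBreaker: true, // stop scheduling a model after repeated failures
* });
*
* const scheduler = getScheduler();
* const urgent = scheduler.schedule('model-a', async () => 'result-a', 'high');
* const background = scheduler.schedule('model-b', async () => 'result-b', 'low');
*
* const [a, b] = await Promise.all([urgent.wait(), background.wait()]);
* console.log(a, b, scheduler.getStats());
* ```
*/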
//# sourceMappingURL=scheduler.d.ts.map
================================================
FILE: dist/core/scheduler.js
================================================
/**
* edgeFlow.js - Inference Scheduler
*
* Task scheduler for managing concurrent inference execution.
* Supports priority queues, model-level isolation, and batch processing.
*/
import { EdgeFlowError, ErrorCodes, } from './types.js';
// ============================================================================
// Task Implementation
// ============================================================================
/**
* Internal task implementation
*/
class Task {
id;
modelId;
priority;
createdAt;
_status = 'pending';
_startedAt;
_completedAt;
_result;
_error;
_executor;
_resolvers = [];
_cancelled = false;
constructor(id, modelId, priority, executor) {
this.id = id;
this.modelId = modelId;
this.priority = priority;
this.createdAt = Date.now();
this._executor = executor;
}
get status() {
return this._status;
}
get startedAt() {
return this._startedAt;
}
get completedAt() {
return this._completedAt;
}
get result() {
return this._result;
}
get error() {
return this._error;
}
/**
* Cancel the task
*/
cancel() {
if (this._status === 'pending') {
this._cancelled = true;
this._status = 'cancelled';
this._completedAt = Date.now();
const cancelError = new EdgeFlowError('Task was cancelled', ErrorCodes.INFERENCE_CANCELLED, { taskId: this.id });
for (const { reject } of this._resolvers) {
reject(cancelError);
}
this._resolvers = [];
}
}
/**
* Wait for task completion
*/
wait() {
if (this._status === 'completed') {
return Promise.resolve(this._result);
}
if (this._status === 'failed') {
return Promise.reject(this._error);
}
if (this._status === 'cancelled') {
return Promise.reject(new EdgeFlowError('Task was cancelled', ErrorCodes.INFERENCE_CANCELLED, { taskId: this.id }));
}
return new Promise((resolve, reject) => {
this._resolvers.push({ resolve, reject });
});
}
/**
* Execute the task
*/
async execute() {
if (this._cancelled) {
return;
}
this._status = 'running';
this._startedAt = Date.now();
try {
this._result = await this._executor();
this._status = 'completed';
this._completedAt = Date.now();
for (const { resolve } of this._resolvers) {
resolve(this._result);
}
}
catch (err) {
this._error = err instanceof Error ? err : new Error(String(err));
this._status = 'failed';
this._completedAt = Date.now();
for (const { reject } of this._resolvers) {
reject(this._error);
}
}
this._resolvers = [];
}
}
// ============================================================================
// Priority Queue Implementation
// ============================================================================
/**
* Priority mapping for comparison
*/
const PRIORITY_ORDER = {
critical: 0,
high: 1,
normal: 2,
low: 3,
};
/**
* Priority queue for tasks
*/
class PriorityQueue {
items = [];
get length() {
return this.items.length;
}
isEmpty() {
return this.items.length === 0;
}
/**
* Add item to queue with priority ordering
*/
enqueue(item) {
let inserted = false;
for (let i = 0; i < this.items.length; i++) {
const currentItem = this.items[i];
if (currentItem && PRIORITY_ORDER[item.priority] < PRIORITY_ORDER[currentItem.priority]) {
this.items.splice(i, 0, item);
inserted = true;
break;
}
}
if (!inserted) {
this.items.push(item);
}
}
/**
* Remove and return highest priority item
*/
dequeue() {
return this.items.shift();
}
/**
* Peek at highest priority item without removing
*/
peek() {
return this.items[0];
}
/**
* Remove a specific item by ID
*/
remove(id) {
const index = this.items.findIndex(item => item.id === id);
if (index !== -1) {
const [removed] = this.items.splice(index, 1);
return removed;
}
return undefined;
}
/**
* Get all items
*/
getAll() {
return [...this.items];
}
/**
* Clear the queue
*/
clear() {
this.items = [];
}
}
// ============================================================================
// Batch Collector
// ============================================================================
/**
* Collects tasks for batch processing
*/
class BatchCollector {
tasks = [];
timer = null;
maxSize;
timeout;
onBatch;
constructor(maxSize, timeout, onBatch) {
this.maxSize = maxSize;
this.timeout = timeout;
this.onBatch = onBatch;
}
add(task) {
this.tasks.push(task);
if (this.tasks.length >= this.maxSize) {
this.flush();
}
else if (!this.timer) {
this.timer = setTimeout(() => this.flush(), this.timeout);
}
}
flush() {
if (this.timer) {
clearTimeout(this.timer);
this.timer = null;
}
if (this.tasks.length > 0) {
const batch = this.tasks;
this.tasks = [];
this.onBatch(batch);
}
}
clear() {
if (this.timer) {
clearTimeout(this.timer);
this.timer = null;
}
this.tasks = [];
}
}
// ============================================================================
// Inference Scheduler
// ============================================================================
// Counter for task IDs
let taskIdCounter = 0;
/**
* Generate unique task ID
*/
function generateTaskId() {
return `task_${++taskIdCounter}_${Date.now().toString(36)}`;
}
/**
* Default scheduler options
*/
const DEFAULT_OPTIONS = {
maxConcurrentTasks: 4,
maxConcurrentPerModel: 1,
defaultTimeout: 30000,
enableBatching: false,
maxBatchSize: 32,
batchTimeout: 50,
maxRetries: 0,
retryBaseDelay: 1000,
circuitBreaker: false,
circuitBreakerThreshold: 5,
circuitBreakerResetTimeout: 30000,
};
/**
* InferenceScheduler - Manages concurrent task execution
*
* Features:
* - Priority-based task scheduling
* - Model-level concurrency control
* - Optional batch processing
* - Task cancellation
* - Event emission
*/
export class InferenceScheduler {
options;
queues = new Map();
runningTasks = new Map();
allTasks = new Map();
batchers = new Map();
listeners = new Map();
circuits = new Map();
globalRunningCount = 0;
isProcessing = false;
disposed = false;
constructor(options = {}) {
this.options = { ...DEFAULT_OPTIONS, ...options };
}
/**
* Get circuit breaker state for a model, creating default if absent
*/
getCircuit(modelId) {
let c = this.circuits.get(modelId);
if (!c) {
c = { failures: 0, state: 'closed', lastFailure: 0 };
this.circuits.set(modelId, c);
}
return c;
}
/**
* Check if the circuit for a model allows new tasks
*/
isCircuitOpen(modelId) {
if (!this.options.circuitBreaker)
return false;
const c = this.getCircuit(modelId);
if (c.state === 'closed')
return false;
if (c.state === 'open') {
if (Date.now() - c.lastFailure > this.options.circuitBreakerResetTimeout) {
c.state = 'half-open';
return false; // allow one probe
}
return true;
}
return false; // half-open allows one
}
/**
* Record a success for circuit breaker
*/
circuitSuccess(modelId) {
if (!this.options.circuitBreaker)
return;
const c = this.getCircuit(modelId);
c.failures = 0;
c.state = 'closed';
}
/**
* Record a failure for circuit breaker
*/
circuitFailure(modelId) {
if (!this.options.circuitBreaker)
return;
const c = this.getCircuit(modelId);
c.failures++;
c.lastFailure = Date.now();
if (c.failures >= this.options.circuitBreakerThreshold) {
c.state = 'open';
this.emit('inference:error', {
modelId,
error: new Error(`Circuit breaker opened after ${c.failures} consecutive failures`),
});
}
}
/**
* Get or create queue for a model
*/
getQueue(modelId) {
let queue = this.queues.get(modelId);
if (!queue) {
queue = new PriorityQueue();
this.queues.set(modelId, queue);
}
return queue;
}
/**
* Get or create running set for a model
*/
getRunningSet(modelId) {
let running = this.runningTasks.get(modelId);
if (!running) {
running = new Set();
this.runningTasks.set(modelId, running);
}
return running;
}
/**
* Check if we can start a new task for a model
*/
canStartTask(modelId) {
if (this.globalRunningCount >= this.options.maxConcurrentTasks) {
return false;
}
const running = this.runningTasks.get(modelId);
if (running && running.size >= this.options.maxConcurrentPerModel) {
return false;
}
return true;
}
/**
* Process pending tasks
*/
async processQueue() {
if (this.isProcessing || this.disposed) {
return;
}
this.isProcessing = true;
try {
// Find tasks that can be started
const tasksToStart = [];
for (const [modelId, queue] of this.queues) {
while (!queue.isEmpty() && this.canStartTask(modelId)) {
const task = queue.dequeue();
if (task && task.status === 'pending') {
tasksToStart.push(task);
const running = this.getRunningSet(modelId);
running.add(task.id);
this.globalRunningCount++;
}
}
}
// Execute tasks concurrently
await Promise.all(tasksToStart.map(async (task) => {
this.emit('inference:start', { taskId: task.id, modelId: task.modelId });
try {
await task.execute();
this.emit('inference:complete', {
taskId: task.id,
modelId: task.modelId,
duration: (task.completedAt ?? 0) - (task.startedAt ?? 0),
});
}
catch (error) {
this.emit('inference:error', {
taskId: task.id,
modelId: task.modelId,
error,
});
}
finally {
// Clean up
const running = this.runningTasks.get(task.modelId);
if (running) {
running.delete(task.id);
}
this.globalRunningCount--;
}
}));
}
finally {
this.isProcessing = false;
}
// Check if there are more tasks to process
let hasPending = false;
for (const queue of this.queues.values()) {
if (!queue.isEmpty()) {
hasPending = true;
break;
}
}
if (hasPending) {
// Use setImmediate-like behavior for next tick processing
setTimeout(() => this.processQueue(), 0);
}
}
/**
* Schedule a task for execution
*/
schedule(modelId, executor, priority = 'normal') {
if (this.disposed) {
throw new EdgeFlowError('Scheduler has been disposed', ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
if (this.isCircuitOpen(modelId)) {
throw new EdgeFlowError(`Circuit breaker is open for model ${modelId} — too many consecutive failures. ` +
`Retry after ${this.options.circuitBreakerResetTimeout}ms.`, ErrorCodes.INFERENCE_FAILED, { modelId });
}
// Wrap executor with retry logic
const maxRetries = this.options.maxRetries;
const baseDelay = this.options.retryBaseDelay;
const wrappedExecutor = maxRetries > 0
? async () => {
let lastError;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const result = await executor();
this.circuitSuccess(modelId);
return result;
}
catch (err) {
lastError = err instanceof Error ? err : new Error(String(err));
this.circuitFailure(modelId);
if (attempt < maxRetries) {
const delay = baseDelay * Math.pow(2, attempt);
await new Promise(r => setTimeout(r, delay));
}
}
}
throw lastError;
}
: async () => {
try {
const result = await executor();
this.circuitSuccess(modelId);
return result;
}
catch (err) {
this.circuitFailure(modelId);
throw err;
}
};
const task = new Task(generateTaskId(), modelId, priority, wrappedExecutor);
this.allTasks.set(task.id, task);
const queue = this.getQueue(modelId);
queue.enqueue(task);
this.processQueue();
return task;
}
/**
* Schedule with timeout
*/
scheduleWithTimeout(modelId, executor, timeout = this.options.defaultTimeout, priority = 'normal') {
const timeoutExecutor = () => {
return new Promise((resolve, reject) => {
const timer = setTimeout(() => {
reject(new EdgeFlowError(`Task timed out after ${timeout}ms`, ErrorCodes.INFERENCE_TIMEOUT, { timeout }));
}, timeout);
executor()
.then(result => {
clearTimeout(timer);
resolve(result);
})
.catch(error => {
clearTimeout(timer);
reject(error);
});
});
};
return this.schedule(modelId, timeoutExecutor, priority);
}
/**
* Schedule multiple tasks and wait for all
*/
async scheduleAll(tasks) {
const scheduledTasks = tasks.map(({ modelId, executor, priority }) => this.schedule(modelId, executor, priority));
return Promise.all(scheduledTasks.map(task => task.wait()));
}
/**
* Get task by ID
*/
getTask(taskId) {
return this.allTasks.get(taskId);
}
/**
* Cancel a task
*/
cancelTask(taskId) {
const task = this.allTasks.get(taskId);
if (task && task.status === 'pending') {
task.cancel();
// Remove from queue
for (const queue of this.queues.values()) {
queue.remove(taskId);
}
return true;
}
return false;
}
/**
* Cancel all tasks for a model
*/
cancelAllForModel(modelId) {
const queue = this.queues.get(modelId);
if (!queue)
return 0;
let cancelled = 0;
for (const task of queue.getAll()) {
if (task.status === 'pending') {
task.cancel();
cancelled++;
}
}
queue.clear();
return cancelled;
}
/**
* Get statistics
*/
getStats() {
const stats = {
totalTasks: this.allTasks.size,
pendingTasks: 0,
runningTasks: 0,
completedTasks: 0,
failedTasks: 0,
cancelledTasks: 0,
queuedByModel: {},
};
for (const task of this.allTasks.values()) {
switch (task.status) {
case 'pending':
stats.pendingTasks++;
break;
case 'running':
stats.runningTasks++;
break;
case 'completed':
stats.completedTasks++;
break;
case 'failed':
stats.failedTasks++;
break;
case 'cancelled':
stats.cancelledTasks++;
break;
}
}
for (const [modelId, queue] of this.queues) {
stats.queuedByModel[modelId] = queue.length;
}
return stats;
}
/**
* Add event listener
*/
on(event, listener) {
let listeners = this.listeners.get(event);
if (!listeners) {
listeners = new Set();
this.listeners.set(event, listeners);
}
listeners.add(listener);
}
/**
* Remove event listener
*/
off(event, listener) {
const listeners = this.listeners.get(event);
if (listeners) {
listeners.delete(listener);
}
}
/**
* Emit event
*/
emit(type, data) {
const event = {
type,
timestamp: Date.now(),
data,
};
const listeners = this.listeners.get(type);
if (listeners) {
for (const listener of listeners) {
try {
listener(event);
}
catch (error) {
console.error('Error in event listener:', error);
}
}
}
}
/**
* Clear completed/failed/cancelled tasks from history
*/
clearHistory() {
for (const [taskId, task] of this.allTasks) {
if (task.status === 'completed' ||
task.status === 'failed' ||
task.status === 'cancelled') {
this.allTasks.delete(taskId);
}
}
}
/**
* Dispose the scheduler
*/
dispose() {
this.disposed = true;
// Cancel all pending tasks
for (const queue of this.queues.values()) {
for (const task of queue.getAll()) {
task.cancel();
}
queue.clear();
}
// Clear batchers
for (const batcher of this.batchers.values()) {
batcher.clear();
}
this.queues.clear();
this.runningTasks.clear();
this.allTasks.clear();
this.batchers.clear();
this.listeners.clear();
}
}
// ============================================================================
// Global Scheduler Instance
// ============================================================================
let globalScheduler = null;
/**
* Get the global scheduler instance
*/
export function getScheduler() {
if (!globalScheduler) {
globalScheduler = new InferenceScheduler();
}
return globalScheduler;
}
/**
* Set the global scheduler instance
*/
export function setScheduler(scheduler) {
if (globalScheduler) {
globalScheduler.dispose();
}
globalScheduler = scheduler;
}
/**
* Configure the global scheduler
*/
export function configureScheduler(options) {
setScheduler(new InferenceScheduler(options));
}
//# sourceMappingURL=scheduler.js.map
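
The scheduler above exposes a small surface for throttled, prioritized inference. A minimal usage sketch follows; the relative import path and the model id 'sentiment-model' are illustrative assumptions, and the executor is a stand-in for a real inference call.

import { getScheduler } from './scheduler.js';

const scheduler = getScheduler();

// scheduleWithTimeout wraps the executor so the task rejects with
// ErrorCodes.INFERENCE_TIMEOUT if it runs longer than 5000 ms.
const task = scheduler.scheduleWithTimeout(
  'sentiment-model',                                  // hypothetical model id
  async () => ({ label: 'positive', score: 0.93 }),   // stand-in for real inference
  5000,
  'high'
);

const result = await task.wait();
console.log(result, scheduler.getStats());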
================================================
FILE: dist/core/tensor.d.ts
================================================
/**
* edgeFlow.js - Tensor Implementation
*
* Lightweight tensor implementation with efficient memory management.
*/
import { Tensor, DataType, Shape, TypedArray } from './types.js';
/**
* EdgeFlowTensor - Core tensor implementation
*/
export declare class EdgeFlowTensor implements Tensor {
readonly id: string;
readonly dtype: DataType;
readonly shape: Shape;
readonly size: number;
private _data;
private _isDisposed;
constructor(data: TypedArray | number[], shape: Shape, dtype?: DataType);
get data(): TypedArray;
get isDisposed(): boolean;
/**
* Check if tensor has been disposed
*/
private checkDisposed;
/**
* Convert to Float32Array
*/
toFloat32Array(): Float32Array;
/**
* Convert to regular array
*/
toArray(): number[];
/**
* Clone the tensor
*/
clone(): EdgeFlowTensor;
/**
* Dispose the tensor and free memory
*/
dispose(): void;
/**
* Get value at specific indices
*/
get(...indices: number[]): number;
/**
* Set value at specific indices
*/
set(value: number, ...indices: number[]): void;
/**
* Reshape the tensor (returns new tensor)
*/
reshape(newShape: Shape): EdgeFlowTensor;
/**
* Transpose the tensor (2D only for now)
*/
transpose(): EdgeFlowTensor;
/**
* Create string representation
*/
toString(): string;
}
/**
* Create a tensor from data
*/
export declare function tensor(data: TypedArray | number[] | number[][], shape?: Shape, dtype?: DataType): EdgeFlowTensor;
/**
* Create a tensor filled with zeros
*/
export declare function zeros(shape: Shape, dtype?: DataType): EdgeFlowTensor;
/**
* Create a tensor filled with ones
*/
export declare function ones(shape: Shape, dtype?: DataType): EdgeFlowTensor;
/**
* Create a tensor filled with a specific value
*/
export declare function full(shape: Shape, value: number, dtype?: DataType): EdgeFlowTensor;
/**
* Create a tensor with random values between 0 and 1
*/
export declare function random(shape: Shape, dtype?: DataType): EdgeFlowTensor;
/**
* Create a tensor with random values from normal distribution
*/
export declare function randn(shape: Shape, dtype?: DataType): EdgeFlowTensor;
/**
* Create a 1D tensor with evenly spaced values
*/
export declare function arange(start: number, stop?: number, step?: number, dtype?: DataType): EdgeFlowTensor;
/**
* Create a 1D tensor with evenly spaced values (specify number of points)
*/
export declare function linspace(start: number, stop: number, num?: number, dtype?: DataType): EdgeFlowTensor;
/**
* Create an identity matrix
*/
export declare function eye(n: number, dtype?: DataType): EdgeFlowTensor;
/**
* Element-wise addition
*/
export declare function add(a: EdgeFlowTensor, b: EdgeFlowTensor | number): EdgeFlowTensor;
/**
* Element-wise subtraction
*/
export declare function sub(a: EdgeFlowTensor, b: EdgeFlowTensor | number): EdgeFlowTensor;
/**
* Element-wise multiplication
*/
export declare function mul(a: EdgeFlowTensor, b: EdgeFlowTensor | number): EdgeFlowTensor;
/**
* Element-wise division
*/
export declare function div(a: EdgeFlowTensor, b: EdgeFlowTensor | number): EdgeFlowTensor;
/**
* Matrix multiplication (2D tensors)
*/
export declare function matmul(a: EdgeFlowTensor, b: EdgeFlowTensor): EdgeFlowTensor;
/**
* Softmax activation
*/
export declare function softmax(t: EdgeFlowTensor, axis?: number): EdgeFlowTensor;
/**
* ReLU activation
*/
export declare function relu(t: EdgeFlowTensor): EdgeFlowTensor;
/**
* Sigmoid activation
*/
export declare function sigmoid(t: EdgeFlowTensor): EdgeFlowTensor;
/**
* Tanh activation
*/
export declare function tanh(t: EdgeFlowTensor): EdgeFlowTensor;
/**
* Sum all elements or along an axis
*/
export declare function sum(t: EdgeFlowTensor, axis?: number): EdgeFlowTensor | number;
/**
* Mean of all elements or along an axis
*/
export declare function mean(t: EdgeFlowTensor, axis?: number): EdgeFlowTensor | number;
/**
* Argmax - return index of maximum value
*/
export declare function argmax(t: EdgeFlowTensor, axis?: number): number | EdgeFlowTensor;
/**
* Concatenate tensors along an axis
*/
export declare function concat(tensors: EdgeFlowTensor[], axis?: number): EdgeFlowTensor;
//# sourceMappingURL=tensor.d.ts.map
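
The declarations above cover the tensor lifecycle end to end; a small sketch of how they compose, assuming the module is imported relative to dist/core/:

import { tensor, zeros } from './tensor.js';

const a = tensor([[1, 2], [3, 4]]);   // nested arrays, shape inferred as [2, 2]
const b = zeros([2, 2], 'int32');

console.log(a.toString(), b.dtype);   // Tensor(shape=[2, 2], dtype=float32) int32
console.log(a.get(1, 0));             // 3

const copy = a.clone();
a.dispose();                          // frees the buffer; further access throws TENSOR_DISPOSED
console.log(a.isDisposed, copy.get(0, 1)); // true 2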
================================================
FILE: dist/core/tensor.js
================================================
/**
* edgeFlow.js - Tensor Implementation
*
* Lightweight tensor implementation with efficient memory management.
*/
import { EdgeFlowError, ErrorCodes } from './types.js';
// Counter for generating unique tensor IDs
let tensorIdCounter = 0;
/**
* Generate a unique tensor ID
*/
function generateTensorId() {
return `tensor_${++tensorIdCounter}_${Date.now().toString(36)}`;
}
/**
* Get the typed array constructor for a data type
*/
function getTypedArrayConstructor(dtype) {
switch (dtype) {
case 'float32':
return Float32Array;
case 'float16':
// Float16 not natively supported, use Float32Array
return Float32Array;
case 'int32':
return Int32Array;
case 'int64':
return BigInt64Array;
case 'uint8':
case 'bool':
return Uint8Array;
case 'int8':
return Int8Array;
default:
throw new EdgeFlowError(`Unsupported data type: ${dtype}`, ErrorCodes.INVALID_ARGUMENT, { dtype });
}
}
/**
* Calculate the total number of elements from shape
*/
function calculateSize(shape) {
if (shape.length === 0)
return 1; // Scalar
return shape.reduce((acc, dim) => acc * dim, 1);
}
/**
* Validate tensor shape
*/
function validateShape(shape) {
for (let i = 0; i < shape.length; i++) {
const dim = shape[i];
if (dim === undefined || !Number.isInteger(dim) || dim < 0) {
throw new EdgeFlowError(`Invalid shape dimension at index ${i}: ${dim}`, ErrorCodes.INVALID_ARGUMENT, { shape, index: i, dimension: dim });
}
}
}
/**
* EdgeFlowTensor - Core tensor implementation
*/
export class EdgeFlowTensor {
id;
dtype;
shape;
size;
_data;
_isDisposed = false;
constructor(data, shape, dtype = 'float32') {
validateShape(shape);
this.id = generateTensorId();
this.dtype = dtype;
this.shape = Object.freeze([...shape]);
this.size = calculateSize(this.shape);
// Validate data size matches shape
const expectedSize = this.size;
if (data.length !== expectedSize) {
throw new EdgeFlowError(`Data length (${data.length}) does not match shape ${JSON.stringify(shape)} (expected ${expectedSize})`, ErrorCodes.TENSOR_SHAPE_MISMATCH, { dataLength: data.length, expectedSize, shape });
}
// Convert to appropriate typed array
if (data instanceof Array) {
const TypedArrayCtor = getTypedArrayConstructor(dtype);
this._data = new TypedArrayCtor(data.length);
if (dtype === 'int64') {
// BigInt64Array requires BigInt values
const bigIntData = this._data;
for (let i = 0; i < data.length; i++) {
bigIntData[i] = BigInt(Math.round(data[i] ?? 0));
}
}
else {
for (let i = 0; i < data.length; i++) {
this._data[i] = data[i] ?? 0;
}
}
}
else {
this._data = data;
}
}
get data() {
this.checkDisposed();
return this._data;
}
get isDisposed() {
return this._isDisposed;
}
/**
* Check if tensor has been disposed
*/
checkDisposed() {
if (this._isDisposed) {
throw new EdgeFlowError('Cannot access disposed tensor', ErrorCodes.TENSOR_DISPOSED, { tensorId: this.id });
}
}
/**
* Convert to Float32Array
*/
toFloat32Array() {
this.checkDisposed();
if (this._data instanceof Float32Array) {
return this._data;
}
const result = new Float32Array(this.size);
for (let i = 0; i < this.size; i++) {
result[i] = Number(this._data[i] ?? 0);
}
return result;
}
/**
* Convert to regular array
*/
toArray() {
this.checkDisposed();
if (this.dtype === 'int64') {
// BigInt64Array needs special handling
const bigIntData = this._data;
const result = [];
for (let i = 0; i < bigIntData.length; i++) {
result.push(Number(bigIntData[i]));
}
return result;
}
return Array.from(this._data);
}
/**
* Clone the tensor
*/
clone() {
this.checkDisposed();
const TypedArrayCtor = this._data.constructor;
const clonedData = new TypedArrayCtor(this._data);
return new EdgeFlowTensor(clonedData, this.shape, this.dtype);
}
/**
* Dispose the tensor and free memory
*/
dispose() {
if (!this._isDisposed) {
this._isDisposed = true;
// Help garbage collection - use Object.assign to avoid type issues
Object.assign(this, { _data: null });
}
}
/**
* Get value at specific indices
*/
get(...indices) {
this.checkDisposed();
if (indices.length !== this.shape.length) {
throw new EdgeFlowError(`Expected ${this.shape.length} indices, got ${indices.length}`, ErrorCodes.INVALID_ARGUMENT, { expectedIndices: this.shape.length, gotIndices: indices.length });
}
let flatIndex = 0;
let stride = 1;
for (let i = this.shape.length - 1; i >= 0; i--) {
const idx = indices[i] ?? 0;
const dim = this.shape[i] ?? 1;
if (idx < 0 || idx >= dim) {
throw new EdgeFlowError(`Index ${idx} out of bounds for dimension ${i} with size ${dim}`, ErrorCodes.INVALID_ARGUMENT, { index: idx, dimension: i, size: dim });
}
flatIndex += idx * stride;
stride *= dim;
}
return Number(this._data[flatIndex] ?? 0);
}
/**
* Set value at specific indices
*/
set(value, ...indices) {
this.checkDisposed();
if (indices.length !== this.shape.length) {
throw new EdgeFlowError(`Expected ${this.shape.length} indices, got ${indices.length}`, ErrorCodes.INVALID_ARGUMENT, { expectedIndices: this.shape.length, gotIndices: indices.length });
}
let flatIndex = 0;
let stride = 1;
for (let i = this.shape.length - 1; i >= 0; i--) {
const idx = indices[i] ?? 0;
const dim = this.shape[i] ?? 1;
if (idx < 0 || idx >= dim) {
throw new EdgeFlowError(`Index ${idx} out of bounds for dimension ${i} with size ${dim}`, ErrorCodes.INVALID_ARGUMENT, { index: idx, dimension: i, size: dim });
}
flatIndex += idx * stride;
stride *= dim;
}
this._data[flatIndex] = value;
}
/**
* Reshape the tensor (returns new tensor)
*/
reshape(newShape) {
this.checkDisposed();
const newSize = calculateSize(newShape);
if (newSize !== this.size) {
throw new EdgeFlowError(`Cannot reshape tensor of size ${this.size} to shape ${JSON.stringify(newShape)} (size ${newSize})`, ErrorCodes.TENSOR_SHAPE_MISMATCH, { currentSize: this.size, newSize, newShape });
}
const TypedArrayCtor = this._data.constructor;
const clonedData = new TypedArrayCtor(this._data);
return new EdgeFlowTensor(clonedData, newShape, this.dtype);
}
/**
* Transpose the tensor (2D only for now)
*/
transpose() {
this.checkDisposed();
if (this.shape.length !== 2) {
throw new EdgeFlowError('Transpose is currently only supported for 2D tensors', ErrorCodes.NOT_IMPLEMENTED, { shape: this.shape });
}
const [rows, cols] = this.shape;
const result = new Float32Array(this.size);
for (let i = 0; i < rows; i++) {
for (let j = 0; j < cols; j++) {
result[j * rows + i] = Number(this._data[i * cols + j] ?? 0);
}
}
return new EdgeFlowTensor(result, [cols, rows], this.dtype);
}
/**
* Create string representation
*/
toString() {
return `Tensor(shape=[${this.shape.join(', ')}], dtype=${this.dtype})`;
}
}
// ============================================================================
// Tensor Factory Functions
// ============================================================================
/**
* Create a tensor from data
*/
export function tensor(data, shape, dtype = 'float32') {
// Handle nested arrays
if (Array.isArray(data) && data.length > 0 && Array.isArray(data[0])) {
const rows = data.length;
const cols = data[0].length;
const flatData = [];
for (const row of data) {
if (row.length !== cols) {
throw new EdgeFlowError('Nested arrays must have consistent dimensions', ErrorCodes.INVALID_ARGUMENT);
}
flatData.push(...row);
}
return new EdgeFlowTensor(flatData, shape ?? [rows, cols], dtype);
}
// Infer shape if not provided
const inferredShape = shape ?? [data.length];
return new EdgeFlowTensor(data, inferredShape, dtype);
}
/**
* Create a tensor filled with zeros
*/
export function zeros(shape, dtype = 'float32') {
const size = calculateSize(shape);
const TypedArrayCtor = getTypedArrayConstructor(dtype);
const data = new TypedArrayCtor(size);
return new EdgeFlowTensor(data, shape, dtype);
}
/**
* Create a tensor filled with ones
*/
export function ones(shape, dtype = 'float32') {
const size = calculateSize(shape);
const TypedArrayCtor = getTypedArrayConstructor(dtype);
const data = new TypedArrayCtor(size);
data.fill(1);
return new EdgeFlowTensor(data, shape, dtype);
}
/**
* Create a tensor filled with a specific value
*/
export function full(shape, value, dtype = 'float32') {
const size = calculateSize(shape);
const TypedArrayCtor = getTypedArrayConstructor(dtype);
const data = new TypedArrayCtor(size);
data.fill(value);
return new EdgeFlowTensor(data, shape, dtype);
}
/**
* Create a tensor with random values between 0 and 1
*/
export function random(shape, dtype = 'float32') {
const size = calculateSize(shape);
const data = new Float32Array(size);
for (let i = 0; i < size; i++) {
data[i] = Math.random();
}
return new EdgeFlowTensor(data, shape, dtype);
}
/**
* Create a tensor with random values from normal distribution
*/
export function randn(shape, dtype = 'float32') {
const size = calculateSize(shape);
const data = new Float32Array(size);
// Box-Muller transform for normal distribution
for (let i = 0; i < size; i += 2) {
const u1 = Math.random();
const u2 = Math.random();
const r = Math.sqrt(-2 * Math.log(u1));
const theta = 2 * Math.PI * u2;
data[i] = r * Math.cos(theta);
if (i + 1 < size) {
data[i + 1] = r * Math.sin(theta);
}
}
return new EdgeFlowTensor(data, shape, dtype);
}
/**
* Create a 1D tensor with evenly spaced values
*/
export function arange(start, stop, step = 1, dtype = 'float32') {
if (stop === undefined) {
stop = start;
start = 0;
}
const size = Math.ceil((stop - start) / step);
const data = new Float32Array(size);
for (let i = 0; i < size; i++) {
data[i] = start + i * step;
}
return new EdgeFlowTensor(data, [size], dtype);
}
/**
* Create a 1D tensor with evenly spaced values (specify number of points)
*/
export function linspace(start, stop, num = 50, dtype = 'float32') {
const data = new Float32Array(num);
const step = (stop - start) / (num - 1);
for (let i = 0; i < num; i++) {
data[i] = start + i * step;
}
return new EdgeFlowTensor(data, [num], dtype);
}
/**
* Create an identity matrix
*/
export function eye(n, dtype = 'float32') {
const data = new Float32Array(n * n);
for (let i = 0; i < n; i++) {
data[i * n + i] = 1;
}
return new EdgeFlowTensor(data, [n, n], dtype);
}
// ============================================================================
// Tensor Operations
// ============================================================================
/**
* Element-wise addition
*/
export function add(a, b) {
if (typeof b === 'number') {
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) + b;
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError('Tensor sizes must match for element-wise operations', ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) + (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
/**
* Element-wise subtraction
*/
export function sub(a, b) {
if (typeof b === 'number') {
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) - b;
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError('Tensor sizes must match for element-wise operations', ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) - (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
/**
* Element-wise multiplication
*/
export function mul(a, b) {
if (typeof b === 'number') {
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) * b;
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError('Tensor sizes must match for element-wise operations', ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) * (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
/**
* Element-wise division
*/
export function div(a, b) {
if (typeof b === 'number') {
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) / b;
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError('Tensor sizes must match for element-wise operations', ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) / (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
/**
* Matrix multiplication (2D tensors)
*/
export function matmul(a, b) {
if (a.shape.length !== 2 || b.shape.length !== 2) {
throw new EdgeFlowError('matmul requires 2D tensors', ErrorCodes.INVALID_ARGUMENT, { aShape: a.shape, bShape: b.shape });
}
const [m, k1] = a.shape;
const [k2, n] = b.shape;
if (k1 !== k2) {
throw new EdgeFlowError(`Matrix dimensions incompatible for multiplication: (${m}x${k1}) @ (${k2}x${n})`, ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(m * n);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < m; i++) {
for (let j = 0; j < n; j++) {
let sum = 0;
for (let k = 0; k < k1; k++) {
sum += (aData[i * k1 + k] ?? 0) * (bData[k * n + j] ?? 0);
}
result[i * n + j] = sum;
}
}
return new EdgeFlowTensor(result, [m, n], a.dtype);
}
/**
* Softmax activation
*/
export function softmax(t, axis = -1) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
// Handle negative axis
const actualAxis = axis < 0 ? t.shape.length + axis : axis;
if (actualAxis < 0 || actualAxis >= t.shape.length) {
throw new EdgeFlowError(`Invalid axis ${axis} for tensor with ${t.shape.length} dimensions`, ErrorCodes.INVALID_ARGUMENT, { axis, shape: t.shape });
}
// For 1D tensors
if (t.shape.length === 1) {
let max = -Infinity;
for (let i = 0; i < t.size; i++) {
if ((data[i] ?? 0) > max)
max = data[i] ?? 0;
}
let sum = 0;
for (let i = 0; i < t.size; i++) {
result[i] = Math.exp((data[i] ?? 0) - max);
sum += result[i] ?? 0;
}
for (let i = 0; i < t.size; i++) {
result[i] = (result[i] ?? 0) / sum;
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
// For 2D tensors along last axis
if (t.shape.length === 2 && actualAxis === 1) {
const [rows, cols] = t.shape;
for (let i = 0; i < rows; i++) {
let max = -Infinity;
for (let j = 0; j < cols; j++) {
if ((data[i * cols + j] ?? 0) > max)
max = data[i * cols + j] ?? 0;
}
let sum = 0;
for (let j = 0; j < cols; j++) {
result[i * cols + j] = Math.exp((data[i * cols + j] ?? 0) - max);
sum += result[i * cols + j] ?? 0;
}
for (let j = 0; j < cols; j++) {
result[i * cols + j] = (result[i * cols + j] ?? 0) / sum;
}
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
throw new EdgeFlowError('Softmax currently only supports 1D tensors or 2D tensors along the last axis', ErrorCodes.NOT_IMPLEMENTED, { shape: t.shape, axis });
}
/**
* ReLU activation
*/
export function relu(t) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
for (let i = 0; i < t.size; i++) {
result[i] = Math.max(0, data[i] ?? 0);
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
/**
* Sigmoid activation
*/
export function sigmoid(t) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
for (let i = 0; i < t.size; i++) {
result[i] = 1 / (1 + Math.exp(-(data[i] ?? 0)));
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
/**
* Tanh activation
*/
export function tanh(t) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
for (let i = 0; i < t.size; i++) {
result[i] = Math.tanh(data[i] ?? 0);
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
/**
* Sum all elements or along an axis
*/
export function sum(t, axis) {
const data = t.toFloat32Array();
if (axis === undefined) {
let total = 0;
for (let i = 0; i < t.size; i++) {
total += data[i] ?? 0;
}
return total;
}
// Handle negative axis
const actualAxis = axis < 0 ? t.shape.length + axis : axis;
if (actualAxis < 0 || actualAxis >= t.shape.length) {
throw new EdgeFlowError(`Invalid axis ${axis} for tensor with ${t.shape.length} dimensions`, ErrorCodes.INVALID_ARGUMENT, { axis, shape: t.shape });
}
// Calculate new shape
const newShape = [...t.shape];
newShape.splice(actualAxis, 1);
if (newShape.length === 0) {
let total = 0;
for (let i = 0; i < t.size; i++) {
total += data[i] ?? 0;
}
return total;
}
// For 2D sum along axis
if (t.shape.length === 2) {
const [rows, cols] = t.shape;
if (actualAxis === 0) {
const result = new Float32Array(cols);
for (let j = 0; j < cols; j++) {
for (let i = 0; i < rows; i++) {
result[j] = (result[j] ?? 0) + (data[i * cols + j] ?? 0);
}
}
return new EdgeFlowTensor(result, [cols], t.dtype);
}
else {
const result = new Float32Array(rows);
for (let i = 0; i < rows; i++) {
for (let j = 0; j < cols; j++) {
result[i] = (result[i] ?? 0) + (data[i * cols + j] ?? 0);
}
}
return new EdgeFlowTensor(result, [rows], t.dtype);
}
}
throw new EdgeFlowError('Sum along axis currently only supports up to 2D tensors', ErrorCodes.NOT_IMPLEMENTED, { shape: t.shape, axis });
}
/**
* Mean of all elements or along an axis
*/
export function mean(t, axis) {
    if (axis === undefined) {
        return sum(t) / t.size;
    }
    // Normalize negative axis so the divisor is read from the correct dimension
    const actualAxis = axis < 0 ? t.shape.length + axis : axis;
    const result = sum(t, axis);
    const axisSize = t.shape[actualAxis] ?? 1;
    if (typeof result === 'number') {
        return result / axisSize;
    }
    return div(result, axisSize);
}

/**
* Argmax - return index of maximum value
*/
export function argmax(t, axis) {
const data = t.toFloat32Array();
if (axis === undefined) {
let maxIdx = 0;
let maxVal = data[0] ?? -Infinity;
for (let i = 1; i < t.size; i++) {
if ((data[i] ?? -Infinity) > maxVal) {
maxVal = data[i] ?? -Infinity;
maxIdx = i;
}
}
return maxIdx;
}
// Handle negative axis
const actualAxis = axis < 0 ? t.shape.length + axis : axis;
// For 2D along last axis
if (t.shape.length === 2 && actualAxis === 1) {
const [rows, cols] = t.shape;
const result = new Float32Array(rows);
for (let i = 0; i < rows; i++) {
let maxIdx = 0;
let maxVal = data[i * cols] ?? -Infinity;
for (let j = 1; j < cols; j++) {
if ((data[i * cols + j] ?? -Infinity) > maxVal) {
maxVal = data[i * cols + j] ?? -Infinity;
maxIdx = j;
}
}
result[i] = maxIdx;
}
return new EdgeFlowTensor(result, [rows], 'int32');
}
throw new EdgeFlowError('Argmax along axis currently only supports 2D tensors along the last axis', ErrorCodes.NOT_IMPLEMENTED, { shape: t.shape, axis });
}
/**
* Concatenate tensors along an axis
*/
export function concat(tensors, axis = 0) {
if (tensors.length === 0) {
throw new EdgeFlowError('Cannot concatenate empty array of tensors', ErrorCodes.INVALID_ARGUMENT);
}
if (tensors.length === 1) {
return tensors[0]?.clone() ?? zeros([0]);
}
const first = tensors[0];
if (!first) {
throw new EdgeFlowError('First tensor is undefined', ErrorCodes.INVALID_ARGUMENT);
}
// Handle negative axis
const actualAxis = axis < 0 ? first.shape.length + axis : axis;
// Validate shapes
for (let i = 1; i < tensors.length; i++) {
const t = tensors[i];
if (!t)
continue;
if (t.shape.length !== first.shape.length) {
throw new EdgeFlowError('All tensors must have the same number of dimensions', ErrorCodes.TENSOR_SHAPE_MISMATCH);
}
for (let j = 0; j < first.shape.length; j++) {
if (j !== actualAxis && first.shape[j] !== t.shape[j]) {
throw new EdgeFlowError(`Shape mismatch at dimension ${j}`, ErrorCodes.TENSOR_SHAPE_MISMATCH);
}
}
}
// Calculate new shape
const newShape = [...first.shape];
let totalAxisSize = 0;
for (const t of tensors) {
if (t)
totalAxisSize += t.shape[actualAxis] ?? 0;
}
newShape[actualAxis] = totalAxisSize;
// For 1D concatenation
if (first.shape.length === 1) {
const result = new Float32Array(totalAxisSize);
let offset = 0;
for (const t of tensors) {
if (!t)
continue;
result.set(t.toFloat32Array(), offset);
offset += t.size;
}
return new EdgeFlowTensor(result, newShape, first.dtype);
}
throw new EdgeFlowError('Concatenation currently only supports 1D tensors', ErrorCodes.NOT_IMPLEMENTED);
}
//# sourceMappingURL=tensor.js.map
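
The factory functions and operations above are enough to express a tiny dense layer; the sketch below assumes the same relative import path and made-up weights.

import { tensor, matmul, add, softmax, argmax } from './tensor.js';

const weights = tensor([[0.5, -1.0], [2.0, 0.25]]); // shape [2, 2]
const input = tensor([[1.0, 3.0]]);                 // shape [1, 2]

const logits = add(matmul(input, weights), 0.1);    // scalar bias added element-wise
const probs = softmax(logits, -1);                  // each row sums to 1
const predicted = argmax(probs, -1);                // per-row index of the maximum

console.log(probs.toArray(), predicted.toArray());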
================================================
FILE: dist/core/types.d.ts
================================================
/**
* edgeFlow.js - Core Type Definitions
*
* This file contains all the core types used throughout the framework.
*/
/**
* Supported data types for tensors
*/
export type DataType = 'float32' | 'float16' | 'int32' | 'int64' | 'uint8' | 'int8' | 'bool';
/**
* TypedArray types used for tensor data
*/
export type TypedArray = Float32Array | Float64Array | Int32Array | BigInt64Array | Uint8Array | Int8Array;
/**
* Tensor shape definition
*/
export type Shape = readonly number[];
/**
* Tensor interface
*/
export interface Tensor {
/** Unique identifier for the tensor */
readonly id: string;
/** Data type of the tensor */
readonly dtype: DataType;
/** Shape of the tensor */
readonly shape: Shape;
/** Total number of elements */
readonly size: number;
/** Underlying data */
readonly data: TypedArray;
/** Get data as Float32Array */
toFloat32Array(): Float32Array;
/** Get data as array */
toArray(): number[];
/** Clone the tensor */
clone(): Tensor;
/** Dispose the tensor and free memory */
dispose(): void;
/** Check if tensor has been disposed */
readonly isDisposed: boolean;
}
/**
* Supported runtime backends
*/
export type RuntimeType = 'webgpu' | 'webnn' | 'wasm' | 'auto';
/**
* Runtime capability flags
*/
export interface RuntimeCapabilities {
/** Supports concurrent execution */
concurrency: boolean;
/** Supports quantized models */
quantization: boolean;
/** Supports float16 */
float16: boolean;
/** Supports dynamic shapes */
dynamicShapes: boolean;
/** Maximum batch size */
maxBatchSize: number;
/** Available memory in bytes */
availableMemory: number;
}
/**
* Runtime interface that all backends must implement
*/
export interface Runtime {
/** Runtime name */
readonly name: RuntimeType;
/** Runtime capabilities */
readonly capabilities: RuntimeCapabilities;
/** Initialize the runtime */
    initialize(): Promise<void>;
/** Check if runtime is available in current environment */
    isAvailable(): Promise<boolean>;
/** Load a model from ArrayBuffer */
    loadModel(modelData: ArrayBuffer, options?: ModelLoadOptions): Promise<LoadedModel>;
/** Run inference */
    run(model: LoadedModel, inputs: Tensor[]): Promise<Tensor[]>;
/** Run inference with named inputs (optional) */
    runNamed?(model: LoadedModel, namedInputs: Map<string, Tensor>): Promise<Map<string, Tensor>>;
/** Dispose the runtime and free resources */
dispose(): void;
}
/**
* Model format types
*/
export type ModelFormat = 'onnx' | 'edgeflow' | 'safetensors';
/**
* Model quantization types
*/
export type QuantizationType = 'float32' | 'float16' | 'int8' | 'uint8' | 'int4';
/**
* Model metadata
*/
export interface ModelMetadata {
/** Model name/identifier */
name: string;
/** Model version */
version?: string;
/** Model description */
description?: string;
/** Model author */
author?: string;
/** Model license */
license?: string;
/** Model tags */
tags?: string[];
/** Input specifications */
inputs: ModelIOSpec[];
/** Output specifications */
outputs: ModelIOSpec[];
/** Model size in bytes */
sizeBytes: number;
/** Quantization type */
quantization: QuantizationType;
/** Model format */
format: ModelFormat;
}
/**
* Model input/output specification
*/
export interface ModelIOSpec {
/** Name of the input/output */
name: string;
/** Data type */
dtype: DataType;
/** Shape (use -1 for dynamic dimensions) */
shape: number[];
/** Optional description */
description?: string;
}
/**
* Options for loading a model
*/
export interface ModelLoadOptions {
/** Target quantization (convert during load) */
quantization?: QuantizationType;
/** Custom metadata */
    metadata?: Partial<ModelMetadata>;
/** Enable caching */
cache?: boolean;
/** Progress callback */
onProgress?: (progress: number) => void;
}
/**
* Loaded model instance
*/
export interface LoadedModel {
/** Unique model instance ID */
readonly id: string;
/** Model metadata */
readonly metadata: ModelMetadata;
/** Check if model is loaded */
readonly isLoaded: boolean;
/** Runtime this model is loaded on */
readonly runtime: RuntimeType;
/** Dispose the model and free resources */
dispose(): void;
}
/**
* Task priority levels
*/
export type TaskPriority = 'low' | 'normal' | 'high' | 'critical';
/**
* Task status
*/
export type TaskStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled';
/**
* Inference task definition
*/
export interface InferenceTask<T = unknown> {
/** Unique task ID */
readonly id: string;
/** Model ID this task is for */
readonly modelId: string;
/** Task priority */
readonly priority: TaskPriority;
/** Task status */
readonly status: TaskStatus;
/** Creation timestamp */
readonly createdAt: number;
/** Start timestamp (when running) */
readonly startedAt?: number;
/** Completion timestamp */
readonly completedAt?: number;
/** Task result (when completed) */
readonly result?: T;
/** Task error (when failed) */
readonly error?: Error;
/** Cancel the task */
cancel(): void;
/** Wait for task completion */
    wait(): Promise<T>;
}
/**
* Scheduler options
*/
export interface SchedulerOptions {
/** Maximum concurrent tasks across all models */
maxConcurrentTasks?: number;
/** Maximum concurrent tasks per model */
maxConcurrentPerModel?: number;
/** Default task timeout in milliseconds */
defaultTimeout?: number;
/** Enable task batching */
enableBatching?: boolean;
/** Maximum batch size */
maxBatchSize?: number;
/** Batch timeout in milliseconds */
batchTimeout?: number;
/** Maximum retry attempts for failed tasks (default: 0 = no retry) */
maxRetries?: number;
/** Base delay between retries in ms (exponential backoff) */
retryBaseDelay?: number;
/** Enable circuit breaker per model (default: false) */
circuitBreaker?: boolean;
/** Consecutive failures before the circuit opens (default: 5) */
circuitBreakerThreshold?: number;
/** Time in ms before the circuit half-opens to test (default: 30000) */
circuitBreakerResetTimeout?: number;
}
/**
* Memory statistics
*/
export interface MemoryStats {
/** Total allocated memory in bytes */
allocated: number;
/** Currently used memory in bytes */
used: number;
/** Peak memory usage in bytes */
peak: number;
/** Number of active tensors */
tensorCount: number;
/** Number of loaded models */
modelCount: number;
}
/**
* Memory pool configuration
*/
export interface MemoryPoolConfig {
/** Initial pool size in bytes */
initialSize?: number;
/** Maximum pool size in bytes */
maxSize?: number;
/** Growth factor when expanding */
growthFactor?: number;
/** Enable automatic garbage collection */
autoGC?: boolean;
/** GC threshold (percentage of max size) */
gcThreshold?: number;
}
/**
* Supported pipeline tasks
*/
export type PipelineTask = 'text-classification' | 'token-classification' | 'question-answering' | 'fill-mask' | 'text-generation' | 'text2text-generation' | 'summarization' | 'translation' | 'feature-extraction' | 'sentiment-analysis' | 'zero-shot-classification' | 'image-classification' | 'object-detection' | 'image-segmentation' | 'depth-estimation' | 'image-to-text' | 'audio-classification' | 'automatic-speech-recognition' | 'text-to-speech';
/**
* Pipeline configuration
*/
export interface PipelineConfig {
/** Task type */
task: PipelineTask;
/** Model ID or path */
model: string;
/** Runtime to use */
runtime?: RuntimeType;
/** Enable caching */
cache?: boolean;
/** Quantization type */
quantization?: QuantizationType;
/** Device to use */
device?: 'cpu' | 'gpu';
/** Custom tokenizer config */
tokenizer?: TokenizerConfig;
}
/**
* Pipeline options passed during inference
*/
export interface PipelineOptions {
/** Batch size */
batchSize?: number;
/** Top K results */
topK?: number;
/** Temperature for generation */
temperature?: number;
/** Maximum length for generation */
maxLength?: number;
/** Task timeout in milliseconds */
timeout?: number;
}
/**
* Tokenizer configuration
*/
export interface TokenizerConfig {
/** Vocabulary size */
vocabSize: number;
/** Maximum sequence length */
maxLength: number;
/** Padding token ID */
padTokenId: number;
/** Unknown token ID */
unkTokenId: number;
/** Start of sequence token ID */
bosTokenId?: number;
/** End of sequence token ID */
eosTokenId?: number;
/** Separator token ID */
sepTokenId?: number;
/** CLS token ID */
clsTokenId?: number;
/** Mask token ID */
maskTokenId?: number;
}
/**
* Tokenized output
*/
export interface TokenizedOutput {
/** Input IDs */
inputIds: number[];
/** Attention mask */
attentionMask: number[];
/** Token type IDs (for segment embeddings) */
tokenTypeIds?: number[];
/** Special tokens mask */
specialTokensMask?: number[];
/** Offset mapping (for token-level tasks) */
offsetMapping?: [number, number][];
}
/**
* Base error class for edgeFlow errors
*/
export declare class EdgeFlowError extends Error {
readonly code: string;
    readonly details?: Record<string, unknown> | undefined;
    constructor(message: string, code: string, details?: Record<string, unknown> | undefined);
}
/**
* Error codes
*/
export declare const ErrorCodes: {
readonly RUNTIME_NOT_AVAILABLE: "RUNTIME_NOT_AVAILABLE";
readonly RUNTIME_INIT_FAILED: "RUNTIME_INIT_FAILED";
readonly RUNTIME_NOT_INITIALIZED: "RUNTIME_NOT_INITIALIZED";
readonly MODEL_NOT_FOUND: "MODEL_NOT_FOUND";
readonly MODEL_LOAD_FAILED: "MODEL_LOAD_FAILED";
readonly MODEL_INVALID_FORMAT: "MODEL_INVALID_FORMAT";
readonly MODEL_NOT_LOADED: "MODEL_NOT_LOADED";
readonly INFERENCE_FAILED: "INFERENCE_FAILED";
readonly INFERENCE_TIMEOUT: "INFERENCE_TIMEOUT";
readonly INFERENCE_CANCELLED: "INFERENCE_CANCELLED";
readonly OUT_OF_MEMORY: "OUT_OF_MEMORY";
readonly MEMORY_LEAK_DETECTED: "MEMORY_LEAK_DETECTED";
readonly TENSOR_SHAPE_MISMATCH: "TENSOR_SHAPE_MISMATCH";
readonly TENSOR_DTYPE_MISMATCH: "TENSOR_DTYPE_MISMATCH";
readonly TENSOR_DISPOSED: "TENSOR_DISPOSED";
readonly PIPELINE_NOT_SUPPORTED: "PIPELINE_NOT_SUPPORTED";
readonly PIPELINE_INPUT_INVALID: "PIPELINE_INPUT_INVALID";
readonly INVALID_ARGUMENT: "INVALID_ARGUMENT";
readonly NOT_IMPLEMENTED: "NOT_IMPLEMENTED";
readonly UNKNOWN_ERROR: "UNKNOWN_ERROR";
};
export type ErrorCode = typeof ErrorCodes[keyof typeof ErrorCodes];
/**
* Event types emitted by edgeFlow
*/
export type EventType = 'model:loading' | 'model:loaded' | 'model:unloaded' | 'inference:start' | 'inference:complete' | 'inference:error' | 'memory:warning' | 'memory:gc' | 'runtime:ready' | 'runtime:error';
/**
* Event payload interface
*/
export interface EdgeFlowEvent<T = unknown> {
type: EventType;
timestamp: number;
data: T;
}
/**
* Event listener function type
*/
export type EventListener<T = unknown> = (event: EdgeFlowEvent<T>) => void;
//# sourceMappingURL=types.d.ts.map
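
Any backend has to satisfy the Runtime contract above. A hedged, dependency-free sketch of an object that does so, usable as a stub in tests; all names and values here are illustrative, not part of the library:

const stubRuntime = {
  name: 'wasm',
  capabilities: {
    concurrency: false,
    quantization: false,
    float16: false,
    dynamicShapes: true,
    maxBatchSize: 1,
    availableMemory: 64 * 1024 * 1024, // nominal 64 MB
  },
  async initialize() { /* nothing to set up */ },
  async isAvailable() { return true; },
  async loadModel(modelData) {
    // Return a minimal LoadedModel with placeholder metadata
    return {
      id: `stub_${Date.now()}`,
      metadata: {
        name: 'stub-model',
        inputs: [],
        outputs: [],
        sizeBytes: modelData.byteLength,
        quantization: 'float32',
        format: 'onnx',
      },
      isLoaded: true,
      runtime: 'wasm',
      dispose() { /* no resources held */ },
    };
  },
  // Identity "inference": clone the inputs so callers own fresh tensors
  async run(model, inputs) { return inputs.map(t => t.clone()); },
  dispose() { /* no resources held */ },
};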
================================================
FILE: dist/core/types.js
================================================
/**
* edgeFlow.js - Core Type Definitions
*
* This file contains all the core types used throughout the framework.
*/
// ============================================================================
// Error Types
// ============================================================================
/**
* Base error class for edgeFlow errors
*/
export class EdgeFlowError extends Error {
code;
details;
constructor(message, code, details) {
super(message);
this.code = code;
this.details = details;
this.name = 'EdgeFlowError';
}
}
/**
* Error codes
*/
export const ErrorCodes = {
// Runtime errors
RUNTIME_NOT_AVAILABLE: 'RUNTIME_NOT_AVAILABLE',
RUNTIME_INIT_FAILED: 'RUNTIME_INIT_FAILED',
RUNTIME_NOT_INITIALIZED: 'RUNTIME_NOT_INITIALIZED',
// Model errors
MODEL_NOT_FOUND: 'MODEL_NOT_FOUND',
MODEL_LOAD_FAILED: 'MODEL_LOAD_FAILED',
MODEL_INVALID_FORMAT: 'MODEL_INVALID_FORMAT',
MODEL_NOT_LOADED: 'MODEL_NOT_LOADED',
// Inference errors
INFERENCE_FAILED: 'INFERENCE_FAILED',
INFERENCE_TIMEOUT: 'INFERENCE_TIMEOUT',
INFERENCE_CANCELLED: 'INFERENCE_CANCELLED',
// Memory errors
OUT_OF_MEMORY: 'OUT_OF_MEMORY',
MEMORY_LEAK_DETECTED: 'MEMORY_LEAK_DETECTED',
// Tensor errors
TENSOR_SHAPE_MISMATCH: 'TENSOR_SHAPE_MISMATCH',
TENSOR_DTYPE_MISMATCH: 'TENSOR_DTYPE_MISMATCH',
TENSOR_DISPOSED: 'TENSOR_DISPOSED',
// Pipeline errors
PIPELINE_NOT_SUPPORTED: 'PIPELINE_NOT_SUPPORTED',
PIPELINE_INPUT_INVALID: 'PIPELINE_INPUT_INVALID',
// General errors
INVALID_ARGUMENT: 'INVALID_ARGUMENT',
NOT_IMPLEMENTED: 'NOT_IMPLEMENTED',
UNKNOWN_ERROR: 'UNKNOWN_ERROR',
};
//# sourceMappingURL=types.js.map
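
Because every framework failure surfaces as an EdgeFlowError carrying one of these codes, callers can branch on err.code. A short sketch, with relative import paths assumed:

import { EdgeFlowError, ErrorCodes } from './types.js';
import { tensor, matmul } from './tensor.js';

try {
  const a = tensor([1, 2, 3]); // 1D tensor, shape [3]
  matmul(a, a);                // matmul requires 2D tensors, so this throws
} catch (err) {
  if (err instanceof EdgeFlowError && err.code === ErrorCodes.INVALID_ARGUMENT) {
    console.warn('Bad input shapes:', err.details);
  } else {
    throw err; // unrelated errors propagate unchanged
  }
}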
================================================
FILE: dist/core/worker.d.ts
================================================
/**
* edgeFlow.js - Web Worker Support
*
* Run inference in a Web Worker to avoid blocking the main thread.
*/
import type { Tensor, RuntimeType } from './types.js';
/**
* Worker message types
*/
export type WorkerMessageType = 'init' | 'load_model' | 'run_inference' | 'dispose' | 'ready' | 'result' | 'error' | 'progress';
/**
* Worker message structure
*/
export interface WorkerMessage {
id: string;
type: WorkerMessageType;
payload?: unknown;
}
/**
* Worker request for loading a model
*/
export interface LoadModelRequest {
url: string;
options?: {
runtime?: RuntimeType;
cache?: boolean;
};
}
/**
* Worker request for running inference
*/
export interface InferenceRequest {
modelId: string;
inputs: SerializedTensor[];
}
/**
* Serialized tensor for transfer
*/
export interface SerializedTensor {
data: ArrayBuffer;
shape: number[];
dtype: string;
}
/**
* Worker pool options
*/
export interface WorkerPoolOptions {
/** Number of workers (default: navigator.hardwareConcurrency or 4) */
numWorkers?: number;
/** Worker script URL (default: auto-detect) */
workerUrl?: string;
}
/**
* Serialize a tensor for transfer to worker
*/
export declare function serializeTensor(tensor: Tensor): SerializedTensor;
/**
* Deserialize a tensor from worker.
* Uses a lazy import to avoid circular dependency issues.
*/
export declare function deserializeTensor(serialized: SerializedTensor): Promise<Tensor>;
/**
* Synchronous deserialisation used internally where async is not feasible.
* Requires EdgeFlowTensor to be passed in to avoid require().
*/
export declare function deserializeTensorSync(serialized: SerializedTensor, TensorClass: new (data: Float32Array, shape: number[], dtype: string) => Tensor): Tensor;
export type WorkerHealthState = 'alive' | 'dead' | 'restarting';
/**
* InferenceWorker - Wrapper for a single Web Worker with auto-restart
*/
export declare class InferenceWorker {
private worker;
private pendingRequests;
private isReady;
private readyPromise;
private readyResolve;
private workerUrl;
private _health;
private restartAttempts;
constructor(workerUrl?: string);
get health(): WorkerHealthState;
/**
* Initialize the worker
*/
private initWorker;
/**
* Handle worker crash: reject pending, mark dead, attempt restart
*/
private handleCrash;
/**
* Restart the worker with exponential backoff
*/
private attemptRestart;
/**
* Restart: terminate old, create new
*/
restart(): void;
/**
* Create worker code as blob URL
*/
private createWorkerBlob;
/**
* Handle worker message
*/
private handleMessage;
/**
* Send a request to the worker
*/
private sendRequest;
/**
* Initialize the worker
*/
    init(): Promise<void>;
/**
* Load a model
*/
loadModel(url: string, options?: {
runtime?: RuntimeType;
cache?: boolean;
    }): Promise<string>;
/**
* Run inference
*/
    runInference(modelId: string, inputs: Tensor[]): Promise<Tensor[]>;
/**
* Dispose a model
*/
    dispose(modelId: string): Promise<void>;
/**
* Terminate the worker
*/
terminate(): void;
}
/**
* WorkerPool - Manage multiple workers for parallel inference.
* Automatically falls back to healthy workers when one is dead.
*/
export declare class WorkerPool {
private workers;
private currentIndex;
private modelAssignments;
private poolOptions;
constructor(options?: WorkerPoolOptions);
/**
* Get next healthy worker (round-robin, skipping dead ones)
*/
private getNextHealthyWorker;
/**
* Get worker for a specific model, falling back to any healthy worker
*/
private getWorkerForModel;
/**
* Replace a worker at a given index with a fresh one
*/
replaceWorker(index: number): void;
/**
* Initialize all workers
*/
    init(): Promise<void>;
/**
* Load a model on a worker
*/
loadModel(url: string, options?: {
runtime?: RuntimeType;
cache?: boolean;
    }): Promise<string>;
/**
* Run inference (auto-retries on a healthy worker if assigned one is dead)
*/
    runInference(modelId: string, inputs: Tensor[]): Promise<Tensor[]>;
/**
* Run inference on multiple inputs in parallel
*/
    runBatch(modelId: string, batchInputs: Tensor[][]): Promise<Tensor[][]>;
/**
* Dispose a model
*/
    dispose(modelId: string): Promise<void>;
/**
* Terminate all workers
*/
terminate(): void;
/**
* Get number of workers
*/
get size(): number;
}
/**
* Get or create global worker pool
*/
export declare function getWorkerPool(options?: WorkerPoolOptions): WorkerPool;
/**
* Run inference in a worker
*/
export declare function runInWorker(modelUrl: string, inputs: Tensor[], options?: {
cache?: boolean;
}): Promise<Tensor[]>;
/**
* Check if Web Workers are supported
*/
export declare function isWorkerSupported(): boolean;
//# sourceMappingURL=worker.d.ts.map
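
The serialization helpers declared here are the boundary between the main thread and workers. A round-trip sketch, with relative import paths assumed:

import { tensor } from './tensor.js';
import { serializeTensor, deserializeTensor } from './worker.js';

const original = tensor([1, 2, 3, 4], [2, 2]);
const wire = serializeTensor(original);    // { data: ArrayBuffer, shape, dtype }
// wire.data can be listed as a transferable in postMessage to avoid copying
const restored = await deserializeTensor(wire);
console.log(restored.shape, restored.toArray()); // [2, 2] [1, 2, 3, 4]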
================================================
FILE: dist/core/worker.js
================================================
/**
* edgeFlow.js - Web Worker Support
*
* Run inference in a Web Worker to avoid blocking the main thread.
*/
// ============================================================================
// Tensor Serialization
// ============================================================================
/**
* Serialize a tensor for transfer to worker
*/
export function serializeTensor(tensor) {
const data = tensor.toFloat32Array();
// Create a copy of the ArrayBuffer
const buffer = new ArrayBuffer(data.byteLength);
new Float32Array(buffer).set(data);
return {
data: buffer,
shape: [...tensor.shape],
dtype: tensor.dtype,
};
}
/**
* Deserialize a tensor from worker.
* Uses a lazy import to avoid circular dependency issues.
*/
export async function deserializeTensor(serialized) {
const { EdgeFlowTensor } = await import('./tensor.js');
const data = new Float32Array(serialized.data);
return new EdgeFlowTensor(data, serialized.shape, serialized.dtype);
}
/**
* Synchronous deserialisation used internally where async is not feasible.
* Requires EdgeFlowTensor to be passed in to avoid require().
*/
export function deserializeTensorSync(serialized, TensorClass) {
const data = new Float32Array(serialized.data);
return new TensorClass(data, serialized.shape, serialized.dtype);
}
const MAX_RESTART_ATTEMPTS = 3;
const RESTART_BASE_DELAY_MS = 1000;
/**
* InferenceWorker - Wrapper for a single Web Worker with auto-restart
*/
export class InferenceWorker {
worker = null;
pendingRequests = new Map();
isReady = false;
readyPromise;
readyResolve;
workerUrl;
_health = 'alive';
restartAttempts = 0;
constructor(workerUrl) {
this.workerUrl = workerUrl;
this.readyPromise = new Promise(resolve => {
this.readyResolve = resolve;
});
this.initWorker(workerUrl);
}
get health() {
return this._health;
}
/**
* Initialize the worker
*/
initWorker(workerUrl) {
const url = workerUrl ?? this.createWorkerBlob();
this.worker = new Worker(url, { type: 'module' });
this.worker.onmessage = (event) => {
this.handleMessage(event.data);
};
this.worker.onerror = (error) => {
console.error('Worker error:', error);
this.handleCrash();
};
this.worker.onmessageerror = () => {
this.handleCrash();
};
}
/**
* Handle worker crash: reject pending, mark dead, attempt restart
*/
handleCrash() {
this._health = 'dead';
this.isReady = false;
const crashError = new Error('Worker crashed');
for (const [, { reject }] of this.pendingRequests) {
reject(crashError);
}
this.pendingRequests.clear();
this.attemptRestart();
}
/**
* Restart the worker with exponential backoff
*/
attemptRestart() {
if (this.restartAttempts >= MAX_RESTART_ATTEMPTS) {
console.error(`Worker failed to restart after ${MAX_RESTART_ATTEMPTS} attempts`);
return;
}
this._health = 'restarting';
const delay = RESTART_BASE_DELAY_MS * Math.pow(2, this.restartAttempts);
this.restartAttempts++;
setTimeout(() => {
this.restart();
}, delay);
}
/**
* Restart: terminate old, create new
*/
restart() {
if (this.worker) {
try {
this.worker.terminate();
}
catch { /* already dead */ }
this.worker = null;
}
this.readyPromise = new Promise(resolve => {
this.readyResolve = resolve;
});
this.isReady = false;
try {
this.initWorker(this.workerUrl);
this._health = 'alive';
this.restartAttempts = 0;
}
catch {
this._health = 'dead';
this.attemptRestart();
}
}
/**
* Create worker code as blob URL
*/
createWorkerBlob() {
const workerCode = `
// edgeFlow.js Worker
let models = new Map();
let ort = null;
// Load ONNX Runtime
async function loadOrt() {
if (ort) return ort;
ort = await import('https://cdn.jsdelivr.net/npm/onnxruntime-web@1.17.0/dist/esm/ort.min.js');
return ort;
}
// Handle messages
self.onmessage = async (event) => {
const { id, type, payload } = event.data;
try {
switch (type) {
case 'init': {
await loadOrt();
self.postMessage({ id, type: 'ready' });
break;
}
case 'load_model': {
await loadOrt();
const { url, options } = payload;
const response = await fetch(url);
const arrayBuffer = await response.arrayBuffer();
const session = await ort.InferenceSession.create(
new Uint8Array(arrayBuffer),
{ executionProviders: ['wasm'] }
);
const modelId = 'model_' + Date.now();
models.set(modelId, session);
self.postMessage({
id,
type: 'result',
payload: { modelId }
});
break;
}
case 'run_inference': {
const { modelId, inputs } = payload;
const session = models.get(modelId);
if (!session) {
throw new Error('Model not found: ' + modelId);
}
// Prepare inputs
const feeds = {};
const inputNames = session.inputNames;
for (let i = 0; i < inputs.length && i < inputNames.length; i++) {
const input = inputs[i];
const data = new Float32Array(input.data);
feeds[inputNames[i]] = new ort.Tensor(input.dtype, data, input.shape);
}
// Run inference
const results = await session.run(feeds);
// Serialize outputs
const outputs = [];
for (const name of session.outputNames) {
const tensor = results[name];
outputs.push({
data: tensor.data.buffer.slice(0),
shape: tensor.dims,
dtype: tensor.type
});
}
self.postMessage(
{ id, type: 'result', payload: { outputs } },
outputs.map(o => o.data)
);
break;
}
case 'dispose': {
const { modelId } = payload;
const session = models.get(modelId);
if (session) {
// session.release(); // Not available in all versions
models.delete(modelId);
}
self.postMessage({ id, type: 'result', payload: { success: true } });
break;
}
}
} catch (error) {
self.postMessage({
id,
type: 'error',
payload: { message: error.message }
});
}
};
`;
const blob = new Blob([workerCode], { type: 'application/javascript' });
return URL.createObjectURL(blob);
}
/**
* Handle worker message
*/
handleMessage(message) {
if (message.type === 'ready') {
this.isReady = true;
this.readyResolve();
return;
}
const request = this.pendingRequests.get(message.id);
if (!request)
return;
this.pendingRequests.delete(message.id);
if (message.type === 'error') {
const payload = message.payload;
request.reject(new Error(payload.message));
}
else {
request.resolve(message.payload);
}
}
/**
* Send a request to the worker
*/
async sendRequest(type, payload) {
if (!this.worker) {
throw new Error('Worker not initialized');
}
const id = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
return new Promise((resolve, reject) => {
this.pendingRequests.set(id, { resolve: resolve, reject });
const message = { id, type, payload };
// Transfer ArrayBuffers for efficiency
const transfers = [];
if (payload && typeof payload === 'object' && 'inputs' in payload) {
const inputs = payload.inputs;
for (const input of inputs) {
if (input.data instanceof ArrayBuffer) {
transfers.push(input.data);
}
}
}
this.worker.postMessage(message, transfers);
});
}
/**
* Initialize the worker
*/
async init() {
if (this.isReady)
return;
await this.sendRequest('init');
await this.readyPromise;
}
/**
* Load a model
*/
async loadModel(url, options) {
await this.init();
const result = await this.sendRequest('load_model', { url, options });
return result.modelId;
}
/**
* Run inference
*/
async runInference(modelId, inputs) {
const serializedInputs = inputs.map(serializeTensor);
const result = await this.sendRequest('run_inference', { modelId, inputs: serializedInputs });
return Promise.all(result.outputs.map(deserializeTensor));
}
/**
* Dispose a model
*/
async dispose(modelId) {
await this.sendRequest('dispose', { modelId });
}
/**
* Terminate the worker
*/
terminate() {
if (this.worker) {
this.worker.terminate();
this.worker = null;
}
this.pendingRequests.clear();
}
}
// ============================================================================
// Worker Pool
// ============================================================================
/**
* WorkerPool - Manage multiple workers for parallel inference.
* Automatically falls back to healthy workers when one is dead.
*/
export class WorkerPool {
workers = [];
currentIndex = 0;
modelAssignments = new Map();
poolOptions;
constructor(options = {}) {
this.poolOptions = options;
const numWorkers = options.numWorkers ??
(typeof navigator !== 'undefined' ? navigator.hardwareConcurrency : 4) ?? 4;
for (let i = 0; i < numWorkers; i++) {
this.workers.push(new InferenceWorker(options.workerUrl));
}
}
/**
* Get next healthy worker (round-robin, skipping dead ones)
*/
getNextHealthyWorker() {
const len = this.workers.length;
for (let attempt = 0; attempt < len; attempt++) {
const worker = this.workers[this.currentIndex];
this.currentIndex = (this.currentIndex + 1) % len;
if (worker.health === 'alive')
return worker;
}
// All workers are dead: restart the first one and return it
const worker = this.workers[0];
if (worker.health === 'dead')
worker.restart();
return worker;
}
/**
* Get worker for a specific model, falling back to any healthy worker
*/
getWorkerForModel(modelId) {
const index = this.modelAssignments.get(modelId);
if (index !== undefined) {
const worker = this.workers[index];
if (worker.health === 'alive')
return worker;
// Assigned worker is dead — pick a healthy one and reassign
const replacement = this.getNextHealthyWorker();
this.modelAssignments.set(modelId, this.workers.indexOf(replacement));
return replacement;
}
return this.getNextHealthyWorker();
}
/**
* Replace a worker at a given index with a fresh one
*/
replaceWorker(index) {
if (index < 0 || index >= this.workers.length)
return;
const old = this.workers[index];
old.terminate();
this.workers[index] = new InferenceWorker(this.poolOptions.workerUrl);
}
/**
* Initialize all workers
*/
async init() {
await Promise.all(this.workers.map(w => w.init()));
}
/**
* Load a model on a worker
*/
async loadModel(url, options) {
const worker = this.getNextHealthyWorker();
const modelId = await worker.loadModel(url, options);
this.modelAssignments.set(modelId, this.workers.indexOf(worker));
return modelId;
}
/**
* Run inference (auto-retries on a healthy worker if assigned one is dead)
*/
async runInference(modelId, inputs) {
const worker = this.getWorkerForModel(modelId);
return worker.runInference(modelId, inputs);
}
/**
* Run inference on multiple inputs in parallel
*/
async runBatch(modelId, batchInputs) {
const results = await Promise.all(batchInputs.map((inputs, i) => {
const worker = this.workers[i % this.workers.length];
return worker.runInference(modelId, inputs);
}));
return results;
}
/**
* Dispose a model
*/
async dispose(modelId) {
const worker = this.getWorkerForModel(modelId);
await worker.dispose(modelId);
this.modelAssignments.delete(modelId);
}
/**
* Terminate all workers
*/
terminate() {
for (const worker of this.workers) {
worker.terminate();
}
this.workers = [];
this.modelAssignments.clear();
}
/**
* Get number of workers
*/
get size() {
return this.workers.length;
}
}
// ============================================================================
// Global Instance
// ============================================================================
let globalWorkerPool = null;
/**
* Get or create global worker pool
*/
export function getWorkerPool(options) {
if (!globalWorkerPool) {
globalWorkerPool = new WorkerPool(options);
}
return globalWorkerPool;
}
/**
* Run inference in a worker
*/
export async function runInWorker(modelUrl, inputs, options) {
const pool = getWorkerPool();
await pool.init();
const modelId = await pool.loadModel(modelUrl, options);
const outputs = await pool.runInference(modelId, inputs);
return outputs;
}
/**
* Check if Web Workers are supported
*/
export function isWorkerSupported() {
return typeof Worker !== 'undefined';
}
//# sourceMappingURL=worker.js.map
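
Putting the pool together, the highest-level entry point is runInWorker. A browser-only sketch in which the model URL is a placeholder and the import paths are assumptions:

import { isWorkerSupported, runInWorker } from './worker.js';
import { tensor } from './tensor.js';

if (isWorkerSupported()) {
  const input = tensor([0.1, 0.2, 0.3], [1, 3]);
  // Loads the ONNX model in a pooled worker and runs inference off the main thread.
  const outputs = await runInWorker('/models/example.onnx', [input], { cache: true });
  console.log(outputs.map(t => t.shape));
}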
================================================
FILE: dist/edgeflow.browser.js
================================================
/* edgeFlow.js - Browser Bundle */
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __defNormalProp = (obj, key, value) => key in obj ? __defProp(obj, key, { enumerable: true, configurable: true, writable: true, value }) : obj[key] = value;
var __esm = (fn, res) => function __init() {
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
var __publicField = (obj, key, value) => {
__defNormalProp(obj, typeof key !== "symbol" ? key + "" : key, value);
return value;
};
// dist/core/types.js
var EdgeFlowError, ErrorCodes;
var init_types = __esm({
"dist/core/types.js"() {
"use strict";
EdgeFlowError = class extends Error {
constructor(message, code, details) {
super(message);
__publicField(this, "code");
__publicField(this, "details");
this.code = code;
this.details = details;
this.name = "EdgeFlowError";
}
};
ErrorCodes = {
// Runtime errors
RUNTIME_NOT_AVAILABLE: "RUNTIME_NOT_AVAILABLE",
RUNTIME_INIT_FAILED: "RUNTIME_INIT_FAILED",
RUNTIME_NOT_INITIALIZED: "RUNTIME_NOT_INITIALIZED",
// Model errors
MODEL_NOT_FOUND: "MODEL_NOT_FOUND",
MODEL_LOAD_FAILED: "MODEL_LOAD_FAILED",
MODEL_INVALID_FORMAT: "MODEL_INVALID_FORMAT",
MODEL_NOT_LOADED: "MODEL_NOT_LOADED",
// Inference errors
INFERENCE_FAILED: "INFERENCE_FAILED",
INFERENCE_TIMEOUT: "INFERENCE_TIMEOUT",
INFERENCE_CANCELLED: "INFERENCE_CANCELLED",
// Memory errors
OUT_OF_MEMORY: "OUT_OF_MEMORY",
MEMORY_LEAK_DETECTED: "MEMORY_LEAK_DETECTED",
// Tensor errors
TENSOR_SHAPE_MISMATCH: "TENSOR_SHAPE_MISMATCH",
TENSOR_DTYPE_MISMATCH: "TENSOR_DTYPE_MISMATCH",
TENSOR_DISPOSED: "TENSOR_DISPOSED",
// Pipeline errors
PIPELINE_NOT_SUPPORTED: "PIPELINE_NOT_SUPPORTED",
PIPELINE_INPUT_INVALID: "PIPELINE_INPUT_INVALID",
// General errors
INVALID_ARGUMENT: "INVALID_ARGUMENT",
NOT_IMPLEMENTED: "NOT_IMPLEMENTED",
UNKNOWN_ERROR: "UNKNOWN_ERROR"
};
}
});
// dist/core/tensor.js
function generateTensorId() {
return `tensor_${++tensorIdCounter}_${Date.now().toString(36)}`;
}
function getTypedArrayConstructor(dtype) {
switch (dtype) {
case "float32":
return Float32Array;
case "float16":
return Float32Array;
case "int32":
return Int32Array;
case "int64":
return BigInt64Array;
case "uint8":
case "bool":
return Uint8Array;
case "int8":
return Int8Array;
default:
throw new EdgeFlowError(`Unsupported data type: ${dtype}`, ErrorCodes.INVALID_ARGUMENT, { dtype });
}
}
function calculateSize(shape) {
if (shape.length === 0)
return 1;
return shape.reduce((acc, dim) => acc * dim, 1);
}
function validateShape(shape) {
for (let i = 0; i < shape.length; i++) {
const dim = shape[i];
if (dim === void 0 || !Number.isInteger(dim) || dim < 0) {
throw new EdgeFlowError(`Invalid shape dimension at index ${i}: ${dim}`, ErrorCodes.INVALID_ARGUMENT, { shape, index: i, dimension: dim });
}
}
}
function tensor(data, shape, dtype = "float32") {
if (Array.isArray(data) && data.length > 0 && Array.isArray(data[0])) {
const rows = data.length;
const cols = data[0].length;
const flatData = [];
for (const row of data) {
if (row.length !== cols) {
throw new EdgeFlowError("Nested arrays must have consistent dimensions", ErrorCodes.INVALID_ARGUMENT);
}
flatData.push(...row);
}
return new EdgeFlowTensor(flatData, shape ?? [rows, cols], dtype);
}
const inferredShape = shape ?? [data.length];
return new EdgeFlowTensor(data, inferredShape, dtype);
}
function zeros(shape, dtype = "float32") {
const size = calculateSize(shape);
const TypedArrayCtor = getTypedArrayConstructor(dtype);
const data = new TypedArrayCtor(size);
return new EdgeFlowTensor(data, shape, dtype);
}
function ones(shape, dtype = "float32") {
const size = calculateSize(shape);
const TypedArrayCtor = getTypedArrayConstructor(dtype);
const data = new TypedArrayCtor(size);
// BigInt64Array.fill() requires a BigInt value, so int64 tensors need 1n rather than 1
data.fill(dtype === "int64" ? 1n : 1);
return new EdgeFlowTensor(data, shape, dtype);
}
function full(shape, value, dtype = "float32") {
const size = calculateSize(shape);
const TypedArrayCtor = getTypedArrayConstructor(dtype);
const data = new TypedArrayCtor(size);
// Mirror the constructor's int64 handling: convert numeric fill values to BigInt
data.fill(dtype === "int64" ? BigInt(Math.round(value)) : value);
return new EdgeFlowTensor(data, shape, dtype);
}
function random(shape, dtype = "float32") {
const size = calculateSize(shape);
const data = new Float32Array(size);
for (let i = 0; i < size; i++) {
data[i] = Math.random();
}
return new EdgeFlowTensor(data, shape, dtype);
}
function randn(shape, dtype = "float32") {
const size = calculateSize(shape);
const data = new Float32Array(size);
for (let i = 0; i < size; i += 2) {
const u1 = Math.random();
const u2 = Math.random();
const r = Math.sqrt(-2 * Math.log(u1));
const theta = 2 * Math.PI * u2;
data[i] = r * Math.cos(theta);
if (i + 1 < size) {
data[i + 1] = r * Math.sin(theta);
}
}
return new EdgeFlowTensor(data, shape, dtype);
}
function arange(start, stop, step = 1, dtype = "float32") {
if (stop === void 0) {
stop = start;
start = 0;
}
const size = Math.ceil((stop - start) / step);
const data = new Float32Array(size);
for (let i = 0; i < size; i++) {
data[i] = start + i * step;
}
return new EdgeFlowTensor(data, [size], dtype);
}
function linspace(start, stop, num = 50, dtype = "float32") {
const data = new Float32Array(num);
const step = (stop - start) / (num - 1);
for (let i = 0; i < num; i++) {
data[i] = start + i * step;
}
return new EdgeFlowTensor(data, [num], dtype);
}
function eye(n, dtype = "float32") {
const data = new Float32Array(n * n);
for (let i = 0; i < n; i++) {
data[i * n + i] = 1;
}
return new EdgeFlowTensor(data, [n, n], dtype);
}
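// Usage sketch (illustrative) for the factory helpers above:
//   const a = tensor([1, 2, 3, 4], [2, 2]);   // explicit shape
//   const b = tensor([[1, 2], [3, 4]]);       // shape [2, 2] inferred from nesting
//   const z = zeros([2, 3]);
//   const r = arange(0, 10, 2);               // values 0, 2, 4, 6, 8
//   const I = eye(3);                         // 3x3 identity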
function add(a, b) {
if (typeof b === "number") {
const result2 = new Float32Array(a.size);
const aData2 = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result2[i] = (aData2[i] ?? 0) + b;
}
return new EdgeFlowTensor(result2, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError("Tensor sizes must match for element-wise operations", ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) + (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
function sub(a, b) {
if (typeof b === "number") {
const result2 = new Float32Array(a.size);
const aData2 = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result2[i] = (aData2[i] ?? 0) - b;
}
return new EdgeFlowTensor(result2, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError("Tensor sizes must match for element-wise operations", ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) - (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
function mul(a, b) {
if (typeof b === "number") {
const result2 = new Float32Array(a.size);
const aData2 = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result2[i] = (aData2[i] ?? 0) * b;
}
return new EdgeFlowTensor(result2, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError("Tensor sizes must match for element-wise operations", ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) * (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
function div(a, b) {
if (typeof b === "number") {
const result2 = new Float32Array(a.size);
const aData2 = a.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result2[i] = (aData2[i] ?? 0) / b;
}
return new EdgeFlowTensor(result2, a.shape, a.dtype);
}
if (a.size !== b.size) {
throw new EdgeFlowError("Tensor sizes must match for element-wise operations", ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(a.size);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < a.size; i++) {
result[i] = (aData[i] ?? 0) / (bData[i] ?? 0);
}
return new EdgeFlowTensor(result, a.shape, a.dtype);
}
function matmul(a, b) {
if (a.shape.length !== 2 || b.shape.length !== 2) {
throw new EdgeFlowError("matmul requires 2D tensors", ErrorCodes.INVALID_ARGUMENT, { aShape: a.shape, bShape: b.shape });
}
const [m, k1] = a.shape;
const [k2, n] = b.shape;
if (k1 !== k2) {
throw new EdgeFlowError(`Matrix dimensions incompatible for multiplication: (${m}x${k1}) @ (${k2}x${n})`, ErrorCodes.TENSOR_SHAPE_MISMATCH, { aShape: a.shape, bShape: b.shape });
}
const result = new Float32Array(m * n);
const aData = a.toFloat32Array();
const bData = b.toFloat32Array();
for (let i = 0; i < m; i++) {
for (let j = 0; j < n; j++) {
let sum2 = 0;
for (let k = 0; k < k1; k++) {
sum2 += (aData[i * k1 + k] ?? 0) * (bData[k * n + j] ?? 0);
}
result[i * n + j] = sum2;
}
}
return new EdgeFlowTensor(result, [m, n], a.dtype);
}
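// Usage sketch (illustrative): matmul expects two 2D tensors with matching inner dimensions:
//   const a = tensor([1, 2, 3, 4, 5, 6], [2, 3]);
//   const b = tensor([1, 0, 0, 1, 1, 0], [3, 2]);
//   const c = matmul(a, b);                   // shape [2, 2]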
function softmax(t, axis = -1) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
const actualAxis = axis < 0 ? t.shape.length + axis : axis;
if (actualAxis < 0 || actualAxis >= t.shape.length) {
throw new EdgeFlowError(`Invalid axis ${axis} for tensor with ${t.shape.length} dimensions`, ErrorCodes.INVALID_ARGUMENT, { axis, shape: t.shape });
}
if (t.shape.length === 1) {
let max = -Infinity;
for (let i = 0; i < t.size; i++) {
if ((data[i] ?? 0) > max)
max = data[i] ?? 0;
}
let sum2 = 0;
for (let i = 0; i < t.size; i++) {
result[i] = Math.exp((data[i] ?? 0) - max);
sum2 += result[i] ?? 0;
}
for (let i = 0; i < t.size; i++) {
result[i] = (result[i] ?? 0) / sum2;
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
if (t.shape.length === 2 && actualAxis === 1) {
const [rows, cols] = t.shape;
for (let i = 0; i < rows; i++) {
let max = -Infinity;
for (let j = 0; j < cols; j++) {
if ((data[i * cols + j] ?? 0) > max)
max = data[i * cols + j] ?? 0;
}
let sum2 = 0;
for (let j = 0; j < cols; j++) {
result[i * cols + j] = Math.exp((data[i * cols + j] ?? 0) - max);
sum2 += result[i * cols + j] ?? 0;
}
for (let j = 0; j < cols; j++) {
result[i * cols + j] = (result[i * cols + j] ?? 0) / sum2;
}
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
throw new EdgeFlowError("Softmax currently only supports 1D tensors or 2D tensors along the last axis", ErrorCodes.NOT_IMPLEMENTED, { shape: t.shape, axis });
}
function relu(t) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
for (let i = 0; i < t.size; i++) {
result[i] = Math.max(0, data[i] ?? 0);
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
function sigmoid(t) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
for (let i = 0; i < t.size; i++) {
result[i] = 1 / (1 + Math.exp(-(data[i] ?? 0)));
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
function tanh(t) {
const data = t.toFloat32Array();
const result = new Float32Array(t.size);
for (let i = 0; i < t.size; i++) {
result[i] = Math.tanh(data[i] ?? 0);
}
return new EdgeFlowTensor(result, t.shape, t.dtype);
}
function sum(t, axis) {
const data = t.toFloat32Array();
if (axis === void 0) {
let total = 0;
for (let i = 0; i < t.size; i++) {
total += data[i] ?? 0;
}
return total;
}
const actualAxis = axis < 0 ? t.shape.length + axis : axis;
if (actualAxis < 0 || actualAxis >= t.shape.length) {
throw new EdgeFlowError(`Invalid axis ${axis} for tensor with ${t.shape.length} dimensions`, ErrorCodes.INVALID_ARGUMENT, { axis, shape: t.shape });
}
const newShape = [...t.shape];
newShape.splice(actualAxis, 1);
if (newShape.length === 0) {
let total = 0;
for (let i = 0; i < t.size; i++) {
total += data[i] ?? 0;
}
return total;
}
if (t.shape.length === 2) {
const [rows, cols] = t.shape;
if (actualAxis === 0) {
const result = new Float32Array(cols);
for (let j = 0; j < cols; j++) {
for (let i = 0; i < rows; i++) {
result[j] = (result[j] ?? 0) + (data[i * cols + j] ?? 0);
}
}
return new EdgeFlowTensor(result, [cols], t.dtype);
} else {
const result = new Float32Array(rows);
for (let i = 0; i < rows; i++) {
for (let j = 0; j < cols; j++) {
result[i] = (result[i] ?? 0) + (data[i * cols + j] ?? 0);
}
}
return new EdgeFlowTensor(result, [rows], t.dtype);
}
}
throw new EdgeFlowError("Sum along axis currently only supports up to 2D tensors", ErrorCodes.NOT_IMPLEMENTED, { shape: t.shape, axis });
}
function mean(t, axis) {
if (axis === void 0) {
return sum(t) / t.size;
}
// Normalize negative axes the same way sum() does so the divisor is taken from the right dimension
const actualAxis = axis < 0 ? t.shape.length + axis : axis;
const axisSize = t.shape[actualAxis] ?? 1;
const result = sum(t, axis);
if (typeof result === "number") {
return result / axisSize;
}
return div(result, axisSize);
}
function argmax(t, axis) {
const data = t.toFloat32Array();
if (axis === void 0) {
let maxIdx = 0;
let maxVal = data[0] ?? -Infinity;
for (let i = 1; i < t.size; i++) {
if ((data[i] ?? -Infinity) > maxVal) {
maxVal = data[i] ?? -Infinity;
maxIdx = i;
}
}
return maxIdx;
}
const actualAxis = axis < 0 ? t.shape.length + axis : axis;
if (t.shape.length === 2 && actualAxis === 1) {
const [rows, cols] = t.shape;
const result = new Float32Array(rows);
for (let i = 0; i < rows; i++) {
let maxIdx = 0;
let maxVal = data[i * cols] ?? -Infinity;
for (let j = 1; j < cols; j++) {
if ((data[i * cols + j] ?? -Infinity) > maxVal) {
maxVal = data[i * cols + j] ?? -Infinity;
maxIdx = j;
}
}
result[i] = maxIdx;
}
return new EdgeFlowTensor(result, [rows], "int32");
}
throw new EdgeFlowError("Argmax along axis currently only supports 2D tensors along the last axis", ErrorCodes.NOT_IMPLEMENTED, { shape: t.shape, axis });
}
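// Usage sketch (illustrative): a typical classification post-processing step, combining
// softmax over the last axis with a per-row argmax:
//   const logits = tensor([[2.0, 1.0, 0.5], [0.1, 0.2, 3.0]]);  // shape [2, 3]
//   const probs = softmax(logits, -1);        // each row sums to 1
//   const preds = argmax(logits, -1);         // values [0, 2]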
function concat(tensors, axis = 0) {
if (tensors.length === 0) {
throw new EdgeFlowError("Cannot concatenate empty array of tensors", ErrorCodes.INVALID_ARGUMENT);
}
if (tensors.length === 1) {
return tensors[0]?.clone() ?? zeros([0]);
}
const first = tensors[0];
if (!first) {
throw new EdgeFlowError("First tensor is undefined", ErrorCodes.INVALID_ARGUMENT);
}
const actualAxis = axis < 0 ? first.shape.length + axis : axis;
for (let i = 1; i < tensors.length; i++) {
const t = tensors[i];
if (!t)
continue;
if (t.shape.length !== first.shape.length) {
throw new EdgeFlowError("All tensors must have the same number of dimensions", ErrorCodes.TENSOR_SHAPE_MISMATCH);
}
for (let j = 0; j < first.shape.length; j++) {
if (j !== actualAxis && first.shape[j] !== t.shape[j]) {
throw new EdgeFlowError(`Shape mismatch at dimension ${j}`, ErrorCodes.TENSOR_SHAPE_MISMATCH);
}
}
}
const newShape = [...first.shape];
let totalAxisSize = 0;
for (const t of tensors) {
if (t)
totalAxisSize += t.shape[actualAxis] ?? 0;
}
newShape[actualAxis] = totalAxisSize;
if (first.shape.length === 1) {
const result = new Float32Array(totalAxisSize);
let offset = 0;
for (const t of tensors) {
if (!t)
continue;
result.set(t.toFloat32Array(), offset);
offset += t.size;
}
return new EdgeFlowTensor(result, newShape, first.dtype);
}
throw new EdgeFlowError("Concatenation currently only supports 1D tensors", ErrorCodes.NOT_IMPLEMENTED);
}
var tensorIdCounter, EdgeFlowTensor;
var init_tensor = __esm({
"dist/core/tensor.js"() {
"use strict";
init_types();
tensorIdCounter = 0;
EdgeFlowTensor = class _EdgeFlowTensor {
constructor(data, shape, dtype = "float32") {
__publicField(this, "id");
__publicField(this, "dtype");
__publicField(this, "shape");
__publicField(this, "size");
__publicField(this, "_data");
__publicField(this, "_isDisposed", false);
validateShape(shape);
this.id = generateTensorId();
this.dtype = dtype;
this.shape = Object.freeze([...shape]);
this.size = calculateSize(this.shape);
const expectedSize = this.size;
if (data.length !== expectedSize) {
throw new EdgeFlowError(`Data length (${data.length}) does not match shape ${JSON.stringify(shape)} (expected ${expectedSize})`, ErrorCodes.TENSOR_SHAPE_MISMATCH, { dataLength: data.length, expectedSize, shape });
}
if (data instanceof Array) {
const TypedArrayCtor = getTypedArrayConstructor(dtype);
this._data = new TypedArrayCtor(data.length);
if (dtype === "int64") {
const bigIntData = this._data;
for (let i = 0; i < data.length; i++) {
bigIntData[i] = BigInt(Math.round(data[i] ?? 0));
}
} else {
for (let i = 0; i < data.length; i++) {
this._data[i] = data[i] ?? 0;
}
}
} else {
this._data = data;
}
}
get data() {
this.checkDisposed();
return this._data;
}
get isDisposed() {
return this._isDisposed;
}
/**
* Check if tensor has been disposed
*/
checkDisposed() {
if (this._isDisposed) {
throw new EdgeFlowError("Cannot access disposed tensor", ErrorCodes.TENSOR_DISPOSED, { tensorId: this.id });
}
}
/**
* Convert to Float32Array
*/
toFloat32Array() {
this.checkDisposed();
if (this._data instanceof Float32Array) {
return this._data;
}
const result = new Float32Array(this.size);
for (let i = 0; i < this.size; i++) {
result[i] = Number(this._data[i] ?? 0);
}
return result;
}
/**
* Convert to regular array
*/
toArray() {
this.checkDisposed();
if (this.dtype === "int64") {
const bigIntData = this._data;
const result = [];
for (let i = 0; i < bigIntData.length; i++) {
result.push(Number(bigIntData[i]));
}
return result;
}
return Array.from(this._data);
}
/**
* Clone the tensor
*/
clone() {
this.checkDisposed();
const TypedArrayCtor = this._data.constructor;
const clonedData = new TypedArrayCtor(this._data);
return new _EdgeFlowTensor(clonedData, this.shape, this.dtype);
}
/**
* Dispose the tensor and free memory
*/
dispose() {
if (!this._isDisposed) {
this._isDisposed = true;
Object.assign(this, { _data: null });
}
}
/**
* Get value at specific indices
*/
get(...indices) {
this.checkDisposed();
if (indices.length !== this.shape.length) {
throw new EdgeFlowError(`Expected ${this.shape.length} indices, got ${indices.length}`, ErrorCodes.INVALID_ARGUMENT, { expectedIndices: this.shape.length, gotIndices: indices.length });
}
let flatIndex = 0;
let stride = 1;
for (let i = this.shape.length - 1; i >= 0; i--) {
const idx = indices[i] ?? 0;
const dim = this.shape[i] ?? 1;
if (idx < 0 || idx >= dim) {
throw new EdgeFlowError(`Index ${idx} out of bounds for dimension ${i} with size ${dim}`, ErrorCodes.INVALID_ARGUMENT, { index: idx, dimension: i, size: dim });
}
flatIndex += idx * stride;
stride *= dim;
}
return Number(this._data[flatIndex] ?? 0);
}
/**
* Set value at specific indices
*/
set(value, ...indices) {
this.checkDisposed();
if (indices.length !== this.shape.length) {
throw new EdgeFlowError(`Expected ${this.shape.length} indices, got ${indices.length}`, ErrorCodes.INVALID_ARGUMENT, { expectedIndices: this.shape.length, gotIndices: indices.length });
}
let flatIndex = 0;
let stride = 1;
for (let i = this.shape.length - 1; i >= 0; i--) {
const idx = indices[i] ?? 0;
const dim = this.shape[i] ?? 1;
if (idx < 0 || idx >= dim) {
throw new EdgeFlowError(`Index ${idx} out of bounds for dimension ${i} with size ${dim}`, ErrorCodes.INVALID_ARGUMENT, { index: idx, dimension: i, size: dim });
}
flatIndex += idx * stride;
stride *= dim;
}
this._data[flatIndex] = value;
}
/**
* Reshape the tensor (returns new tensor)
*/
reshape(newShape) {
this.checkDisposed();
const newSize = calculateSize(newShape);
if (newSize !== this.size) {
throw new EdgeFlowError(`Cannot reshape tensor of size ${this.size} to shape ${JSON.stringify(newShape)} (size ${newSize})`, ErrorCodes.TENSOR_SHAPE_MISMATCH, { currentSize: this.size, newSize, newShape });
}
const TypedArrayCtor = this._data.constructor;
const clonedData = new TypedArrayCtor(this._data);
return new _EdgeFlowTensor(clonedData, newShape, this.dtype);
}
/**
* Transpose the tensor (2D only for now)
*/
transpose() {
this.checkDisposed();
if (this.shape.length !== 2) {
throw new EdgeFlowError("Transpose is currently only supported for 2D tensors", ErrorCodes.NOT_IMPLEMENTED, { shape: this.shape });
}
const [rows, cols] = this.shape;
const result = new Float32Array(this.size);
for (let i = 0; i < rows; i++) {
for (let j = 0; j < cols; j++) {
result[j * rows + i] = Number(this._data[i * cols + j] ?? 0);
}
}
return new _EdgeFlowTensor(result, [cols, rows], this.dtype);
}
/**
* Create string representation
*/
toString() {
return `Tensor(shape=[${this.shape.join(", ")}], dtype=${this.dtype})`;
}
};
}
});
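// Usage sketch (illustrative) for EdgeFlowTensor instance methods:
//   const t = tensor([1, 2, 3, 4, 5, 6], [2, 3]);
//   t.get(1, 2);                 // 6
//   const r = t.reshape([3, 2]); // copy with the new shape
//   const tt = t.transpose();    // shape [3, 2]
//   t.dispose();                 // later access throws TENSOR_DISPOSED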
// dist/utils/model-loader.js
var model_loader_exports = {};
__export(model_loader_exports, {
cancelPreload: () => cancelPreload,
clearModelCache: () => clearModelCache,
deleteCachedModel: () => deleteCachedModel,
getCachedModel: () => getCachedModel,
getModelCacheStats: () => getModelCacheStats,
getPreloadStatus: () => getPreloadStatus,
getPreloadedModel: () => getPreloadedModel,
isModelCached: () => isModelCached,
loadModelData: () => loadModelData,
preloadModel: () => preloadModel,
preloadModels: () => preloadModels
});
async function supportsRangeRequests(url) {
try {
const response = await fetch(url, { method: "HEAD" });
const acceptRanges = response.headers.get("Accept-Ranges");
const contentLength = response.headers.get("Content-Length");
const etag = response.headers.get("ETag") ?? void 0;
return {
supports: acceptRanges === "bytes",
size: contentLength ? parseInt(contentLength, 10) : 0,
etag
};
} catch {
return { supports: false, size: 0 };
}
}
async function downloadChunk(url, start, end, timeout) {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
const response = await fetch(url, {
headers: { Range: `bytes=${start}-${end}` },
signal: controller.signal
});
if (response.status !== 206 && response.status !== 200) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
return await response.arrayBuffer();
} finally {
clearTimeout(timeoutId);
}
}
async function downloadWithResume(url, options) {
const {
chunkSize = 5 * 1024 * 1024,
// 5MB
parallelConnections = 4,
timeout = 3e4,
onProgress
} = options;
const { supports: supportsRange, size: totalSize, etag } = await supportsRangeRequests(url);
if (!supportsRange || totalSize < chunkSize * 2) {
return downloadSimple(url, timeout, onProgress);
}
let state = await modelCache.getDownloadState(url);
if (!state || etag && state.totalSize !== totalSize) {
const numChunks = Math.ceil(totalSize / chunkSize);
const chunks2 = [];
for (let i = 0; i < numChunks; i++) {
const start = i * chunkSize;
const end = Math.min(start + chunkSize - 1, totalSize - 1);
chunks2.push({ index: i, start, end, downloaded: false });
}
state = {
url,
totalSize,
downloadedSize: 0,
chunks: chunks2,
startedAt: Date.now()
};
await modelCache.deleteModel(url);
}
const pendingChunks = state.chunks.filter((c) => !c.downloaded);
let downloadedSize = state.downloadedSize;
const startTime = Date.now();
let lastProgressTime = startTime;
let lastDownloadedSize = downloadedSize;
const reportProgress = () => {
if (!onProgress)
return;
const now = Date.now();
const elapsed = (now - lastProgressTime) / 1e3;
const bytesDownloaded = downloadedSize - lastDownloadedSize;
const speed = elapsed > 0 ? bytesDownloaded / elapsed : 0;
const remaining = totalSize - downloadedSize;
const eta = speed > 0 ? remaining / speed * 1e3 : 0;
onProgress({
loaded: downloadedSize,
total: totalSize,
percent: downloadedSize / totalSize * 100,
speed,
eta,
currentChunk: state.chunks.filter((c) => c.downloaded).length,
totalChunks: state.chunks.length
});
lastProgressTime = now;
lastDownloadedSize = downloadedSize;
};
const downloadQueue = [...pendingChunks];
const inProgress = /* @__PURE__ */ new Map();
while (downloadQueue.length > 0 || inProgress.size > 0) {
while (downloadQueue.length > 0 && inProgress.size < parallelConnections) {
const chunk = downloadQueue.shift();
const downloadPromise = (async () => {
try {
const data = await downloadChunk(url, chunk.start, chunk.end, timeout);
await modelCache.saveChunk(url, chunk.index, data);
chunk.downloaded = true;
downloadedSize += data.byteLength;
state.downloadedSize = downloadedSize;
await modelCache.saveDownloadState(state);
reportProgress();
} finally {
inProgress.delete(chunk.index);
}
})();
inProgress.set(chunk.index, downloadPromise);
}
if (inProgress.size > 0) {
await Promise.race(inProgress.values());
}
}
const chunks = await modelCache.getChunks(url);
const result = new Uint8Array(totalSize);
let offset = 0;
for (const chunk of chunks) {
result.set(new Uint8Array(chunk), offset);
offset += chunk.byteLength;
}
await modelCache.saveMeta({
url,
size: totalSize,
etag,
cachedAt: Date.now(),
chunks: chunks.length,
complete: true
});
await modelCache.deleteDownloadState(url);
return result.buffer;
}
async function downloadSimple(url, timeout, onProgress) {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeout);
try {
const response = await fetch(url, { signal: controller.signal });
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const contentLength = response.headers.get("Content-Length");
const total = contentLength ? parseInt(contentLength, 10) : 0;
if (!response.body || !onProgress || total === 0) {
return await response.arrayBuffer();
}
const reader = response.body.getReader();
const chunks = [];
let loaded = 0;
const startTime = Date.now();
while (true) {
const { done, value } = await reader.read();
if (done)
break;
chunks.push(value);
loaded += value.length;
const elapsed = (Date.now() - startTime) / 1e3;
const speed = elapsed > 0 ? loaded / elapsed : 0;
const remaining = total - loaded;
const eta = speed > 0 ? remaining / speed * 1e3 : 0;
onProgress({
loaded,
total,
percent: loaded / total * 100,
speed,
eta
});
}
const result = new Uint8Array(loaded);
let offset = 0;
for (const chunk of chunks) {
result.set(chunk, offset);
offset += chunk.length;
}
return result.buffer;
} finally {
clearTimeout(timeoutId);
}
}
async function loadModelData(url, options = {}) {
const { cache = true, forceDownload = false, resumable = true } = options;
if (cache && !forceDownload) {
const cached = await modelCache.getModel(url);
if (cached) {
const firstByte = new Uint8Array(cached)[0];
const isHtmlOrText = firstByte === 60 || firstByte === 123;
if (isHtmlOrText || cached.byteLength < 1024) {
console.warn(`[edgeFlow.js] Cached model for ${url} appears corrupt (${cached.byteLength} bytes, first byte 0x${firstByte?.toString(16)}). Evicting and re-downloading.`);
await modelCache.deleteModel(url);
} else {
console.log(`\u2713 Model loaded from cache: ${url}`);
options.onProgress?.({
loaded: cached.byteLength,
total: cached.byteLength,
percent: 100,
speed: 0,
eta: 0
});
return cached;
}
}
}
let data;
if (resumable) {
data = await downloadWithResume(url, options);
} else {
data = await downloadSimple(url, options.timeout ?? 3e4, options.onProgress);
}
if (cache) {
if (!resumable) {
await modelCache.saveChunk(url, 0, data);
await modelCache.saveMeta({
url,
size: data.byteLength,
cachedAt: Date.now(),
chunks: 1,
complete: true
});
}
}
return data;
}
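// Usage sketch (illustrative; the URL is a placeholder): loadModelData resolves to an
// ArrayBuffer and reports progress while downloading (or 100% immediately on a cache hit):
//   const buf = await loadModelData("https://example.com/model.onnx", {
//     onProgress: (p) => console.log(`${p.percent.toFixed(1)}%  eta ${Math.round(p.eta / 1e3)}s`),
//   });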
function preloadModel(url, options = {}) {
return preloadManager.preload(url, options);
}
function preloadModels(urls, options = {}) {
return Promise.all(urls.map(({ url, priority }) => preloadManager.preload(url, { ...options, priority })));
}
async function isModelCached(url) {
const meta = await modelCache.getMeta(url);
return meta?.complete ?? false;
}
async function getCachedModel(url) {
return modelCache.getModel(url);
}
async function deleteCachedModel(url) {
return modelCache.deleteModel(url);
}
async function clearModelCache() {
return modelCache.clear();
}
async function getModelCacheStats() {
return modelCache.getStats();
}
function getPreloadStatus(url) {
return preloadManager.getStatus(url);
}
function cancelPreload(url) {
preloadManager.cancel(url);
}
async function getPreloadedModel(url) {
return preloadManager.get(url);
}
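// Usage sketch (illustrative; URLs are placeholders): preloads run in the background with
// at most two concurrent downloads, higher `priority` values first:
//   preloadModels([
//     { url: "https://example.com/encoder.onnx", priority: 1 },
//     { url: "https://example.com/decoder.onnx", priority: 0 },
//   ]);
//   getPreloadStatus("https://example.com/encoder.onnx"); // "pending" | "loading" | "complete" | "error"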
var DB_NAME, DB_VERSION, STORE_META, STORE_CHUNKS, STORE_STATE, ModelCache2, modelCache, PreloadManager, preloadManager;
var init_model_loader = __esm({
"dist/utils/model-loader.js"() {
"use strict";
DB_NAME = "edgeflow-model-cache";
DB_VERSION = 1;
STORE_META = "meta";
STORE_CHUNKS = "chunks";
STORE_STATE = "download-state";
ModelCache2 = class {
constructor() {
__publicField(this, "db", null);
__publicField(this, "dbPromise", null);
}
/**
* Open the database
*/
async openDB() {
if (this.db)
return this.db;
if (this.dbPromise)
return this.dbPromise;
this.dbPromise = new Promise((resolve, reject) => {
const request = indexedDB.open(DB_NAME, DB_VERSION);
request.onupgradeneeded = (event) => {
const db = event.target.result;
if (!db.objectStoreNames.contains(STORE_META)) {
db.createObjectStore(STORE_META, { keyPath: "url" });
}
if (!db.objectStoreNames.contains(STORE_CHUNKS)) {
const chunkStore = db.createObjectStore(STORE_CHUNKS, { keyPath: ["url", "index"] });
chunkStore.createIndex("url", "url", { unique: false });
}
if (!db.objectStoreNames.contains(STORE_STATE)) {
db.createObjectStore(STORE_STATE, { keyPath: "url" });
}
};
request.onsuccess = () => {
this.db = request.result;
resolve(this.db);
};
request.onerror = () => reject(request.error);
});
return this.dbPromise;
}
/**
* Get cached model metadata
*/
async getMeta(url) {
const db = await this.openDB();
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_META, "readonly");
const store = tx.objectStore(STORE_META);
const request = store.get(url);
request.onsuccess = () => resolve(request.result ?? null);
request.onerror = () => reject(request.error);
});
}
/**
* Save model metadata (with quota error handling)
*/
async saveMeta(meta) {
try {
await this.putInStore(STORE_META, meta);
} catch (err) {
if (this.isQuotaError(err)) {
await this.evictOldest(meta.size);
try {
await this.putInStore(STORE_META, meta);
} catch {
console.warn("[edgeFlow.js] IndexedDB quota exceeded even after eviction; skipping cache.");
}
} else {
throw err;
}
}
}
/**
* Save a chunk (with quota error handling)
*/
async saveChunk(url, index, data) {
try {
await this.putInStore(STORE_CHUNKS, { url, index, data });
} catch (err) {
if (this.isQuotaError(err)) {
await this.evictOldest(data.byteLength);
try {
await this.putInStore(STORE_CHUNKS, { url, index, data });
} catch {
console.warn("[edgeFlow.js] IndexedDB quota exceeded even after eviction; skipping cache for chunk.");
}
} else {
throw err;
}
}
}
/**
* Generic put helper
*/
async putInStore(storeName, value) {
const db = await this.openDB();
return new Promise((resolve, reject) => {
const tx = db.transaction(storeName, "readwrite");
const store = tx.objectStore(storeName);
store.put(value);
tx.oncomplete = () => resolve();
tx.onerror = () => reject(tx.error);
});
}
/**
* Detect IndexedDB quota exceeded errors
*/
isQuotaError(err) {
if (err instanceof DOMException) {
return err.name === "QuotaExceededError" || err.code === 22;
}
return false;
}
/**
* Evict oldest cached models to free space.
* Deletes models by ascending `cachedAt` until at least `bytesNeeded` is freed.
*/
async evictOldest(bytesNeeded) {
const db = await this.openDB();
const allMeta = await new Promise((resolve, reject) => {
const tx = db.transaction(STORE_META, "readonly");
const store = tx.objectStore(STORE_META);
const request = store.getAll();
request.onsuccess = () => resolve(request.result ?? []);
request.onerror = () => reject(request.error);
});
allMeta.sort((a, b) => a.cachedAt - b.cachedAt);
let freed = 0;
for (const meta of allMeta) {
if (freed >= bytesNeeded)
break;
await this.deleteModel(meta.url);
freed += meta.size;
}
}
/**
* Get all chunks for a URL
*/
async getChunks(url) {
const db = await this.openDB();
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_CHUNKS, "readonly");
const store = tx.objectStore(STORE_CHUNKS);
const index = store.index("url");
const request = index.getAll(url);
request.onsuccess = () => {
const results = request.result;
results.sort((a, b) => a.index - b.index);
resolve(results.map((r) => r.data));
};
request.onerror = () => reject(request.error);
});
}
/**
* Get complete model data (merged chunks)
*/
async getModel(url) {
const meta = await this.getMeta(url);
if (!meta || !meta.complete)
return null;
const chunks = await this.getChunks(url);
if (chunks.length === 0)
return null;
const totalSize = chunks.reduce((sum2, chunk) => sum2 + chunk.byteLength, 0);
const result = new Uint8Array(totalSize);
let offset = 0;
for (const chunk of chunks) {
result.set(new Uint8Array(chunk), offset);
offset += chunk.byteLength;
}
return result.buffer;
}
/**
* Save download state (for resume, with quota handling)
*/
async saveDownloadState(state) {
try {
await this.putInStore(STORE_STATE, state);
} catch (err) {
if (this.isQuotaError(err)) {
console.warn("[edgeFlow.js] IndexedDB quota exceeded saving download state; resume may not work.");
} else {
throw err;
}
}
}
/**
* Get download state
*/
async getDownloadState(url) {
const db = await this.openDB();
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_STATE, "readonly");
const store = tx.objectStore(STORE_STATE);
const request = store.get(url);
request.onsuccess = () => resolve(request.result ?? null);
request.onerror = () => reject(request.error);
});
}
/**
* Delete download state
*/
async deleteDownloadState(url) {
const db = await this.openDB();
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_STATE, "readwrite");
const store = tx.objectStore(STORE_STATE);
store.delete(url);
tx.oncomplete = () => resolve();
tx.onerror = () => reject(tx.error);
});
}
/**
* Delete cached model
*/
async deleteModel(url) {
const db = await this.openDB();
await new Promise((resolve, reject) => {
const tx = db.transaction(STORE_META, "readwrite");
const store = tx.objectStore(STORE_META);
store.delete(url);
tx.oncomplete = () => resolve();
tx.onerror = () => reject(tx.error);
});
const chunks = await this.getChunks(url);
if (chunks.length > 0) {
await new Promise((resolve, reject) => {
const tx = db.transaction(STORE_CHUNKS, "readwrite");
const store = tx.objectStore(STORE_CHUNKS);
const index = store.index("url");
const request = index.openCursor(IDBKeyRange.only(url));
request.onsuccess = (event) => {
const cursor = event.target.result;
if (cursor) {
cursor.delete();
cursor.continue();
}
};
tx.oncomplete = () => resolve();
tx.onerror = () => reject(tx.error);
});
}
await this.deleteDownloadState(url);
}
/**
* Clear all cached models
*/
async clear() {
const db = await this.openDB();
const stores = [STORE_META, STORE_CHUNKS, STORE_STATE];
for (const storeName of stores) {
await new Promise((resolve, reject) => {
const tx = db.transaction(storeName, "readwrite");
const store = tx.objectStore(storeName);
store.clear();
tx.oncomplete = () => resolve();
tx.onerror = () => reject(tx.error);
});
}
}
/**
* Get cache statistics
*/
async getStats() {
const db = await this.openDB();
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_META, "readonly");
const store = tx.objectStore(STORE_META);
const request = store.getAll();
request.onsuccess = () => {
const metas = request.result;
resolve({
models: metas.filter((m) => m.complete).length,
totalSize: metas.reduce((sum2, m) => sum2 + (m.complete ? m.size : 0), 0)
});
};
request.onerror = () => reject(request.error);
});
}
};
modelCache = new ModelCache2();
PreloadManager = class {
constructor() {
__publicField(this, "tasks", /* @__PURE__ */ new Map());
__publicField(this, "queue", []);
__publicField(this, "maxConcurrent", 2);
__publicField(this, "activeCount", 0);
}
/**
* Preload a model in the background
*/
preload(url, options = {}) {
const existing = this.tasks.get(url);
if (existing) {
return existing.promise;
}
let resolve;
let reject;
const promise = new Promise((res, rej) => {
resolve = res;
reject = rej;
});
const task = {
url,
priority: options.priority ?? 0,
options,
promise,
resolve,
reject,
status: "pending"
};
this.tasks.set(url, task);
const insertIndex = this.queue.findIndex((u) => {
const t = this.tasks.get(u);
return t && t.priority < task.priority;
});
if (insertIndex === -1) {
this.queue.push(url);
} else {
this.queue.splice(insertIndex, 0, url);
}
this.processQueue();
return promise;
}
/**
* Process the preload queue
*/
async processQueue() {
while (this.queue.length > 0 && this.activeCount < this.maxConcurrent) {
const url = this.queue.shift();
if (!url)
break;
const task = this.tasks.get(url);
if (!task || task.status !== "pending")
continue;
this.activeCount++;
task.status = "loading";
this.downloadTask(task).finally(() => {
this.activeCount--;
this.processQueue();
});
}
}
/**
* Download a preload task
*/
async downloadTask(task) {
try {
const data = await loadModelData(task.url, task.options);
task.status = "complete";
task.resolve(data);
} catch (error) {
task.status = "error";
task.reject(error instanceof Error ? error : new Error(String(error)));
}
}
/**
* Check if a model is preloaded
*/
isPreloaded(url) {
const task = this.tasks.get(url);
return task?.status === "complete";
}
/**
* Get preload status
*/
getStatus(url) {
const task = this.tasks.get(url);
return task?.status ?? "not_found";
}
/**
* Get preloaded model data
*/
async get(url) {
const task = this.tasks.get(url);
if (!task)
return null;
if (task.status === "complete" || task.status === "loading") {
return task.promise;
}
return null;
}
/**
* Cancel preload
*/
cancel(url) {
const task = this.tasks.get(url);
if (task && task.status === "pending") {
this.tasks.delete(url);
this.queue = this.queue.filter((u) => u !== url);
task.reject(new Error("Preload cancelled"));
}
}
/**
* Clear all preloads
*/
clear() {
for (const [, task] of this.tasks) {
if (task.status === "pending") {
task.reject(new Error("Preload cleared"));
}
}
this.tasks.clear();
this.queue = [];
}
};
preloadManager = new PreloadManager();
}
});
// dist/index.js
init_types();
init_tensor();
// dist/core/scheduler.js
init_types();
var Task = class {
constructor(id, modelId, priority, executor) {
__publicField(this, "id");
__publicField(this, "modelId");
__publicField(this, "priority");
__publicField(this, "createdAt");
__publicField(this, "_status", "pending");
__publicField(this, "_startedAt");
__publicField(this, "_completedAt");
__publicField(this, "_result");
__publicField(this, "_error");
__publicField(this, "_executor");
__publicField(this, "_resolvers", []);
__publicField(this, "_cancelled", false);
this.id = id;
this.modelId = modelId;
this.priority = priority;
this.createdAt = Date.now();
this._executor = executor;
}
get status() {
return this._status;
}
get startedAt() {
return this._startedAt;
}
get completedAt() {
return this._completedAt;
}
get result() {
return this._result;
}
get error() {
return this._error;
}
/**
* Cancel the task
*/
cancel() {
if (this._status === "pending") {
this._cancelled = true;
this._status = "cancelled";
this._completedAt = Date.now();
const cancelError = new EdgeFlowError("Task was cancelled", ErrorCodes.INFERENCE_CANCELLED, { taskId: this.id });
for (const { reject } of this._resolvers) {
reject(cancelError);
}
this._resolvers = [];
}
}
/**
* Wait for task completion
*/
wait() {
if (this._status === "completed") {
return Promise.resolve(this._result);
}
if (this._status === "failed") {
return Promise.reject(this._error);
}
if (this._status === "cancelled") {
return Promise.reject(new EdgeFlowError("Task was cancelled", ErrorCodes.INFERENCE_CANCELLED, { taskId: this.id }));
}
return new Promise((resolve, reject) => {
this._resolvers.push({ resolve, reject });
});
}
/**
* Execute the task
*/
async execute() {
if (this._cancelled) {
return;
}
this._status = "running";
this._startedAt = Date.now();
try {
this._result = await this._executor();
this._status = "completed";
this._completedAt = Date.now();
for (const { resolve } of this._resolvers) {
resolve(this._result);
}
} catch (err) {
this._error = err instanceof Error ? err : new Error(String(err));
this._status = "failed";
this._completedAt = Date.now();
for (const { reject } of this._resolvers) {
reject(this._error);
}
}
this._resolvers = [];
}
};
var PRIORITY_ORDER = {
critical: 0,
high: 1,
normal: 2,
low: 3
};
var PriorityQueue = class {
constructor() {
__publicField(this, "items", []);
}
get length() {
return this.items.length;
}
isEmpty() {
return this.items.length === 0;
}
/**
* Add item to queue with priority ordering
*/
enqueue(item) {
let inserted = false;
for (let i = 0; i < this.items.length; i++) {
const currentItem = this.items[i];
if (currentItem && PRIORITY_ORDER[item.priority] < PRIORITY_ORDER[currentItem.priority]) {
this.items.splice(i, 0, item);
inserted = true;
break;
}
}
if (!inserted) {
this.items.push(item);
}
}
/**
* Remove and return highest priority item
*/
dequeue() {
return this.items.shift();
}
/**
* Peek at highest priority item without removing
*/
peek() {
return this.items[0];
}
/**
* Remove a specific item by ID
*/
remove(id) {
const index = this.items.findIndex((item) => item.id === id);
if (index !== -1) {
const [removed] = this.items.splice(index, 1);
return removed;
}
return void 0;
}
/**
* Get all items
*/
getAll() {
return [...this.items];
}
/**
* Clear the queue
*/
clear() {
this.items = [];
}
};
var taskIdCounter = 0;
function generateTaskId() {
return `task_${++taskIdCounter}_${Date.now().toString(36)}`;
}
var DEFAULT_OPTIONS = {
maxConcurrentTasks: 4,
maxConcurrentPerModel: 1,
defaultTimeout: 3e4,
enableBatching: false,
maxBatchSize: 32,
batchTimeout: 50,
maxRetries: 0,
retryBaseDelay: 1e3,
circuitBreaker: false,
circuitBreakerThreshold: 5,
circuitBreakerResetTimeout: 3e4
};
var InferenceScheduler = class {
constructor(options = {}) {
__publicField(this, "options");
__publicField(this, "queues", /* @__PURE__ */ new Map());
__publicField(this, "runningTasks", /* @__PURE__ */ new Map());
__publicField(this, "allTasks", /* @__PURE__ */ new Map());
__publicField(this, "batchers", /* @__PURE__ */ new Map());
__publicField(this, "listeners", /* @__PURE__ */ new Map());
__publicField(this, "circuits", /* @__PURE__ */ new Map());
__publicField(this, "globalRunningCount", 0);
__publicField(this, "isProcessing", false);
__publicField(this, "disposed", false);
this.options = { ...DEFAULT_OPTIONS, ...options };
}
/**
* Get circuit breaker state for a model, creating default if absent
*/
getCircuit(modelId) {
let c = this.circuits.get(modelId);
if (!c) {
c = { failures: 0, state: "closed", lastFailure: 0 };
this.circuits.set(modelId, c);
}
return c;
}
/**
* Check if the circuit for a model allows new tasks
*/
isCircuitOpen(modelId) {
if (!this.options.circuitBreaker)
return false;
const c = this.getCircuit(modelId);
if (c.state === "closed")
return false;
if (c.state === "open") {
if (Date.now() - c.lastFailure > this.options.circuitBreakerResetTimeout) {
c.state = "half-open";
return false;
}
return true;
}
return false;
}
/**
* Record a success for circuit breaker
*/
circuitSuccess(modelId) {
if (!this.options.circuitBreaker)
return;
const c = this.getCircuit(modelId);
c.failures = 0;
c.state = "closed";
}
/**
* Record a failure for circuit breaker
*/
circuitFailure(modelId) {
if (!this.options.circuitBreaker)
return;
const c = this.getCircuit(modelId);
c.failures++;
c.lastFailure = Date.now();
if (c.failures >= this.options.circuitBreakerThreshold) {
c.state = "open";
this.emit("inference:error", {
modelId,
error: new Error(`Circuit breaker opened after ${c.failures} consecutive failures`)
});
}
}
/**
* Get or create queue for a model
*/
getQueue(modelId) {
let queue = this.queues.get(modelId);
if (!queue) {
queue = new PriorityQueue();
this.queues.set(modelId, queue);
}
return queue;
}
/**
* Get or create running set for a model
*/
getRunningSet(modelId) {
let running = this.runningTasks.get(modelId);
if (!running) {
running = /* @__PURE__ */ new Set();
this.runningTasks.set(modelId, running);
}
return running;
}
/**
* Check if we can start a new task for a model
*/
canStartTask(modelId) {
if (this.globalRunningCount >= this.options.maxConcurrentTasks) {
return false;
}
const running = this.runningTasks.get(modelId);
if (running && running.size >= this.options.maxConcurrentPerModel) {
return false;
}
return true;
}
/**
* Process pending tasks
*/
async processQueue() {
if (this.isProcessing || this.disposed) {
return;
}
this.isProcessing = true;
try {
const tasksToStart = [];
for (const [modelId, queue] of this.queues) {
while (!queue.isEmpty() && this.canStartTask(modelId)) {
const task = queue.dequeue();
if (task && task.status === "pending") {
tasksToStart.push(task);
const running = this.getRunningSet(modelId);
running.add(task.id);
this.globalRunningCount++;
}
}
}
await Promise.all(tasksToStart.map(async (task) => {
this.emit("inference:start", { taskId: task.id, modelId: task.modelId });
try {
await task.execute();
this.emit("inference:complete", {
taskId: task.id,
modelId: task.modelId,
duration: (task.completedAt ?? 0) - (task.startedAt ?? 0)
});
} catch (error) {
this.emit("inference:error", {
taskId: task.id,
modelId: task.modelId,
error
});
} finally {
const running = this.runningTasks.get(task.modelId);
if (running) {
running.delete(task.id);
}
this.globalRunningCount--;
}
}));
} finally {
this.isProcessing = false;
}
let hasPending = false;
for (const queue of this.queues.values()) {
if (!queue.isEmpty()) {
hasPending = true;
break;
}
}
if (hasPending) {
setTimeout(() => this.processQueue(), 0);
}
}
/**
* Schedule a task for execution
*/
schedule(modelId, executor, priority = "normal") {
if (this.disposed) {
throw new EdgeFlowError("Scheduler has been disposed", ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
if (this.isCircuitOpen(modelId)) {
throw new EdgeFlowError(`Circuit breaker is open for model ${modelId} \u2014 too many consecutive failures. Retry after ${this.options.circuitBreakerResetTimeout}ms.`, ErrorCodes.INFERENCE_FAILED, { modelId });
}
const maxRetries = this.options.maxRetries;
const baseDelay = this.options.retryBaseDelay;
const wrappedExecutor = maxRetries > 0 ? async () => {
let lastError;
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
const result = await executor();
this.circuitSuccess(modelId);
return result;
} catch (err) {
lastError = err instanceof Error ? err : new Error(String(err));
this.circuitFailure(modelId);
if (attempt < maxRetries) {
const delay = baseDelay * Math.pow(2, attempt);
await new Promise((r) => setTimeout(r, delay));
}
}
}
throw lastError;
} : async () => {
try {
const result = await executor();
this.circuitSuccess(modelId);
return result;
} catch (err) {
this.circuitFailure(modelId);
throw err;
}
};
const task = new Task(generateTaskId(), modelId, priority, wrappedExecutor);
this.allTasks.set(task.id, task);
const queue = this.getQueue(modelId);
queue.enqueue(task);
this.processQueue();
return task;
}
/**
* Schedule with timeout
*/
scheduleWithTimeout(modelId, executor, timeout = this.options.defaultTimeout, priority = "normal") {
const timeoutExecutor = () => {
return new Promise((resolve, reject) => {
const timer = setTimeout(() => {
reject(new EdgeFlowError(`Task timed out after ${timeout}ms`, ErrorCodes.INFERENCE_TIMEOUT, { timeout }));
}, timeout);
executor().then((result) => {
clearTimeout(timer);
resolve(result);
}).catch((error) => {
clearTimeout(timer);
reject(error);
});
});
};
return this.schedule(modelId, timeoutExecutor, priority);
}
/**
* Schedule multiple tasks and wait for all
*/
async scheduleAll(tasks) {
const scheduledTasks = tasks.map(({ modelId, executor, priority }) => this.schedule(modelId, executor, priority));
return Promise.all(scheduledTasks.map((task) => task.wait()));
}
/**
* Get task by ID
*/
getTask(taskId) {
return this.allTasks.get(taskId);
}
/**
* Cancel a task
*/
cancelTask(taskId) {
const task = this.allTasks.get(taskId);
if (task && task.status === "pending") {
task.cancel();
for (const queue of this.queues.values()) {
queue.remove(taskId);
}
return true;
}
return false;
}
/**
* Cancel all tasks for a model
*/
cancelAllForModel(modelId) {
const queue = this.queues.get(modelId);
if (!queue)
return 0;
let cancelled = 0;
for (const task of queue.getAll()) {
if (task.status === "pending") {
task.cancel();
cancelled++;
}
}
queue.clear();
return cancelled;
}
/**
* Get statistics
*/
getStats() {
const stats = {
totalTasks: this.allTasks.size,
pendingTasks: 0,
runningTasks: 0,
completedTasks: 0,
failedTasks: 0,
cancelledTasks: 0,
queuedByModel: {}
};
for (const task of this.allTasks.values()) {
switch (task.status) {
case "pending":
stats.pendingTasks++;
break;
case "running":
stats.runningTasks++;
break;
case "completed":
stats.completedTasks++;
break;
case "failed":
stats.failedTasks++;
break;
case "cancelled":
stats.cancelledTasks++;
break;
}
}
for (const [modelId, queue] of this.queues) {
stats.queuedByModel[modelId] = queue.length;
}
return stats;
}
/**
* Add event listener
*/
on(event, listener) {
let listeners = this.listeners.get(event);
if (!listeners) {
listeners = /* @__PURE__ */ new Set();
this.listeners.set(event, listeners);
}
listeners.add(listener);
}
/**
* Remove event listener
*/
off(event, listener) {
const listeners = this.listeners.get(event);
if (listeners) {
listeners.delete(listener);
}
}
/**
* Emit event
*/
emit(type, data) {
const event = {
type,
timestamp: Date.now(),
data
};
const listeners = this.listeners.get(type);
if (listeners) {
for (const listener of listeners) {
try {
listener(event);
} catch (error) {
console.error("Error in event listener:", error);
}
}
}
}
/**
* Clear completed/failed/cancelled tasks from history
*/
clearHistory() {
for (const [taskId, task] of this.allTasks) {
if (task.status === "completed" || task.status === "failed" || task.status === "cancelled") {
this.allTasks.delete(taskId);
}
}
}
/**
* Dispose the scheduler
*/
dispose() {
this.disposed = true;
for (const queue of this.queues.values()) {
for (const task of queue.getAll()) {
task.cancel();
}
queue.clear();
}
for (const batcher of this.batchers.values()) {
batcher.clear();
}
this.queues.clear();
this.runningTasks.clear();
this.allTasks.clear();
this.batchers.clear();
this.listeners.clear();
}
};
var globalScheduler = null;
function getScheduler() {
if (!globalScheduler) {
globalScheduler = new InferenceScheduler();
}
return globalScheduler;
}
function setScheduler(scheduler) {
if (globalScheduler) {
globalScheduler.dispose();
}
globalScheduler = scheduler;
}
function configureScheduler(options) {
setScheduler(new InferenceScheduler(options));
}
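// Usage sketch (illustrative; "my-model" and runInference are placeholders):
//   configureScheduler({ maxConcurrentTasks: 2, maxRetries: 1 });
//   const task = getScheduler().scheduleWithTimeout("my-model", () => runInference(), 10000, "high");
//   const result = await task.wait();   // rejects on timeout, failure, or cancellation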
// dist/core/memory.js
var DEFAULT_POOL_CONFIG = {
initialSize: 64 * 1024 * 1024,
// 64MB
maxSize: 512 * 1024 * 1024,
// 512MB
growthFactor: 1.5,
autoGC: true,
gcThreshold: 0.8
// 80%
};
var _MemoryManager = class _MemoryManager {
constructor(config = {}) {
__publicField(this, "config");
__publicField(this, "resources", /* @__PURE__ */ new Map());
__publicField(this, "disposers", /* @__PURE__ */ new Map());
__publicField(this, "listeners", /* @__PURE__ */ new Map());
__publicField(this, "allocated", 0);
__publicField(this, "peak", 0);
__publicField(this, "gcScheduled", false);
__publicField(this, "disposed", false);
this.config = { ...DEFAULT_POOL_CONFIG, ...config };
}
/**
* Get singleton instance
*/
static getInstance() {
if (!_MemoryManager.instance) {
_MemoryManager.instance = new _MemoryManager();
}
return _MemoryManager.instance;
}
/**
* Configure the memory manager
*/
static configure(config) {
if (_MemoryManager.instance) {
console.warn("MemoryManager already initialized, configuration may not apply");
}
_MemoryManager.instance = new _MemoryManager(config);
}
/**
* Track a tensor
*/
track(tensor2, disposer) {
if (this.disposed)
return;
const size = this.estimateTensorSize(tensor2);
this.resources.set(tensor2.id, {
id: tensor2.id,
type: "tensor",
size,
createdAt: Date.now(),
stackTrace: this.captureStackTrace()
});
if (disposer) {
this.disposers.set(tensor2.id, disposer);
}
this.allocated += size;
this.peak = Math.max(this.peak, this.allocated);
this.checkMemoryThreshold();
}
/**
* Track a model
*/
trackModel(model, disposer) {
if (this.disposed)
return;
const size = model.metadata.sizeBytes;
this.resources.set(model.id, {
id: model.id,
type: "model",
size,
createdAt: Date.now(),
stackTrace: this.captureStackTrace()
});
if (disposer) {
this.disposers.set(model.id, disposer);
}
this.allocated += size;
this.peak = Math.max(this.peak, this.allocated);
this.checkMemoryThreshold();
}
/**
* Untrack a resource
*/
untrack(id) {
const resource = this.resources.get(id);
if (resource) {
this.allocated -= resource.size;
this.resources.delete(id);
this.disposers.delete(id);
}
}
/**
* Release a resource
*/
release(resourceOrId) {
const id = typeof resourceOrId === "string" ? resourceOrId : resourceOrId.id;
const disposer = this.disposers.get(id);
if (disposer) {
try {
disposer();
} catch (error) {
console.error("Error disposing resource:", error);
}
}
this.untrack(id);
}
/**
* Estimate tensor memory size
*/
estimateTensorSize(tensor2) {
const bytesPerElement = this.getBytesPerElement(tensor2.dtype);
return tensor2.size * bytesPerElement;
}
/**
* Get bytes per element for a data type
*/
getBytesPerElement(dtype) {
switch (dtype) {
case "float32":
return 4;
case "float16":
return 2;
case "int32":
return 4;
case "int64":
return 8;
case "uint8":
case "int8":
case "bool":
return 1;
default:
return 4;
}
}
/**
* Capture stack trace for debugging
*/
captureStackTrace() {
if (typeof Error.captureStackTrace === "function") {
const obj = {};
Error.captureStackTrace(obj, this.captureStackTrace);
return obj.stack;
}
return new Error().stack;
}
/**
* Check if memory threshold is exceeded
*/
checkMemoryThreshold() {
if (!this.config.autoGC)
return;
const usage = this.allocated / this.config.maxSize;
if (usage >= this.config.gcThreshold && !this.gcScheduled) {
this.gcScheduled = true;
this.emit("memory:warning", {
allocated: this.allocated,
maxSize: this.config.maxSize,
usage
});
setTimeout(() => {
this.gc();
this.gcScheduled = false;
}, 0);
}
}
/**
* Garbage collection helper.
*
* Identifies stale resources and optionally evicts them.
* @param evict - If true, actually dispose stale resources (default: false)
* @param maxAge - Resources older than this (ms) are considered stale (default: 5 min)
*/
gc(evict = false, maxAge = 5 * 60 * 1e3) {
this.emit("memory:gc", { before: this.allocated });
const now = Date.now();
const staleIds = [];
for (const [id, resource] of this.resources) {
if (now - resource.createdAt > maxAge) {
staleIds.push(id);
}
}
if (evict) {
for (const id of staleIds) {
this.release(id);
}
}
this.emit("memory:gc", {
after: this.allocated,
evicted: evict ? staleIds.length : 0,
potentialCleanup: staleIds.length
});
}
/**
* Query actual browser memory usage via performance.measureUserAgentSpecificMemory()
* (Chrome 89+, requires cross-origin isolation). Returns null if unavailable.
*/
async measureBrowserMemory() {
try {
if (typeof performance !== "undefined" && "measureUserAgentSpecificMemory" in performance) {
const result = await performance.measureUserAgentSpecificMemory();
return result;
}
} catch {
}
return null;
}
/**
* Get the device's total memory hint (navigator.deviceMemory).
* Returns null if unavailable. Value is in GiB, rounded (e.g. 4, 8).
*/
getDeviceMemory() {
try {
if (typeof navigator !== "undefined" && "deviceMemory" in navigator) {
return navigator.deviceMemory ?? null;
}
} catch {
}
return null;
}
/**
* Get memory statistics
*/
getStats() {
let tensorCount = 0;
let modelCount = 0;
for (const resource of this.resources.values()) {
if (resource.type === "tensor") {
tensorCount++;
} else {
modelCount++;
}
}
return {
allocated: this.allocated,
used: this.allocated,
// In JS, allocated = used
peak: this.peak,
tensorCount,
modelCount
};
}
/**
* Get detailed resource list (for debugging)
*/
getResourceDetails() {
return Array.from(this.resources.values());
}
/**
* Check for potential memory leaks
*/
detectLeaks(maxAge = 10 * 60 * 1e3) {
const now = Date.now();
const potentialLeaks = [];
for (const resource of this.resources.values()) {
if (now - resource.createdAt > maxAge) {
potentialLeaks.push(resource);
}
}
return potentialLeaks;
}
/**
* Add event listener
*/
on(event, listener) {
let listeners = this.listeners.get(event);
if (!listeners) {
listeners = /* @__PURE__ */ new Set();
this.listeners.set(event, listeners);
}
listeners.add(listener);
}
/**
* Remove event listener
*/
off(event, listener) {
const listeners = this.listeners.get(event);
if (listeners) {
listeners.delete(listener);
}
}
/**
* Emit event
*/
emit(type, data) {
const event = {
type,
timestamp: Date.now(),
data
};
const listeners = this.listeners.get(type);
if (listeners) {
for (const listener of listeners) {
try {
listener(event);
} catch (error) {
console.error("Error in event listener:", error);
}
}
}
}
/**
* Reset statistics
*/
resetStats() {
this.peak = this.allocated;
}
/**
* Dispose all resources
*/
disposeAll() {
for (const id of this.resources.keys()) {
this.release(id);
}
}
/**
* Dispose the manager
*/
dispose() {
this.disposeAll();
this.disposed = true;
this.listeners.clear();
_MemoryManager.instance = null;
}
};
__publicField(_MemoryManager, "instance", null);
var MemoryManager = _MemoryManager;
var MemoryScope = class _MemoryScope {
constructor(parent) {
__publicField(this, "resources", []);
__publicField(this, "children", []);
__publicField(this, "parent", null);
if (parent) {
this.parent = parent;
parent.children.push(this);
}
}
/**
* Track a resource in this scope
*/
track(resource) {
this.resources.push(resource);
return resource;
}
/**
* Create a child scope
*/
createChild() {
return new _MemoryScope(this);
}
/**
* Keep a resource (don't dispose it when scope ends)
*/
keep(resource) {
const index = this.resources.indexOf(resource);
if (index !== -1) {
this.resources.splice(index, 1);
}
return resource;
}
/**
* Dispose all resources in this scope
*/
dispose() {
for (const child of this.children) {
child.dispose();
}
this.children = [];
for (let i = this.resources.length - 1; i >= 0; i--) {
try {
this.resources[i]?.dispose();
} catch (error) {
console.error("Error disposing resource in scope:", error);
}
}
this.resources = [];
if (this.parent) {
const index = this.parent.children.indexOf(this);
if (index !== -1) {
this.parent.children.splice(index, 1);
}
this.parent = null;
}
}
};
async function withMemoryScope(fn) {
const scope = new MemoryScope();
try {
return await fn(scope);
} finally {
scope.dispose();
}
}
function withMemoryScopeSync(fn) {
const scope = new MemoryScope();
try {
return fn(scope);
} finally {
scope.dispose();
}
}
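// Usage sketch (illustrative): scoped disposal of intermediate tensors; only the value
// passed to scope.keep() survives the scope:
//   const out = await withMemoryScope(async (scope) => {
//     const a = scope.track(tensor([1, 2, 3]));
//     const b = scope.track(mul(a, 2));
//     return scope.keep(b);   // a is disposed when the scope ends, b is kept
//   });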
var ModelCache = class {
constructor(options = {}) {
__publicField(this, "maxSize");
__publicField(this, "maxModels");
__publicField(this, "cache", /* @__PURE__ */ new Map());
__publicField(this, "currentSize", 0);
this.maxSize = options.maxSize ?? 256 * 1024 * 1024;
this.maxModels = options.maxModels ?? 5;
}
/**
* Get a model from cache
*/
get(key) {
const entry = this.cache.get(key);
if (entry) {
entry.lastAccess = Date.now();
return entry.model;
}
return void 0;
}
/**
* Add a model to cache
*/
set(key, model) {
const size = model.metadata.sizeBytes;
while ((this.currentSize + size > this.maxSize || this.cache.size >= this.maxModels) && this.cache.size > 0) {
this.evictLRU();
}
this.cache.set(key, {
model,
size,
lastAccess: Date.now()
});
this.currentSize += size;
}
/**
* Remove a model from cache
*/
delete(key) {
const entry = this.cache.get(key);
if (entry) {
entry.model.dispose();
this.currentSize -= entry.size;
this.cache.delete(key);
return true;
}
return false;
}
/**
* Check if model is in cache
*/
has(key) {
return this.cache.has(key);
}
/**
* Evict least recently used model
*/
evictLRU() {
let oldestKey = null;
let oldestTime = Infinity;
for (const [key, entry] of this.cache) {
if (entry.lastAccess < oldestTime) {
oldestTime = entry.lastAccess;
oldestKey = key;
}
}
if (oldestKey) {
this.delete(oldestKey);
}
}
/**
* Clear the cache
*/
clear() {
for (const entry of this.cache.values()) {
entry.model.dispose();
}
this.cache.clear();
this.currentSize = 0;
}
/**
* Get cache statistics
*/
getStats() {
return {
size: this.currentSize,
count: this.cache.size,
maxSize: this.maxSize,
maxModels: this.maxModels
};
}
};
function getMemoryManager() {
return MemoryManager.getInstance();
}
function getMemoryStats() {
return MemoryManager.getInstance().getStats();
}
function release(resource) {
MemoryManager.getInstance().release(resource);
}
function gc() {
MemoryManager.getInstance().gc();
}
// dist/core/runtime.js
init_types();
var runtimeFactories = /* @__PURE__ */ new Map();
var runtimeInstances = /* @__PURE__ */ new Map();
var RUNTIME_PRIORITY = ["webgpu", "webnn", "wasm"];
var _RuntimeManager = class _RuntimeManager {
constructor() {
__publicField(this, "listeners", /* @__PURE__ */ new Map());
__publicField(this, "defaultRuntime", "auto");
}
/**
* Get singleton instance
*/
static getInstance() {
if (!_RuntimeManager.instance) {
_RuntimeManager.instance = new _RuntimeManager();
}
return _RuntimeManager.instance;
}
/**
* Register a runtime factory
*/
register(type, factory) {
runtimeFactories.set(type, factory);
}
/**
* Get a runtime instance
*/
async getRuntime(type = "auto") {
if (type === "auto") {
return this.getBestRuntime();
}
let runtime = runtimeInstances.get(type);
if (runtime) {
return runtime;
}
const factory = runtimeFactories.get(type);
if (!factory) {
throw new EdgeFlowError(`Runtime '${type}' is not registered`, ErrorCodes.RUNTIME_NOT_AVAILABLE, { runtime: type });
}
runtime = factory();
const available = await runtime.isAvailable();
if (!available) {
throw new EdgeFlowError(`Runtime '${type}' is not available in this environment`, ErrorCodes.RUNTIME_NOT_AVAILABLE, { runtime: type });
}
try {
await runtime.initialize();
} catch (error) {
throw new EdgeFlowError(`Failed to initialize runtime '${type}': ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.RUNTIME_INIT_FAILED, { runtime: type, error });
}
runtimeInstances.set(type, runtime);
this.emit("runtime:ready", { runtime: type });
return runtime;
}
/**
* Get the best available runtime
*/
async getBestRuntime() {
for (const type of RUNTIME_PRIORITY) {
try {
const existing = runtimeInstances.get(type);
if (existing) {
return existing;
}
const factory = runtimeFactories.get(type);
if (!factory)
continue;
const runtime = factory();
const available = await runtime.isAvailable();
if (available) {
await runtime.initialize();
runtimeInstances.set(type, runtime);
this.emit("runtime:ready", { runtime: type });
return runtime;
}
} catch {
continue;
}
}
throw new EdgeFlowError("No runtime available. Please ensure WebGPU, WebNN, or WASM is supported.", ErrorCodes.RUNTIME_NOT_AVAILABLE, { triedRuntimes: RUNTIME_PRIORITY });
}
/**
* Check which runtimes are available
*/
async detectAvailableRuntimes() {
const results = /* @__PURE__ */ new Map();
for (const type of RUNTIME_PRIORITY) {
const factory = runtimeFactories.get(type);
if (!factory) {
results.set(type, false);
continue;
}
try {
const runtime = factory();
results.set(type, await runtime.isAvailable());
} catch {
results.set(type, false);
}
}
return results;
}
/**
* Get capabilities of a runtime
*/
async getCapabilities(type) {
const runtime = await this.getRuntime(type);
return runtime.capabilities;
}
/**
* Set default runtime
*/
setDefaultRuntime(type) {
this.defaultRuntime = type;
}
/**
* Get default runtime type
*/
getDefaultRuntimeType() {
return this.defaultRuntime;
}
/**
* Dispose a specific runtime
*/
disposeRuntime(type) {
const runtime = runtimeInstances.get(type);
if (runtime) {
runtime.dispose();
runtimeInstances.delete(type);
}
}
/**
* Dispose all runtimes
*/
disposeAll() {
for (const [type, runtime] of runtimeInstances) {
runtime.dispose();
runtimeInstances.delete(type);
}
}
/**
* Add event listener
*/
on(event, listener) {
let listeners = this.listeners.get(event);
if (!listeners) {
listeners = /* @__PURE__ */ new Set();
this.listeners.set(event, listeners);
}
listeners.add(listener);
}
/**
* Remove event listener
*/
off(event, listener) {
const listeners = this.listeners.get(event);
if (listeners) {
listeners.delete(listener);
}
}
/**
* Emit event
*/
emit(type, data) {
const event = {
type,
timestamp: Date.now(),
data
};
const listeners = this.listeners.get(type);
if (listeners) {
for (const listener of listeners) {
try {
listener(event);
} catch (error) {
console.error("Error in event listener:", error);
}
}
}
}
};
__publicField(_RuntimeManager, "instance", null);
var RuntimeManager = _RuntimeManager;
var modelIdCounter = 0;
function generateModelId() {
return `model_${++modelIdCounter}_${Date.now().toString(36)}`;
}
var LoadedModelImpl = class {
constructor(metadata, runtime, dispose) {
__publicField(this, "id");
__publicField(this, "metadata");
__publicField(this, "runtime");
__publicField(this, "_isLoaded", true);
__publicField(this, "_dispose");
this.id = generateModelId();
this.metadata = metadata;
this.runtime = runtime;
this._dispose = dispose;
}
get isLoaded() {
return this._isLoaded;
}
dispose() {
if (this._isLoaded) {
this._isLoaded = false;
this._dispose();
getMemoryManager().untrack(this.id);
}
}
};
async function loadModel(url, options = {}) {
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(options.runtime ?? "auto");
const { loadModelData: loadModelData2 } = await Promise.resolve().then(() => (init_model_loader(), model_loader_exports));
const modelData = await loadModelData2(url, {
cache: options.cache ?? true,
resumable: options.resumable ?? true,
chunkSize: options.chunkSize,
forceDownload: options.forceDownload,
onProgress: options.onProgress ? (progress) => {
options.onProgress(progress.percent / 100);
} : void 0
});
const model = await runtime.loadModel(modelData, options);
return model;
}
async function loadModelFromBuffer(data, options = {}) {
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(options.runtime ?? "auto");
return runtime.loadModel(data, options);
}
async function runInference(model, inputs) {
if (!model.isLoaded) {
throw new EdgeFlowError("Model has been disposed", ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(model.runtime);
const scheduler = getScheduler();
const task = scheduler.schedule(model.id, () => runtime.run(model, inputs));
return task.wait();
}
async function runInferenceNamed(model, namedInputs) {
if (!model.isLoaded) {
throw new EdgeFlowError("Model has been disposed", ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(model.runtime);
if (!("runNamed" in runtime)) {
throw new EdgeFlowError("Runtime does not support named inputs", ErrorCodes.INFERENCE_FAILED, { modelId: model.id });
}
const scheduler = getScheduler();
const task = scheduler.schedule(model.id, () => runtime.runNamed(model, namedInputs));
return task.wait();
}
async function runBatchInference(model, batches) {
const scheduler = getScheduler();
const manager = RuntimeManager.getInstance();
const runtime = await manager.getRuntime(model.runtime);
const tasks = batches.map((inputs) => scheduler.schedule(model.id, () => runtime.run(model, inputs)));
return Promise.all(tasks.map((task) => task.wait()));
}
function getRuntimeManager() {
return RuntimeManager.getInstance();
}
function registerRuntime(type, factory) {
RuntimeManager.getInstance().register(type, factory);
}
async function getBestRuntime() {
return RuntimeManager.getInstance().getBestRuntime();
}
async function getAvailableRuntimes() {
return RuntimeManager.getInstance().detectAvailableRuntimes();
}
// dist/core/plugin.js
var registeredPlugins = /* @__PURE__ */ new Map();
var pluginPipelines = /* @__PURE__ */ new Map();
var pluginMiddleware = [];
async function registerPlugin(plugin) {
if (registeredPlugins.has(plugin.name)) {
console.warn(`[edgeFlow.js] Plugin "${plugin.name}" is already registered \u2014 skipping.`);
return;
}
if (plugin.setup) {
await plugin.setup();
}
if (plugin.pipelines) {
for (const [task, entry] of Object.entries(plugin.pipelines)) {
pluginPipelines.set(task, entry);
}
}
if (plugin.backends) {
for (const [name, entry] of Object.entries(plugin.backends)) {
registerRuntime(name, entry.factory);
}
}
if (plugin.middleware) {
pluginMiddleware.push(...plugin.middleware);
}
registeredPlugins.set(plugin.name, plugin);
}
function getPluginPipeline(task) {
return pluginPipelines.get(task);
}
function getPluginMiddleware() {
return pluginMiddleware;
}
function listPlugins() {
return Array.from(registeredPlugins.values()).map((p) => ({
name: p.name,
version: p.version
}));
}
function unregisterPlugin(name) {
const plugin = registeredPlugins.get(name);
if (!plugin)
return false;
if (plugin.pipelines) {
for (const task of Object.keys(plugin.pipelines)) {
pluginPipelines.delete(task);
}
}
if (plugin.middleware) {
for (const mw of plugin.middleware) {
const idx = pluginMiddleware.indexOf(mw);
if (idx !== -1)
pluginMiddleware.splice(idx, 1);
}
}
registeredPlugins.delete(name);
return true;
}
// dist/core/device-profiler.js
var cachedProfile = null;
async function getDeviceProfile() {
if (cachedProfile)
return cachedProfile;
const cores = typeof navigator !== "undefined" ? navigator.hardwareConcurrency ?? 2 : 2;
const memoryGiB = typeof navigator !== "undefined" && "deviceMemory" in navigator ? navigator.deviceMemory ?? null : null;
const mobile = typeof navigator !== "undefined" ? /Android|iPhone|iPad|iPod|Mobile/i.test(navigator.userAgent) : false;
let webgpu = false;
let gpuInfo;
if (typeof navigator !== "undefined" && "gpu" in navigator) {
try {
const adapter = await navigator.gpu.requestAdapter();
webgpu = adapter != null;
if (adapter && typeof adapter === "object") {
try {
const info = adapter["info"];
if (info) {
gpuInfo = `${info["vendor"] ?? ""} ${info["architecture"] ?? ""}`.trim() || void 0;
}
} catch {
}
}
} catch {
}
}
let webnn = false;
if (typeof navigator !== "undefined" && "ml" in navigator) {
try {
const ml = navigator.ml;
if (ml) {
const ctx = await ml.createContext();
webnn = ctx != null;
}
} catch {
}
}
let tier;
if (webgpu && cores >= 8 && (memoryGiB === null || memoryGiB >= 8)) {
tier = "high";
} else if (cores >= 4 && (memoryGiB === null || memoryGiB >= 4)) {
tier = "medium";
} else {
tier = "low";
}
if (mobile && tier === "high") {
tier = "medium";
}
const recommendedBatchSize = tier === "high" ? 32 : tier === "medium" ? 8 : 1;
const recommendedConcurrency = tier === "high" ? 4 : tier === "medium" ? 2 : 1;
cachedProfile = {
tier,
cores,
memoryGiB,
webgpu,
webnn,
recommendedBatchSize,
recommendedConcurrency,
mobile,
gpuInfo
};
return cachedProfile;
}
function recommendQuantization(profile) {
if (profile.tier === "high" && profile.webgpu)
return "float16";
if (profile.tier === "medium")
return "int8";
return "int8";
}
async function recommendModelVariant() {
const profile = await getDeviceProfile();
return {
quantization: recommendQuantization(profile),
executionProvider: profile.webgpu ? "webgpu" : "wasm",
batchSize: profile.recommendedBatchSize,
useWorker: profile.cores >= 4
};
}
function resetDeviceProfile() {
cachedProfile = null;
}
// dist/backends/webgpu.js
init_types();
init_tensor();
var GPUBufferUsage = {
STORAGE: 128,
COPY_SRC: 4,
COPY_DST: 8,
MAP_READ: 1
};
var GPUShaderStage = {
COMPUTE: 4
};
var WebGPURuntime = class {
constructor() {
__publicField(this, "name", "webgpu");
__publicField(this, "adapter", null);
__publicField(this, "device", null);
__publicField(this, "models", /* @__PURE__ */ new Map());
__publicField(this, "initialized", false);
}
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: true,
dynamicShapes: false,
maxBatchSize: 64,
availableMemory: this.device?.limits.maxBufferSize ?? 256 * 1024 * 1024
};
}
/**
* Check if WebGPU is available
*/
async isAvailable() {
if (typeof navigator === "undefined")
return false;
if (!navigator.gpu)
return false;
try {
const adapter = await navigator.gpu.requestAdapter();
return adapter !== null;
} catch {
return false;
}
}
/**
* Initialize the WebGPU runtime
*/
async initialize() {
if (this.initialized)
return;
if (!navigator.gpu) {
throw new EdgeFlowError("WebGPU is not supported in this browser", ErrorCodes.RUNTIME_NOT_AVAILABLE);
}
this.adapter = await navigator.gpu.requestAdapter({
powerPreference: "high-performance"
});
if (!this.adapter) {
throw new EdgeFlowError("Failed to get WebGPU adapter", ErrorCodes.RUNTIME_INIT_FAILED);
}
this.device = await this.adapter.requestDevice({
requiredFeatures: [],
requiredLimits: {}
});
this.device.lost.then((info) => {
console.error("WebGPU device was lost:", info.message);
this.initialized = false;
this.device = null;
});
this.initialized = true;
}
/**
* Load a model
*/
async loadModel(modelData, options = {}) {
this.ensureInitialized();
const config = this.parseModelData(modelData);
const webgpuData = {
shaders: /* @__PURE__ */ new Map(),
pipelines: /* @__PURE__ */ new Map(),
weights: /* @__PURE__ */ new Map(),
bindGroupLayouts: [],
config
};
await this.uploadWeights(modelData, webgpuData);
await this.createPipelines(webgpuData);
const modelId = `webgpu_${Date.now().toString(36)}`;
this.models.set(modelId, webgpuData);
const metadata = {
name: config.name || options.metadata?.name || "unknown",
version: config.version,
inputs: config.inputs.map((i) => ({
name: i.name,
dtype: i.dtype,
shape: i.shape
})),
outputs: config.outputs.map((o) => ({
name: o.name,
dtype: o.dtype,
shape: o.shape
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? "float32",
format: "edgeflow"
};
const model = new LoadedModelImpl(metadata, "webgpu", () => this.unloadModel(modelId));
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Run inference
*/
async run(model, inputs) {
this.ensureInitialized();
return this.executeModel(inputs, model.metadata);
}
/**
* Execute model (simplified implementation)
*/
async executeModel(inputs, metadata) {
const device = this.device;
const outputs = [];
for (const outputSpec of metadata.outputs) {
const outputSize = outputSpec.shape.reduce((a, b) => a * b, 1);
const outputBuffer = device.createBuffer({
size: outputSize * 4,
// float32
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
});
const stagingBuffer = device.createBuffer({
size: outputSize * 4,
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
});
const outputData = new Float32Array(outputSize);
if (inputs.length > 0 && inputs[0]) {
const inputData = inputs[0].toFloat32Array();
for (let i = 0; i < Math.min(outputSize, inputData.length); i++) {
outputData[i] = inputData[i] ?? 0;
}
}
outputs.push(new EdgeFlowTensor(outputData, outputSpec.shape, "float32"));
outputBuffer.destroy();
stagingBuffer.destroy();
}
return outputs;
}
/**
* Parse model data
*/
parseModelData(data) {
try {
const decoder = new TextDecoder();
const text = decoder.decode(new Uint8Array(data, 0, Math.min(1024, data.byteLength)));
if (text.trim().startsWith("{")) {
let jsonEnd = text.indexOf("\n---\n");
if (jsonEnd === -1)
jsonEnd = data.byteLength;
const jsonStr = decoder.decode(new Uint8Array(data, 0, jsonEnd));
return JSON.parse(jsonStr);
}
} catch {
}
return {
name: "unknown",
version: "1.0.0",
layers: [],
inputs: [{ name: "input", shape: [-1, 768], dtype: "float32" }],
outputs: [{ name: "output", shape: [-1, 768], dtype: "float32" }]
};
}
/**
* Upload weights to GPU
*/
async uploadWeights(_data, modelData) {
const device = this.device;
const weightsBuffer = device.createBuffer({
size: 1024,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
});
modelData.weights.set("default", weightsBuffer);
}
/**
* Create compute pipelines
*/
async createPipelines(modelData) {
const device = this.device;
const shaderCode = (
/* wgsl */
`
      @group(0) @binding(0) var<storage, read> input: array<f32>;
      @group(0) @binding(1) var<storage, read_write> output: array<f32>;
      @compute @workgroup_size(64)
      fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
let idx = gid.x;
if (idx < arrayLength(&input)) {
output[idx] = input[idx];
}
}
`
);
const shaderModule = device.createShaderModule({
code: shaderCode
});
modelData.shaders.set("default", shaderModule);
const bindGroupLayout = device.createBindGroupLayout({
entries: [
{
binding: 0,
visibility: GPUShaderStage.COMPUTE,
buffer: { type: "read-only-storage" }
},
{
binding: 1,
visibility: GPUShaderStage.COMPUTE,
buffer: { type: "storage" }
}
]
});
modelData.bindGroupLayouts.push(bindGroupLayout);
const pipelineLayout = device.createPipelineLayout({
bindGroupLayouts: [bindGroupLayout]
});
const pipeline2 = device.createComputePipeline({
layout: pipelineLayout,
compute: {
module: shaderModule,
entryPoint: "main"
}
});
modelData.pipelines.set("default", pipeline2);
}
/**
* Unload a model
*/
unloadModel(modelId) {
const modelData = this.models.get(modelId);
if (modelData) {
for (const buffer of modelData.weights.values()) {
buffer.destroy();
}
this.models.delete(modelId);
}
}
/**
* Ensure runtime is initialized
*/
ensureInitialized() {
if (!this.initialized || !this.device) {
throw new EdgeFlowError("WebGPU runtime is not initialized", ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
}
/**
* Dispose the runtime
*/
dispose() {
for (const modelId of this.models.keys()) {
this.unloadModel(modelId);
}
if (this.device) {
this.device.destroy();
this.device = null;
}
this.adapter = null;
this.initialized = false;
}
};
function createWebGPURuntime() {
return new WebGPURuntime();
}
// dist/backends/webnn.js
init_types();
init_tensor();
var WebNNRuntime = class {
constructor() {
__publicField(this, "name", "webnn");
__publicField(this, "context", null);
__publicField(this, "models", /* @__PURE__ */ new Map());
__publicField(this, "initialized", false);
__publicField(this, "deviceType", "default");
}
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: true,
dynamicShapes: false,
maxBatchSize: 32,
availableMemory: 256 * 1024 * 1024
// Estimated
};
}
/**
* Check if WebNN is available
*/
async isAvailable() {
if (typeof navigator === "undefined")
return false;
if (!navigator.ml)
return false;
try {
const context = await navigator.ml.createContext({ deviceType: "default" });
return context !== null;
} catch {
return false;
}
}
/**
* Initialize the WebNN runtime
*/
async initialize() {
if (this.initialized)
return;
if (!navigator.ml) {
throw new EdgeFlowError("WebNN is not supported in this browser", ErrorCodes.RUNTIME_NOT_AVAILABLE);
}
try {
this.context = await navigator.ml.createContext({
deviceType: "gpu",
powerPreference: "high-performance"
});
this.deviceType = "gpu";
} catch {
try {
this.context = await navigator.ml.createContext({ deviceType: "cpu" });
this.deviceType = "cpu";
} catch (error) {
throw new EdgeFlowError(`Failed to create WebNN context: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.RUNTIME_INIT_FAILED);
}
}
this.initialized = true;
}
/**
* Load a model
*/
async loadModel(modelData, options = {}) {
this.ensureInitialized();
const config = this.parseModelConfig(modelData);
const modelId = `webnn_${Date.now().toString(36)}`;
const metadata = {
name: config.name || options.metadata?.name || "unknown",
version: config.version || "1.0.0",
inputs: config.inputs.map((i) => ({
name: i.name,
dtype: i.dtype,
shape: i.shape
})),
outputs: config.outputs.map((o) => ({
name: o.name,
dtype: o.dtype,
shape: o.shape
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? "float32",
format: "edgeflow"
};
const model = new LoadedModelImpl(metadata, "webnn", () => this.unloadModel(modelId));
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Run inference
*/
async run(model, inputs) {
this.ensureInitialized();
return this.executeModel(inputs, model.metadata);
}
/**
* Execute model (simplified implementation)
*/
async executeModel(inputs, metadata) {
const outputs = [];
for (const outputSpec of metadata.outputs) {
const outputSize = outputSpec.shape.reduce((a, b) => a * b, 1);
const outputData = new Float32Array(outputSize);
if (inputs.length > 0 && inputs[0]) {
const inputData = inputs[0].toFloat32Array();
for (let i = 0; i < Math.min(outputSize, inputData.length); i++) {
outputData[i] = inputData[i] ?? 0;
}
}
outputs.push(new EdgeFlowTensor(outputData, outputSpec.shape, "float32"));
}
return outputs;
}
/**
* Parse model configuration
*/
parseModelConfig(data) {
try {
const decoder = new TextDecoder();
const text = decoder.decode(new Uint8Array(data, 0, Math.min(1024, data.byteLength)));
if (text.trim().startsWith("{")) {
let jsonEnd = text.indexOf("\n---\n");
if (jsonEnd === -1)
jsonEnd = data.byteLength;
const jsonStr = decoder.decode(new Uint8Array(data, 0, jsonEnd));
return JSON.parse(jsonStr);
}
} catch {
}
return {
name: "unknown",
version: "1.0.0",
inputs: [{ name: "input", shape: [-1, 768], dtype: "float32" }],
outputs: [{ name: "output", shape: [-1, 768], dtype: "float32" }]
};
}
/**
* Unload a model
*/
unloadModel(modelId) {
this.models.delete(modelId);
}
/**
* Ensure runtime is initialized
*/
ensureInitialized() {
if (!this.initialized || !this.context) {
throw new EdgeFlowError("WebNN runtime is not initialized", ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
}
/**
* Get device type
*/
getDeviceType() {
return this.deviceType;
}
/**
* Dispose the runtime
*/
dispose() {
this.models.clear();
this.context = null;
this.initialized = false;
}
};
function createWebNNRuntime() {
return new WebNNRuntime();
}
// dist/backends/wasm.js
init_types();
init_tensor();
var WASMRuntime = class {
constructor() {
__publicField(this, "name", "wasm");
__publicField(this, "module", null);
__publicField(this, "simdSupported", false);
__publicField(this, "models", /* @__PURE__ */ new Map());
__publicField(this, "initialized", false);
}
get capabilities() {
return {
concurrency: false,
// WASM is single-threaded by default
quantization: true,
float16: false,
dynamicShapes: true,
maxBatchSize: 16,
availableMemory: 128 * 1024 * 1024
// 128MB default
};
}
/**
* Check if WASM is available
*/
async isAvailable() {
if (typeof WebAssembly === "undefined")
return false;
try {
const bytes = new Uint8Array([
0,
97,
115,
109,
// Magic number
1,
0,
0,
0
// Version
]);
await WebAssembly.instantiate(bytes);
return true;
} catch {
return false;
}
}
/**
* Initialize the WASM runtime
*/
async initialize() {
if (this.initialized)
return;
this.simdSupported = await this.checkSIMDSupport();
const memory = new WebAssembly.Memory({
initial: 256,
// 16MB initial
maximum: 2048
// 128MB maximum
});
this.module = {
memory,
exports: this.createJSFallback(memory)
};
this.initialized = true;
}
/**
* Check SIMD support
*/
async checkSIMDSupport() {
try {
const simdTest = new Uint8Array([
0,
97,
115,
109,
1,
0,
0,
0,
1,
5,
1,
96,
0,
1,
123,
3,
2,
1,
0,
10,
10,
1,
8,
0,
253,
12,
0,
0,
0,
0,
11
]);
await WebAssembly.instantiate(simdTest);
return true;
} catch {
return false;
}
}
/**
* Create JavaScript fallback for WASM operations
*/
createJSFallback(memory) {
let nextPtr = 0;
const allocations = /* @__PURE__ */ new Map();
return {
malloc: (size) => {
const ptr = nextPtr;
nextPtr += size;
allocations.set(ptr, size);
return ptr;
},
free: (ptr) => {
allocations.delete(ptr);
},
matmul_f32: (aPtr, aRows, aCols, bPtr, _bRows, bCols, outPtr) => {
const view = new Float32Array(memory.buffer);
const aOffset = aPtr / 4;
const bOffset = bPtr / 4;
const outOffset = outPtr / 4;
for (let i = 0; i < aRows; i++) {
for (let j = 0; j < bCols; j++) {
let sum2 = 0;
for (let k = 0; k < aCols; k++) {
sum2 += (view[aOffset + i * aCols + k] ?? 0) * (view[bOffset + k * bCols + j] ?? 0);
}
view[outOffset + i * bCols + j] = sum2;
}
}
},
add_f32: (aPtr, bPtr, outPtr, size) => {
const view = new Float32Array(memory.buffer);
const aOffset = aPtr / 4;
const bOffset = bPtr / 4;
const outOffset = outPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = (view[aOffset + i] ?? 0) + (view[bOffset + i] ?? 0);
}
},
mul_f32: (aPtr, bPtr, outPtr, size) => {
const view = new Float32Array(memory.buffer);
const aOffset = aPtr / 4;
const bOffset = bPtr / 4;
const outOffset = outPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = (view[aOffset + i] ?? 0) * (view[bOffset + i] ?? 0);
}
},
relu_f32: (inputPtr, outputPtr, size) => {
const view = new Float32Array(memory.buffer);
const inOffset = inputPtr / 4;
const outOffset = outputPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = Math.max(0, view[inOffset + i] ?? 0);
}
},
sigmoid_f32: (inputPtr, outputPtr, size) => {
const view = new Float32Array(memory.buffer);
const inOffset = inputPtr / 4;
const outOffset = outputPtr / 4;
for (let i = 0; i < size; i++) {
view[outOffset + i] = 1 / (1 + Math.exp(-(view[inOffset + i] ?? 0)));
}
},
softmax_f32: (inputPtr, outputPtr, size) => {
const view = new Float32Array(memory.buffer);
const inOffset = inputPtr / 4;
const outOffset = outputPtr / 4;
let max = -Infinity;
for (let i = 0; i < size; i++) {
if ((view[inOffset + i] ?? 0) > max)
max = view[inOffset + i] ?? 0;
}
let sum2 = 0;
for (let i = 0; i < size; i++) {
view[outOffset + i] = Math.exp((view[inOffset + i] ?? 0) - max);
sum2 += view[outOffset + i] ?? 0;
}
for (let i = 0; i < size; i++) {
view[outOffset + i] = (view[outOffset + i] ?? 0) / sum2;
}
}
};
}
/**
* Load a model
*/
async loadModel(modelData, options = {}) {
this.ensureInitialized();
const config = this.parseModelConfig(modelData);
const wasmData = {
weights: /* @__PURE__ */ new Map(),
config,
executionOrder: (config.layers ?? []).map((l) => l.name)
};
await this.loadWeights(modelData, wasmData);
const modelId = `wasm_${Date.now().toString(36)}`;
this.models.set(modelId, wasmData);
const metadata = {
name: config.name || options.metadata?.name || "unknown",
version: config.version || "1.0.0",
inputs: config.inputs.map((i) => ({
name: i.name,
dtype: i.dtype,
shape: i.shape
})),
outputs: config.outputs.map((o) => ({
name: o.name,
dtype: o.dtype,
shape: o.shape
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? "float32",
format: "edgeflow"
};
const model = new LoadedModelImpl(metadata, "wasm", () => this.unloadModel(modelId));
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Run inference
*/
async run(model, inputs) {
this.ensureInitialized();
return this.executeModel(inputs, model.metadata);
}
/**
* Execute model
*/
async executeModel(inputs, metadata) {
const outputs = [];
for (const outputSpec of metadata.outputs) {
const outputSize = outputSpec.shape.reduce((a, b) => a * b, 1);
let outputTensor;
if (inputs.length > 0 && inputs[0]) {
const inputTensor = inputs[0];
if (outputSpec.name.includes("logits") || outputSpec.name.includes("class")) {
outputTensor = softmax(inputTensor);
} else if (outputSpec.name.includes("relu")) {
outputTensor = relu(inputTensor);
} else if (outputSpec.name.includes("sigmoid")) {
outputTensor = sigmoid(inputTensor);
} else {
const outputData = new Float32Array(outputSize);
const inputData = inputTensor.toFloat32Array();
for (let i = 0; i < Math.min(outputSize, inputData.length); i++) {
outputData[i] = inputData[i] ?? 0;
}
outputTensor = new EdgeFlowTensor(outputData, outputSpec.shape, "float32");
}
} else {
outputTensor = new EdgeFlowTensor(new Float32Array(outputSize), outputSpec.shape, "float32");
}
outputs.push(outputTensor);
}
return outputs;
}
/**
* Parse model configuration
*/
parseModelConfig(data) {
try {
const decoder = new TextDecoder();
const text = decoder.decode(new Uint8Array(data, 0, Math.min(2048, data.byteLength)));
if (text.trim().startsWith("{")) {
let jsonEnd = text.indexOf("\n---\n");
if (jsonEnd === -1) {
try {
return JSON.parse(text);
} catch {
jsonEnd = data.byteLength;
}
}
const jsonStr = decoder.decode(new Uint8Array(data, 0, jsonEnd));
return JSON.parse(jsonStr);
}
} catch {
}
return {
name: "unknown",
version: "1.0.0",
layers: [],
inputs: [{ name: "input", shape: [-1, 768], dtype: "float32" }],
outputs: [{ name: "output", shape: [-1, 768], dtype: "float32" }]
};
}
/**
* Load weights into WASM memory
*/
async loadWeights(_modelData, _wasmData) {
}
/**
* Unload a model
*/
unloadModel(modelId) {
const modelData = this.models.get(modelId);
if (modelData && this.module) {
for (const weight of modelData.weights.values()) {
this.module.exports.free(weight.ptr);
}
}
this.models.delete(modelId);
}
/**
* Ensure runtime is initialized
*/
ensureInitialized() {
if (!this.initialized || !this.module) {
throw new EdgeFlowError("WASM runtime is not initialized", ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
}
/**
* Check if SIMD is supported
*/
hasSIMDSupport() {
return this.simdSupported;
}
/**
* Dispose the runtime
*/
dispose() {
for (const modelId of this.models.keys()) {
this.unloadModel(modelId);
}
this.module = null;
this.initialized = false;
}
};
function createWASMRuntime() {
return new WASMRuntime();
}
// dist/backends/onnx.js
init_types();
init_tensor();
var ort = null;
async function getOrt() {
if (ort)
return ort;
try {
ort = await import("onnxruntime-web/wasm");
return ort;
} catch {
return null;
}
}
async function isOnnxAvailable() {
return await getOrt() != null;
}
var sessionStore = /* @__PURE__ */ new Map();
var ONNXRuntime = class {
constructor() {
__publicField(this, "name", "wasm");
// Register as wasm since it's the fallback
__publicField(this, "initialized", false);
__publicField(this, "executionProvider", "wasm");
}
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: this.executionProvider === "webgpu",
dynamicShapes: true,
maxBatchSize: 32,
availableMemory: 512 * 1024 * 1024
// 512MB
};
}
/**
* Check if ONNX Runtime is available (peer dependency installed)
*/
async isAvailable() {
return isOnnxAvailable();
}
/**
* Initialize the ONNX runtime
*/
async initialize() {
if (this.initialized)
return;
const ortModule = await getOrt();
if (!ortModule) {
throw new EdgeFlowError("onnxruntime-web is not installed. Install it with: npm install onnxruntime-web", ErrorCodes.RUNTIME_NOT_AVAILABLE);
}
if (typeof window !== "undefined" && ortModule.env?.wasm) {
ortModule.env.wasm.wasmPaths = "/ort/";
ortModule.env.wasm.numThreads = 1;
}
this.initialized = true;
}
/**
* Load a model from ArrayBuffer
*/
async loadModel(modelData, options = {}) {
if (!this.initialized) {
await this.initialize();
}
try {
const ortModule = await getOrt();
if (!ortModule) {
throw new Error("onnxruntime-web is not installed");
}
const sessionOptions = {
executionProviders: ["wasm"],
graphOptimizationLevel: "all"
};
const modelBytes = new Uint8Array(modelData);
const session = await ortModule.InferenceSession.create(modelBytes, sessionOptions);
const inputNames = session.inputNames;
const outputNames = session.outputNames;
const modelId = `onnx_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
sessionStore.set(modelId, {
session,
inputNames: [...inputNames],
outputNames: [...outputNames]
});
const metadata = {
name: options.metadata?.name ?? "onnx-model",
version: "1.0.0",
inputs: inputNames.map((name) => ({
name,
dtype: "float32",
shape: [-1]
// Dynamic shape
})),
outputs: outputNames.map((name) => ({
name,
dtype: "float32",
shape: [-1]
})),
sizeBytes: modelData.byteLength,
quantization: options.quantization ?? "float32",
format: "onnx"
};
const model = new LoadedModelImpl(metadata, "wasm", () => this.unloadModel(modelId));
Object.defineProperty(model, "id", { value: modelId, writable: false });
getMemoryManager().trackModel(model, () => model.dispose());
return model;
} catch (error) {
throw new EdgeFlowError(`Failed to load ONNX model: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.MODEL_LOAD_FAILED, { error });
}
}
/**
* Run inference
*/
async run(model, inputs) {
const sessionData = sessionStore.get(model.id);
if (!sessionData) {
throw new EdgeFlowError(`ONNX session not found for model ${model.id}`, ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const { session, inputNames, outputNames } = sessionData;
try {
const ortModule = await getOrt();
const feeds = {};
for (let i = 0; i < Math.min(inputs.length, inputNames.length); i++) {
const inputName = inputNames[i];
const inputTensor = inputs[i];
if (inputName && inputTensor) {
const dtype = inputTensor.dtype;
let ortTensor;
if (dtype === "int64") {
const data = inputTensor.data;
ortTensor = new ortModule.Tensor("int64", data, inputTensor.shape);
} else if (dtype === "int32") {
const data = inputTensor.data;
ortTensor = new ortModule.Tensor("int32", data, inputTensor.shape);
} else {
const data = inputTensor.toFloat32Array();
ortTensor = new ortModule.Tensor("float32", data, inputTensor.shape);
}
feeds[inputName] = ortTensor;
}
}
const results = await session.run(feeds);
const outputs = [];
for (const outputName of outputNames) {
const ortTensor = results[outputName];
if (ortTensor) {
const data = ortTensor.data;
const shape = Array.from(ortTensor.dims).map((d) => Number(d));
outputs.push(new EdgeFlowTensor(new Float32Array(data), shape, "float32"));
}
}
return outputs;
} catch (error) {
throw new EdgeFlowError(`ONNX inference failed: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.INFERENCE_FAILED, { modelId: model.id, error });
}
}
/**
* Run inference with named inputs
*/
async runNamed(model, namedInputs) {
const sessionData = sessionStore.get(model.id);
if (!sessionData) {
throw new EdgeFlowError(`ONNX session not found for model ${model.id}`, ErrorCodes.MODEL_NOT_LOADED, { modelId: model.id });
}
const { session, inputNames, outputNames } = sessionData;
try {
const ortModule = await getOrt();
const feeds = {};
for (const [inputName, inputTensor] of namedInputs) {
const tensor2 = inputTensor;
const dtype = tensor2.dtype;
let ortTensor;
if (dtype === "int64") {
const data = tensor2.data;
ortTensor = new ortModule.Tensor("int64", data, tensor2.shape);
} else if (dtype === "int32") {
const data = tensor2.data;
ortTensor = new ortModule.Tensor("int32", data, tensor2.shape);
} else {
const data = tensor2.toFloat32Array();
ortTensor = new ortModule.Tensor("float32", data, tensor2.shape);
}
feeds[inputName] = ortTensor;
}
const results = await session.run(feeds);
const outputs = [];
for (const outputName of outputNames) {
const ortTensor = results[outputName];
if (ortTensor) {
const data = ortTensor.data;
const shape = Array.from(ortTensor.dims).map((d) => Number(d));
outputs.push(new EdgeFlowTensor(new Float32Array(data), shape, "float32"));
}
}
return outputs;
} catch (error) {
throw new EdgeFlowError(`ONNX inference failed: ${error instanceof Error ? error.message : String(error)}`, ErrorCodes.INFERENCE_FAILED, { modelId: model.id, expectedInputs: inputNames, providedInputs: Array.from(namedInputs.keys()), error });
}
}
/**
* Unload a model
*/
async unloadModel(modelId) {
const sessionData = sessionStore.get(modelId);
if (sessionData) {
sessionStore.delete(modelId);
}
}
/**
* Dispose the runtime
*/
dispose() {
sessionStore.clear();
this.initialized = false;
}
};
function createONNXRuntime() {
return new ONNXRuntime();
}
// dist/backends/transformers-adapter.js
init_types();
init_tensor();
var sessionStore2 = /* @__PURE__ */ new Map();
var adapterOptions = null;
var TransformersAdapterRuntime = class {
constructor() {
__publicField(this, "name", "wasm");
}
// registers under the wasm slot
get capabilities() {
return {
concurrency: true,
quantization: true,
float16: true,
dynamicShapes: true,
maxBatchSize: 128,
availableMemory: 1024 * 1024 * 1024
};
}
async isAvailable() {
return adapterOptions?.pipelineFactory != null;
}
async initialize() {
if (!adapterOptions?.pipelineFactory) {
throw new EdgeFlowError("TransformersAdapterRuntime requires a pipelineFactory. Call useTransformersBackend({ pipelineFactory }) first.", ErrorCodes.RUNTIME_INIT_FAILED);
}
}
async loadModel(modelData, options = {}) {
const modelName = options.metadata?.name ?? "default";
const metadata = {
name: modelName,
version: "1.0.0",
inputs: [{ name: "input", dtype: "float32", shape: [-1] }],
outputs: [{ name: "output", dtype: "float32", shape: [-1] }],
sizeBytes: modelData.byteLength || 0,
quantization: options.quantization ?? "float32",
format: "onnx"
};
const modelId = `tjs_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 6)}`;
const model = new LoadedModelImpl(metadata, this.name, () => {
const session = sessionStore2.get(modelId);
if (session?.instance.dispose) {
session.instance.dispose();
}
sessionStore2.delete(modelId);
});
getMemoryManager().trackModel(model, () => model.dispose());
return model;
}
/**
* Load a transformers.js pipeline by task + model name
* (called by the higher-level adapter pipeline, not via the
* standard loadModel path).
*/
async loadPipeline(task, model, pipelineOptions) {
if (!adapterOptions?.pipelineFactory) {
throw new EdgeFlowError("Adapter not initialised", ErrorCodes.RUNTIME_NOT_INITIALIZED);
}
const opts = { ...pipelineOptions };
if (adapterOptions.device)
opts["device"] = adapterOptions.device;
if (adapterOptions.dtype)
opts["dtype"] = adapterOptions.dtype;
const instance = await adapterOptions.pipelineFactory(task, model, opts);
const modelId = `tjs_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 6)}`;
sessionStore2.set(modelId, { instance, task, model });
return modelId;
}
/**
* Run inference by passing the raw input to the transformers.js pipeline.
* The result is returned as a single EdgeFlowTensor wrapping the JSON-encoded output
* (since transformers.js returns task-specific objects, not raw tensors).
*/
async run(model, inputs) {
const session = sessionStore2.get(model.id);
if (!session) {
throw new EdgeFlowError(`No transformers.js session for model ${model.id}`, ErrorCodes.MODEL_NOT_LOADED);
}
const inputData = inputs[0]?.toFloat32Array() ?? new Float32Array(0);
const result = await session.instance(inputData);
const resultArray = Array.isArray(result) ? new Float32Array(result.flat(Infinity)) : new Float32Array([0]);
return [new EdgeFlowTensor(resultArray, [resultArray.length], "float32")];
}
/**
* High-level: run the transformers.js pipeline directly with arbitrary input.
* Returns the raw result object (not a tensor).
*/
async runDirect(modelId, input, options) {
const session = sessionStore2.get(modelId);
if (!session) {
throw new EdgeFlowError(`No transformers.js session for model ${modelId}`, ErrorCodes.MODEL_NOT_LOADED);
}
return session.instance(input, options);
}
dispose() {
for (const [id, session] of sessionStore2) {
if (session.instance.dispose) {
session.instance.dispose();
}
sessionStore2.delete(id);
}
}
};
var adapterRuntime = null;
function useTransformersBackend(options) {
adapterOptions = options;
adapterRuntime = new TransformersAdapterRuntime();
registerRuntime("wasm", () => adapterRuntime);
}
function getTransformersAdapter() {
return adapterRuntime;
}
// dist/backends/index.js
function registerAllBackends() {
registerRuntime("wasm", createONNXRuntime);
}
registerAllBackends();
// dist/utils/cache.js
var Cache = class {
constructor(options = {}) {
__publicField(this, "options");
__publicField(this, "cache", /* @__PURE__ */ new Map());
__publicField(this, "currentSize", 0);
__publicField(this, "hits", 0);
__publicField(this, "misses", 0);
this.options = {
strategy: options.strategy ?? "lru",
maxSize: options.maxSize ?? 100 * 1024 * 1024,
// 100MB
maxEntries: options.maxEntries ?? 1e3,
ttl: options.ttl ?? 0,
// 0 = no TTL
persistent: options.persistent ?? false,
name: options.name ?? "edgeflow-cache"
};
if (this.options.persistent) {
this.loadFromStorage();
}
}
/**
* Get value from cache
*/
get(key) {
const entry = this.cache.get(key);
if (!entry) {
this.misses++;
return void 0;
}
if (entry.ttl && Date.now() - entry.createdAt > entry.ttl) {
this.delete(key);
this.misses++;
return void 0;
}
entry.accessedAt = Date.now();
entry.accessCount++;
this.hits++;
return entry.value;
}
/**
* Set value in cache
*/
set(key, value, size, ttl) {
if (this.cache.has(key)) {
this.delete(key);
}
while ((this.currentSize + size > this.options.maxSize || this.cache.size >= this.options.maxEntries) && this.cache.size > 0) {
this.evict();
}
const entryTtl = ttl !== void 0 ? ttl : this.options.ttl > 0 ? this.options.ttl : void 0;
const entry = {
value,
size,
createdAt: Date.now(),
accessedAt: Date.now(),
accessCount: 1,
ttl: entryTtl
};
this.cache.set(key, entry);
this.currentSize += size;
if (this.options.persistent) {
this.saveToStorage();
}
}
/**
* Check if key exists
*/
has(key) {
const entry = this.cache.get(key);
if (!entry)
return false;
if (entry.ttl && Date.now() - entry.createdAt > entry.ttl) {
this.delete(key);
return false;
}
return true;
}
/**
* Delete entry
*/
delete(key) {
const entry = this.cache.get(key);
if (entry) {
this.currentSize -= entry.size;
this.cache.delete(key);
if (this.options.persistent) {
this.saveToStorage();
}
return true;
}
return false;
}
/**
* Clear the cache
*/
clear() {
this.cache.clear();
this.currentSize = 0;
this.hits = 0;
this.misses = 0;
if (this.options.persistent) {
this.clearStorage();
}
}
/**
* Get cache statistics
*/
getStats() {
const total = this.hits + this.misses;
return {
entries: this.cache.size,
size: this.currentSize,
hits: this.hits,
misses: this.misses,
hitRate: total > 0 ? this.hits / total : 0
};
}
/**
* Evict an entry based on strategy
*/
evict() {
let keyToEvict = null;
switch (this.options.strategy) {
case "lru":
keyToEvict = this.findLRU();
break;
case "lfu":
keyToEvict = this.findLFU();
break;
case "fifo":
keyToEvict = this.findOldest();
break;
case "ttl":
keyToEvict = this.findExpired() ?? this.findOldest();
break;
}
if (keyToEvict) {
this.delete(keyToEvict);
}
}
/**
* Find least recently used entry
*/
findLRU() {
let oldest = null;
let oldestTime = Infinity;
for (const [key, entry] of this.cache) {
if (entry.accessedAt < oldestTime) {
oldestTime = entry.accessedAt;
oldest = key;
}
}
return oldest;
}
/**
* Find least frequently used entry
*/
findLFU() {
let lfu = null;
let minCount = Infinity;
for (const [key, entry] of this.cache) {
if (entry.accessCount < minCount) {
minCount = entry.accessCount;
lfu = key;
}
}
return lfu;
}
/**
* Find oldest entry (FIFO)
*/
findOldest() {
let oldest = null;
let oldestTime = Infinity;
for (const [key, entry] of this.cache) {
if (entry.createdAt < oldestTime) {
oldestTime = entry.createdAt;
oldest = key;
}
}
return oldest;
}
/**
* Find expired entry
*/
findExpired() {
const now = Date.now();
for (const [key, entry] of this.cache) {
if (entry.ttl && now - entry.createdAt > entry.ttl) {
return key;
}
}
return null;
}
/**
* Load cache from IndexedDB
*/
async loadFromStorage() {
if (typeof indexedDB === "undefined")
return;
try {
const db = await this.openDB();
const tx = db.transaction("cache", "readonly");
const store = tx.objectStore("cache");
const request = store.getAll();
return new Promise((resolve, reject) => {
request.onsuccess = () => {
const entries = request.result;
for (const { key, entry } of entries) {
this.cache.set(key, entry);
this.currentSize += entry.size;
}
resolve();
};
request.onerror = () => reject(request.error);
});
} catch {
}
}
/**
* Save cache to IndexedDB
*/
async saveToStorage() {
if (typeof indexedDB === "undefined")
return;
try {
const db = await this.openDB();
const tx = db.transaction("cache", "readwrite");
const store = tx.objectStore("cache");
store.clear();
for (const [key, entry] of this.cache) {
store.put({ key, entry });
}
return new Promise((resolve, reject) => {
tx.oncomplete = () => resolve();
tx.onerror = () => reject(tx.error);
});
} catch {
}
}
/**
* Clear IndexedDB storage
*/
async clearStorage() {
if (typeof indexedDB === "undefined")
return;
try {
const db = await this.openDB();
const tx = db.transaction("cache", "readwrite");
const store = tx.objectStore("cache");
store.clear();
} catch {
}
}
/**
* Open IndexedDB database
*/
openDB() {
return new Promise((resolve, reject) => {
const request = indexedDB.open(this.options.name, 1);
request.onupgradeneeded = () => {
const db = request.result;
if (!db.objectStoreNames.contains("cache")) {
db.createObjectStore("cache", { keyPath: "key" });
}
};
request.onsuccess = () => resolve(request.result);
request.onerror = () => reject(request.error);
});
}
};
var InferenceCache = class extends Cache {
/**
* Generate cache key from input
*/
generateKey(modelId, input) {
const inputArray = Array.isArray(input) ? input : Array.from(input);
const hash = this.hashArray(inputArray);
return `${modelId}:${hash}`;
}
/**
* Simple hash function for arrays
*/
hashArray(arr) {
let hash = 0;
const sample = arr.length > 100 ? arr.filter((_, i) => i % Math.floor(arr.length / 100) === 0) : arr;
for (let i = 0; i < sample.length; i++) {
const value = sample[i] ?? 0;
hash = (hash << 5) - hash + (value * 1e3 | 0);
hash |= 0;
}
return hash.toString(36);
}
};
var ModelDownloadCache = class {
constructor(cacheName = "edgeflow-models") {
__publicField(this, "cacheName");
__publicField(this, "cache", null);
this.cacheName = cacheName;
}
/**
* Initialize cache
*/
async ensureCache() {
if (!this.cache) {
if (typeof caches === "undefined") {
throw new Error("Cache API is not available");
}
this.cache = await caches.open(this.cacheName);
}
return this.cache;
}
/**
* Get cached response
*/
async get(url) {
try {
const cache = await this.ensureCache();
return await cache.match(url) ?? void 0;
} catch {
return void 0;
}
}
/**
* Store response in cache
*/
async put(url, response) {
try {
const cache = await this.ensureCache();
await cache.put(url, response.clone());
} catch {
}
}
/**
* Delete cached response
*/
async delete(url) {
try {
const cache = await this.ensureCache();
return await cache.delete(url);
} catch {
return false;
}
}
/**
* Clear all cached models
*/
async clear() {
try {
await caches.delete(this.cacheName);
this.cache = null;
} catch {
}
}
/**
* Get all cached URLs
*/
async keys() {
try {
const cache = await this.ensureCache();
const requests = await cache.keys();
return requests.map((r) => r.url);
} catch {
return [];
}
}
};
function createCache(preset = "medium", options = {}) {
const presets = {
small: {
maxSize: 10 * 1024 * 1024,
// 10MB
maxEntries: 100
},
medium: {
maxSize: 100 * 1024 * 1024,
// 100MB
maxEntries: 500
},
large: {
maxSize: 500 * 1024 * 1024,
// 500MB
maxEntries: 2e3
},
custom: {}
};
return new Cache({ ...presets[preset], ...options });
}
// dist/pipelines/base.js
var BasePipeline = class {
constructor(config) {
__publicField(this, "model", null);
__publicField(this, "config");
__publicField(this, "modelCache");
__publicField(this, "downloadCache");
__publicField(this, "isReady", false);
this.config = config;
this.modelCache = new ModelCache();
this.downloadCache = new ModelDownloadCache();
}
/**
* Initialize the pipeline (load model).
*
* Skips model loading when `config.model === 'default'` — concrete
* subclasses that define their own DEFAULT_MODELS handle all model
* loading in their overridden `initialize()` methods, so the base
* should not attempt to fetch a URL called "default".
*/
async initialize() {
if (this.isReady && this.model)
return;
if (this.config.model === "default") {
this.isReady = true;
return;
}
const cachedModel = this.modelCache.get(this.config.model);
if (cachedModel) {
this.model = cachedModel;
this.isReady = true;
return;
}
this.model = await this.loadModelWithCache(this.config.model);
this.isReady = true;
}
/**
* Load model with caching
*/
async loadModelWithCache(modelPath) {
// Warm the Cache API entry for this model URL if it is not already cached,
// so later loads (including offline) can be served from ModelDownloadCache.
const cachedResponse = await this.downloadCache.get(modelPath);
if (!cachedResponse) {
  try {
    const response = await fetch(modelPath);
    if (response.ok) {
      await this.downloadCache.put(modelPath, response.clone());
    }
  } catch {
  }
}
return loadModel(modelPath, {
runtime: this.config.runtime,
quantization: this.config.quantization,
cache: this.config.cache
});
}
/**
* Run inference (single input)
*/
async run(input, options) {
await this.initialize();
const startTime = performance.now();
const preprocessed = await this.preprocess(input);
const outputs = await runInference(this.model, preprocessed);
const result = await this.postprocess(outputs, options);
if (result && typeof result === "object" && "processingTime" in result) {
result.processingTime = performance.now() - startTime;
}
return result;
}
/**
* Run batch inference
*/
async runBatch(inputs, options) {
await this.initialize();
const results = await Promise.all(inputs.map((input) => this.run(input, options)));
return results;
}
/**
* Get the task type
*/
get task() {
return this.config.task;
}
/**
* Check if pipeline is ready
*/
get ready() {
return this.isReady;
}
/**
* Dispose the pipeline
*/
dispose() {
if (this.model) {
this.model.dispose();
this.model = null;
}
this.isReady = false;
}
};
var pipelineFactories = /* @__PURE__ */ new Map();
function registerPipeline(task, factory) {
pipelineFactories.set(task, factory);
}
function getPipelineFactory(task) {
return pipelineFactories.get(task);
}
var SENTIMENT_LABELS = ["negative", "positive"];
var EMOTION_LABELS = [
"anger",
"disgust",
"fear",
"joy",
"sadness",
"surprise",
"neutral"
];
var IMAGENET_LABELS = [
"tench",
"goldfish",
"great white shark",
"tiger shark",
"hammerhead",
"electric ray",
"stingray",
"cock",
"hen",
"ostrich"
];
// dist/pipelines/text-classification.js
init_tensor();
// dist/utils/tokenizer.js
init_types();
var Tokenizer = class _Tokenizer {
constructor() {
__publicField(this, "vocab", /* @__PURE__ */ new Map());
__publicField(this, "reverseVocab", /* @__PURE__ */ new Map());
__publicField(this, "merges", /* @__PURE__ */ new Map());
__publicField(this, "addedTokens", /* @__PURE__ */ new Map());
__publicField(this, "specialTokens", /* @__PURE__ */ new Set());
__publicField(this, "modelType", "BPE");
__publicField(this, "unkToken", "[UNK]");
__publicField(this, "continuingSubwordPrefix", "##");
// Special token IDs
__publicField(this, "padTokenId", 0);
__publicField(this, "unkTokenId", 0);
__publicField(this, "clsTokenId");
__publicField(this, "sepTokenId");
__publicField(this, "maskTokenId");
__publicField(this, "bosTokenId");
__publicField(this, "eosTokenId");
// Config
__publicField(this, "maxLength", 512);
__publicField(this, "doLowerCase", false);
__publicField(this, "stripAccents", false);
// Post-processor config
__publicField(this, "postProcessor");
// Byte encoder for BPE
__publicField(this, "byteEncoder", /* @__PURE__ */ new Map());
__publicField(this, "byteDecoder", /* @__PURE__ */ new Map());
this.initByteEncoder();
}
/**
* Initialize byte encoder/decoder for BPE
*/
initByteEncoder() {
const bytes = [];
for (let i = 33; i <= 126; i++)
bytes.push(i);
for (let i = 161; i <= 172; i++)
bytes.push(i);
for (let i = 174; i <= 255; i++)
bytes.push(i);
const chars = [...bytes];
let n = 0;
for (let i = 0; i < 256; i++) {
if (!bytes.includes(i)) {
bytes.push(i);
chars.push(256 + n);
n++;
}
}
for (let i = 0; i < bytes.length; i++) {
const byte = bytes[i];
const char = String.fromCharCode(chars[i]);
this.byteEncoder.set(byte, char);
this.byteDecoder.set(char, byte);
}
}
/**
* Load from HuggingFace tokenizer.json
*/
static async fromJSON(json) {
const tokenizer = new _Tokenizer();
const data = typeof json === "string" ? JSON.parse(json) : json;
if (data.model) {
tokenizer.modelType = data.model.type;
if (data.model.vocab) {
if (Array.isArray(data.model.vocab)) {
const unigramVocab = data.model.vocab;
for (let i = 0; i < unigramVocab.length; i++) {
const entry = unigramVocab[i];
const token = Array.isArray(entry) ? entry[0] : entry;
tokenizer.vocab.set(token, i);
tokenizer.reverseVocab.set(i, token);
}
} else {
for (const [token, id] of Object.entries(data.model.vocab)) {
tokenizer.vocab.set(token, id);
tokenizer.reverseVocab.set(id, token);
}
}
}
if (data.model.merges) {
for (let i = 0; i < data.model.merges.length; i++) {
  const merge = data.model.merges[i];
  tokenizer.merges.set(Array.isArray(merge) ? merge.join(" ") : merge, i);
}
}
tokenizer.unkToken = data.model.unk_token ?? "[UNK]";
tokenizer.continuingSubwordPrefix = data.model.continuing_subword_prefix ?? "##";
}
if (data.added_tokens) {
for (const token of data.added_tokens) {
tokenizer.addedTokens.set(token.content, token.id);
tokenizer.reverseVocab.set(token.id, token.content);
if (token.special) {
tokenizer.specialTokens.add(token.content);
}
const content = token.content.toLowerCase();
if (content.includes("pad"))
tokenizer.padTokenId = token.id;
if (content.includes("unk"))
tokenizer.unkTokenId = token.id;
if (content.includes("cls") || content === "[cls]")
tokenizer.clsTokenId = token.id;
if (content.includes("sep") || content === "[sep]")
tokenizer.sepTokenId = token.id;
if (content.includes("mask"))
tokenizer.maskTokenId = token.id;
if (content.includes("bos") || content === "")
tokenizer.bosTokenId = token.id;
if (content.includes("eos") || content === " ")
tokenizer.eosTokenId = token.id;
}
}
if (data.normalizer) {
tokenizer.doLowerCase = data.normalizer.lowercase ?? false;
tokenizer.stripAccents = data.normalizer.strip_accents ?? false;
}
if (data.truncation) {
tokenizer.maxLength = data.truncation.max_length;
}
if (data.post_processor) {
tokenizer.postProcessor = data.post_processor;
}
return tokenizer;
}
/**
* Load from URL (tokenizer.json)
*/
static async fromUrl(url) {
const response = await fetch(url);
if (!response.ok) {
throw new EdgeFlowError(`Failed to load tokenizer from ${url}: ${response.status}`, ErrorCodes.MODEL_NOT_FOUND);
}
const json = await response.json();
return _Tokenizer.fromJSON(json);
}
/**
* Load from HuggingFace Hub
*/
static async fromHuggingFace(modelId, options) {
const revision = options?.revision ?? "main";
const url = `https://huggingface.co/${modelId}/resolve/${revision}/tokenizer.json`;
return _Tokenizer.fromUrl(url);
}
/**
* Normalize text
*/
normalize(text) {
let result = text;
if (this.doLowerCase) {
result = result.toLowerCase();
}
if (this.stripAccents) {
result = result.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
}
result = result.replace(/\s+/g, " ").trim();
return result;
}
/**
* Pre-tokenize text (split into words)
*/
preTokenize(text) {
const pattern = /'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+/gu;
const matches = text.match(pattern);
return matches ?? [text];
}
/**
* Encode text to bytes (for BPE)
*/
textToBytes(text) {
const encoder = new TextEncoder();
const bytes = encoder.encode(text);
return Array.from(bytes).map((b) => this.byteEncoder.get(b) ?? "").join("");
}
/**
* Decode bytes to text (for BPE)
*/
bytesToText(text) {
const bytes = new Uint8Array(text.split("").map((c) => this.byteDecoder.get(c) ?? 0));
const decoder = new TextDecoder("utf-8", { fatal: false });
return decoder.decode(bytes);
}
/**
* Get BPE pairs from word
*/
getPairs(word) {
const pairs = /* @__PURE__ */ new Set();
for (let i = 0; i < word.length - 1; i++) {
pairs.add(`${word[i]} ${word[i + 1]}`);
}
return pairs;
}
/**
* Apply BPE to a word
*/
bpe(token) {
if (this.vocab.has(token)) {
return [token];
}
let word = token.split("");
let pairs = this.getPairs(word);
if (pairs.size === 0) {
return [token];
}
while (true) {
let minPair = null;
let minRank = Infinity;
for (const pair of pairs) {
const rank = this.merges.get(pair);
if (rank !== void 0 && rank < minRank) {
minRank = rank;
minPair = pair;
}
}
if (minPair === null)
break;
const parts = minPair.split(" ");
const first = parts[0];
const second = parts[1];
if (!first || !second)
break;
const newWord = [];
let i = 0;
while (i < word.length) {
const j = word.indexOf(first, i);
if (j === -1) {
newWord.push(...word.slice(i));
break;
}
newWord.push(...word.slice(i, j));
if (word[j] === first && j < word.length - 1 && word[j + 1] === second) {
newWord.push(first + second);
i = j + 2;
} else {
newWord.push(word[j]);
i = j + 1;
}
}
word = newWord;
if (word.length === 1)
break;
pairs = this.getPairs(word);
}
return word;
}
/**
* WordPiece tokenization
*/
wordPiece(word) {
if (this.vocab.has(word)) {
return [word];
}
const tokens = [];
let start = 0;
while (start < word.length) {
let end = word.length;
let curSubstr = null;
while (start < end) {
let substr = word.slice(start, end);
if (start > 0) {
substr = this.continuingSubwordPrefix + substr;
}
if (this.vocab.has(substr)) {
curSubstr = substr;
break;
}
end--;
}
if (curSubstr === null) {
tokens.push(this.unkToken);
start++;
} else {
tokens.push(curSubstr);
start = end;
}
}
return tokens;
}
/**
* Tokenize a single word
*/
tokenizeWord(word) {
if (this.addedTokens.has(word)) {
return [word];
}
switch (this.modelType) {
case "BPE": {
const byteStr = this.textToBytes(word);
return this.bpe(byteStr);
}
case "WordPiece":
return this.wordPiece(word);
case "Unigram":
return this.unigramTokenize(word);
default:
return this.vocab.has(word) ? [word] : [this.unkToken];
}
}
/**
* Greedy longest-match tokenizer for SentencePiece Unigram models.
* Adds the U+2581 (▁) word-start prefix expected by SPM-based models.
*/
unigramTokenize(word) {
const prefixedWord = "\u2581" + word;
const tokens = [];
let start = 0;
const text = prefixedWord;
while (start < text.length) {
let end = text.length;
let found = false;
while (end > start) {
const sub2 = text.slice(start, end);
if (this.vocab.has(sub2)) {
tokens.push(sub2);
start = end;
found = true;
break;
}
end--;
}
if (!found) {
const ch = text[start];
tokens.push(this.vocab.has(ch) ? ch : this.unkToken);
start++;
}
}
return tokens.length > 0 ? tokens : [this.unkToken];
}
/**
* Main tokenization
*/
tokenize(text) {
const normalized = this.normalize(text);
const sortedAddedTokens = Array.from(this.addedTokens.keys()).sort((a, b) => b.length - a.length);
// Split the text into plain segments and added tokens so added tokens keep
// their original position instead of all being emitted before the word tokens.
let segments = [{ text: normalized, isAddedToken: false }];
for (const addedToken of sortedAddedTokens) {
const nextSegments = [];
for (const segment of segments) {
if (segment.isAddedToken || !segment.text.includes(addedToken)) {
nextSegments.push(segment);
continue;
}
const parts = segment.text.split(addedToken);
for (let i = 0; i < parts.length; i++) {
if (parts[i]) {
nextSegments.push({ text: parts[i], isAddedToken: false });
}
if (i < parts.length - 1) {
nextSegments.push({ text: addedToken, isAddedToken: true });
}
}
}
segments = nextSegments;
}
const tokens = [];
for (const segment of segments) {
if (segment.isAddedToken) {
tokens.push(segment.text);
continue;
}
if (!segment.text.trim()) {
continue;
}
const words = this.preTokenize(segment.text);
for (const word of words) {
if (!word) {
continue;
}
const wordTokens = this.tokenizeWord(word);
tokens.push(...wordTokens);
}
}
return tokens;
}
/**
* Convert tokens to IDs
*/
convertTokensToIds(tokens) {
return tokens.map((token) => {
const addedId = this.addedTokens.get(token);
if (addedId !== void 0)
return addedId;
const vocabId = this.vocab.get(token);
if (vocabId !== void 0)
return vocabId;
return this.unkTokenId;
});
}
/**
* Convert IDs to tokens
*/
convertIdsToTokens(ids) {
return ids.map((id) => this.reverseVocab.get(id) ?? this.unkToken);
}
/**
* Apply post-processing (add special tokens)
*/
postProcess(ids, pairIds) {
if (!this.postProcessor) {
const result2 = [];
const typeIds2 = [];
if (this.clsTokenId !== void 0) {
result2.push(this.clsTokenId);
typeIds2.push(0);
}
result2.push(...ids);
typeIds2.push(...ids.map(() => 0));
if (this.sepTokenId !== void 0) {
result2.push(this.sepTokenId);
typeIds2.push(0);
}
if (pairIds) {
result2.push(...pairIds);
typeIds2.push(...pairIds.map(() => 1));
if (this.sepTokenId !== void 0) {
result2.push(this.sepTokenId);
typeIds2.push(1);
}
}
return { ids: result2, typeIds: typeIds2 };
}
const template = pairIds ? this.postProcessor.pair : this.postProcessor.single;
if (!template) {
return { ids, typeIds: ids.map(() => 0) };
}
const result = [];
const typeIds = [];
for (const item of template) {
if ("SpecialToken" in item) {
const specialToken = this.postProcessor.special_tokens?.[item.SpecialToken.id];
if (specialToken) {
result.push(...specialToken.ids);
typeIds.push(...specialToken.ids.map(() => item.SpecialToken.type_id));
}
} else if ("Sequence" in item) {
const seqIds = item.Sequence.id === "A" ? ids : pairIds ?? [];
result.push(...seqIds);
typeIds.push(...seqIds.map(() => item.Sequence.type_id));
}
}
return { ids: result, typeIds };
}
/**
* Encode text
*/
encode(text, options = {}) {
const { addSpecialTokens = true, maxLength = this.maxLength, padding = "max_length", truncation = true, returnAttentionMask = true, returnTokenTypeIds = false, textPair } = options;
const tokens = this.tokenize(text);
let inputIds = this.convertTokensToIds(tokens);
let pairIds;
if (textPair) {
const pairTokens = this.tokenize(textPair);
pairIds = this.convertTokensToIds(pairTokens);
}
let tokenTypeIds;
if (addSpecialTokens) {
const processed = this.postProcess(inputIds, pairIds);
inputIds = processed.ids;
if (returnTokenTypeIds) {
tokenTypeIds = processed.typeIds;
}
} else if (pairIds) {
if (returnTokenTypeIds) {
// Type ids must be derived before the pair is appended to inputIds.
tokenTypeIds = [...inputIds.map(() => 0), ...pairIds.map(() => 1)];
}
inputIds = [...inputIds, ...pairIds];
}
if (truncation && inputIds.length > maxLength) {
inputIds = inputIds.slice(0, maxLength);
if (tokenTypeIds) {
tokenTypeIds = tokenTypeIds.slice(0, maxLength);
}
}
let attentionMask = [];
if (returnAttentionMask) {
attentionMask = inputIds.map(() => 1);
}
if (padding === "max_length" && inputIds.length < maxLength) {
const padLength = maxLength - inputIds.length;
inputIds = [...inputIds, ...new Array(padLength).fill(this.padTokenId)];
if (returnAttentionMask) {
attentionMask = [...attentionMask, ...new Array(padLength).fill(0)];
}
if (tokenTypeIds) {
tokenTypeIds = [...tokenTypeIds, ...new Array(padLength).fill(0)];
}
}
const result = {
inputIds,
attentionMask
};
if (returnTokenTypeIds && tokenTypeIds) {
result.tokenTypeIds = tokenTypeIds;
}
return result;
}
/**
* Batch encode
*/
encodeBatch(texts, options = {}) {
if (options.padding === "longest") {
const encodings = texts.map((t) => this.encode(t, { ...options, padding: "do_not_pad" }));
const maxLen = Math.max(...encodings.map((e) => e.inputIds.length));
return texts.map((t) => this.encode(t, { ...options, maxLength: maxLen, padding: "max_length" }));
}
return texts.map((t) => this.encode(t, options));
}
/**
* Decode IDs to text
*/
decode(ids, skipSpecialTokens = true) {
let tokens = this.convertIdsToTokens(ids);
if (skipSpecialTokens) {
tokens = tokens.filter((t) => !this.specialTokens.has(t));
}
let text = tokens.join("");
if (this.modelType === "BPE") {
text = this.bytesToText(text);
}
if (this.modelType === "WordPiece") {
text = text.replace(new RegExp(this.continuingSubwordPrefix, "g"), "");
}
text = text.replace(/\s+/g, " ").trim();
return text;
}
/**
* Decode batch
*/
decodeBatch(batchIds, skipSpecialTokens = true) {
return batchIds.map((ids) => this.decode(ids, skipSpecialTokens));
}
/**
* Get vocabulary size
*/
get vocabSize() {
return this.vocab.size + this.addedTokens.size;
}
/**
* Get special token IDs
*/
getSpecialTokenIds() {
return {
padTokenId: this.padTokenId,
unkTokenId: this.unkTokenId,
clsTokenId: this.clsTokenId,
sepTokenId: this.sepTokenId,
maskTokenId: this.maskTokenId,
bosTokenId: this.bosTokenId,
eosTokenId: this.eosTokenId
};
}
/**
* Get config
*/
getConfig() {
return {
vocabSize: this.vocabSize,
maxLength: this.maxLength,
padTokenId: this.padTokenId,
unkTokenId: this.unkTokenId,
clsTokenId: this.clsTokenId,
sepTokenId: this.sepTokenId,
maskTokenId: this.maskTokenId,
bosTokenId: this.bosTokenId,
eosTokenId: this.eosTokenId
};
}
/**
* Check if token is special
*/
isSpecialToken(token) {
return this.specialTokens.has(token);
}
/**
* Get token ID
*/
getTokenId(token) {
return this.addedTokens.get(token) ?? this.vocab.get(token);
}
/**
* Get token from ID
*/
getToken(id) {
return this.reverseVocab.get(id);
}
};
function createBasicTokenizer() {
const tokenizer = new Tokenizer();
return tokenizer;
}
async function loadTokenizer(url) {
return Tokenizer.fromUrl(url);
}
async function loadTokenizerFromHub(modelId, options) {
return Tokenizer.fromHuggingFace(modelId, options);
}
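/**
* Usage sketch for the tokenizer helpers above. The model id below is only an
* illustration; any Hub repo that ships a `tokenizer.json` behaves the same
* way, and the exact ids produced depend on that file's vocab and merges.
*
* @example
* ```typescript
* const tokenizer = await loadTokenizerFromHub('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
*
* // Single text: fixed-length ids plus an attention mask.
* const { inputIds, attentionMask } = tokenizer.encode('EdgeFlow runs in the browser', {
*   maxLength: 16,
*   padding: 'max_length',
*   truncation: true,
* });
*
* // Batch: pad to the longest sequence in the batch instead of maxLength.
* const batch = tokenizer.encodeBatch(['short', 'a slightly longer sentence'], { padding: 'longest' });
*
* // Round-trip back to text (special tokens are stripped by default).
* const text = tokenizer.decode(inputIds);
* ```
*/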
// dist/pipelines/text-classification.js
init_model_loader();
var DEFAULT_MODELS = {
model: "https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/onnx/model_quantized.onnx",
tokenizer: "https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/tokenizer.json"
};
var DEFAULT_SST2_LABELS = ["NEGATIVE", "POSITIVE"];
var TextClassificationPipeline = class extends BasePipeline {
constructor(config, labels) {
super(config);
__publicField(this, "tokenizer", null);
__publicField(this, "onnxModel", null);
__publicField(this, "labels");
__publicField(this, "modelUrl");
__publicField(this, "tokenizerUrl");
this.labels = labels ?? DEFAULT_SST2_LABELS;
this.modelUrl = config.model !== "default" ? config.model : DEFAULT_MODELS.model;
this.tokenizerUrl = DEFAULT_MODELS.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setLabels(labels) {
this.labels = labels;
}
async run(input, options) {
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
await this.initialize();
const startTime = performance.now();
const results = [];
for (const text of inputs) {
const tensorInputs = await this.preprocess(text);
const outputs = await this.runInference(tensorInputs);
const result = await this.postprocess(outputs, options);
results.push(result);
}
const processingTime = performance.now() - startTime;
for (const result of results) {
result.processingTime = processingTime / results.length;
}
return isBatch ? results : results[0];
}
async preprocess(input) {
const text = Array.isArray(input) ? input[0] : input;
const encoded = this.tokenizer.encode(text, {
maxLength: 128,
padding: "max_length",
truncation: true
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map((id) => BigInt(id))), [1, encoded.inputIds.length], "int64");
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map((m) => BigInt(m))), [1, encoded.attentionMask.length], "int64");
return [inputIds, attentionMask];
}
async runInference(inputs) {
const namedInputs = /* @__PURE__ */ new Map();
namedInputs.set("input_ids", inputs[0]);
namedInputs.set("attention_mask", inputs[1]);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
return outputs;
}
async postprocess(outputs, options) {
const logits = outputs[0];
if (!logits) {
return { label: "unknown", score: 0 };
}
const probs = softmax(logits, -1);
const probsArray = probs.toFloat32Array();
let maxIdx = 0;
let maxScore = probsArray[0] ?? 0;
for (let i = 1; i < probsArray.length; i++) {
if ((probsArray[i] ?? 0) > maxScore) {
maxScore = probsArray[i] ?? 0;
maxIdx = i;
}
}
const label = options?.labels?.[maxIdx] ?? this.labels[maxIdx] ?? `class_${maxIdx}`;
return {
label,
score: maxScore
};
}
};
var SentimentAnalysisPipeline = class extends TextClassificationPipeline {
constructor(config) {
super(config, SENTIMENT_LABELS);
}
async analyze(text, options) {
return this.run(text, options);
}
};
function createTextClassificationPipeline(config = {}) {
return new TextClassificationPipeline({
task: "text-classification",
model: config.model ?? "default",
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization
});
}
function createSentimentAnalysisPipeline(config = {}) {
return new SentimentAnalysisPipeline({
task: "sentiment-analysis",
model: config.model ?? "default",
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization
});
}
registerPipeline("text-classification", (config) => new TextClassificationPipeline(config));
registerPipeline("sentiment-analysis", (config) => new SentimentAnalysisPipeline(config));
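/**
* Usage sketch for the classification factories above. With `model: 'default'`
* the quantized SST-2 DistilBERT checkpoint from DEFAULT_MODELS is fetched, so
* the built-in NEGATIVE/POSITIVE labels apply; pass `labels` in the run options
* or call `setLabels()` when pointing at a different checkpoint.
*
* @example
* ```typescript
* const classifier = createTextClassificationPipeline({ cache: true });
* const result = await classifier.run('I love this library!');
* console.log(result.label, result.score); // e.g. "POSITIVE", 0.99...
*
* // Batched input returns one result per text.
* const results = await classifier.run(['great stuff', 'not my thing']);
*
* const sentiment = createSentimentAnalysisPipeline();
* const mood = await sentiment.analyze('This demo is fantastic');
* ```
*/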
// dist/pipelines/feature-extraction.js
init_tensor();
init_model_loader();
var DEFAULT_MODELS2 = {
model: "https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model_quantized.onnx",
tokenizer: "https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/tokenizer.json"
};
var DEFAULT_EMBEDDING_DIM = 384;
var FeatureExtractionPipeline = class extends BasePipeline {
constructor(config, embeddingDim = DEFAULT_EMBEDDING_DIM) {
super(config);
__publicField(this, "tokenizer", null);
__publicField(this, "onnxModel", null);
__publicField(this, "embeddingDim");
__publicField(this, "modelUrl");
__publicField(this, "tokenizerUrl");
this.embeddingDim = embeddingDim;
this.modelUrl = config.model !== "default" ? config.model : DEFAULT_MODELS2.model;
this.tokenizerUrl = DEFAULT_MODELS2.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
async run(input, options) {
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
await this.initialize();
const startTime = performance.now();
const results = [];
for (const text of inputs) {
const tensorInputs = await this.preprocess(text);
const outputs = await this.runInference(tensorInputs);
const result = await this.postprocess(outputs, options);
results.push(result);
}
const processingTime = performance.now() - startTime;
for (const result of results) {
result.processingTime = processingTime / results.length;
}
return isBatch ? results : results[0];
}
async preprocess(input) {
const text = Array.isArray(input) ? input[0] : input;
const encoded = this.tokenizer.encode(text, {
maxLength: 128,
padding: "max_length",
truncation: true
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map((id) => BigInt(id))), [1, encoded.inputIds.length], "int64");
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map((m) => BigInt(m))), [1, encoded.attentionMask.length], "int64");
const tokenTypeIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(() => BigInt(0))), [1, encoded.inputIds.length], "int64");
return [inputIds, attentionMask, tokenTypeIds];
}
async runInference(inputs) {
const namedInputs = /* @__PURE__ */ new Map();
namedInputs.set("input_ids", inputs[0]);
namedInputs.set("attention_mask", inputs[1]);
namedInputs.set("token_type_ids", inputs[2]);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
return outputs;
}
async postprocess(outputs, options) {
const hiddenStates = outputs[0];
if (!hiddenStates) {
return { embeddings: [] };
}
const pooling = options?.pooling ?? "mean";
const normalize = options?.normalize ?? true;
let embeddings;
switch (pooling) {
case "cls":
embeddings = this.extractCLSEmbedding(hiddenStates);
break;
case "max":
embeddings = this.maxPooling(hiddenStates);
break;
case "none":
embeddings = hiddenStates.toArray();
break;
case "mean":
default:
embeddings = this.meanPooling(hiddenStates);
break;
}
if (normalize) {
embeddings = this.normalizeVector(embeddings);
}
if (options?.outputDim && options.outputDim < embeddings.length) {
embeddings = embeddings.slice(0, options.outputDim);
}
return { embeddings };
}
extractCLSEmbedding(hiddenStates) {
const data = hiddenStates.toFloat32Array();
const embeddingDim = hiddenStates.shape[2] ?? this.embeddingDim;
return Array.from(data.slice(0, embeddingDim));
}
meanPooling(hiddenStates) {
const data = hiddenStates.toFloat32Array();
const seqLen = hiddenStates.shape[1] ?? 1;
const embeddingDim = hiddenStates.shape[2] ?? this.embeddingDim;
const result = new Float32Array(embeddingDim);
for (let i = 0; i < seqLen; i++) {
for (let j = 0; j < embeddingDim; j++) {
result[j] = (result[j] ?? 0) + (data[i * embeddingDim + j] ?? 0) / seqLen;
}
}
return Array.from(result);
}
maxPooling(hiddenStates) {
const data = hiddenStates.toFloat32Array();
const seqLen = hiddenStates.shape[1] ?? 1;
const embeddingDim = hiddenStates.shape[2] ?? this.embeddingDim;
const result = new Array(embeddingDim).fill(-Infinity);
for (let i = 0; i < seqLen; i++) {
for (let j = 0; j < embeddingDim; j++) {
const val = data[i * embeddingDim + j] ?? 0;
if (val > (result[j] ?? -Infinity)) {
result[j] = val;
}
}
}
return result;
}
normalizeVector(vec) {
let norm = 0;
for (const v of vec) {
norm += v * v;
}
norm = Math.sqrt(norm);
if (norm === 0)
return vec;
return vec.map((v) => v / norm);
}
};
function createFeatureExtractionPipeline(config = {}) {
return new FeatureExtractionPipeline({
task: "feature-extraction",
model: config.model ?? "default",
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization
});
}
registerPipeline("feature-extraction", (config) => new FeatureExtractionPipeline(config));
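/**
* Usage sketch for feature extraction. The default checkpoint is the quantized
* all-MiniLM-L6-v2 model above (384-dim embeddings); because embeddings are
* L2-normalized by default, a plain dot product already gives cosine similarity.
*
* @example
* ```typescript
* const extractor = createFeatureExtractionPipeline();
* const a = await extractor.run('The cat sat on the mat', { pooling: 'mean', normalize: true });
* const b = await extractor.run('A kitten rested on a rug');
*
* const cosine = a.embeddings.reduce((sum, v, i) => sum + v * (b.embeddings[i] ?? 0), 0);
* console.log(cosine); // higher means more similar
* ```
*/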
// dist/pipelines/image-classification.js
init_tensor();
// dist/utils/preprocessor.js
init_tensor();
var DEFAULT_IMAGE_OPTIONS = {
width: 224,
height: 224,
resizeMode: "cover",
mean: [0.485, 0.456, 0.406],
std: [0.229, 0.224, 0.225],
rescaleFactor: 1 / 255,
grayscale: false,
channelFormat: "CHW",
dtype: "float32",
doResize: true,
doRescale: true,
doNormalize: true,
doCenterCrop: false,
paddingColor: [0, 0, 0]
};
var ImagePreprocessor = class _ImagePreprocessor {
constructor(options = {}) {
__publicField(this, "options");
__publicField(this, "canvas", null);
__publicField(this, "ctx", null);
const size = options.size;
const width = options.width ?? size ?? DEFAULT_IMAGE_OPTIONS.width;
const height = options.height ?? size ?? DEFAULT_IMAGE_OPTIONS.height;
this.options = {
...DEFAULT_IMAGE_OPTIONS,
...options,
width,
height,
size: size ?? width,
cropSize: options.cropSize ?? options.size ?? width
};
}
/**
* Load from HuggingFace preprocessor_config.json
*/
static fromConfig(config) {
const options = {};
const size = config["size"];
if (size !== void 0) {
if (typeof size === "number") {
options.size = size;
} else if (typeof size === "object" && size !== null) {
const sizeObj = size;
options.width = sizeObj.width ?? sizeObj.shortest_edge;
options.height = sizeObj.height ?? sizeObj.shortest_edge;
}
}
const cropSize = config["crop_size"];
if (cropSize !== void 0) {
if (typeof cropSize === "number") {
options.cropSize = cropSize;
} else if (typeof cropSize === "object" && cropSize !== null) {
const cropObj = cropSize;
options.cropSize = { width: cropObj.width ?? 224, height: cropObj.height ?? 224 };
}
}
const imageMean = config["image_mean"];
if (Array.isArray(imageMean)) {
options.mean = imageMean;
}
const imageStd = config["image_std"];
if (Array.isArray(imageStd)) {
options.std = imageStd;
}
const rescaleFactor = config["rescale_factor"];
if (typeof rescaleFactor === "number") {
options.rescaleFactor = rescaleFactor;
}
const doResize = config["do_resize"];
if (typeof doResize === "boolean") {
options.doResize = doResize;
}
const doRescale = config["do_rescale"];
if (typeof doRescale === "boolean") {
options.doRescale = doRescale;
}
const doNormalize = config["do_normalize"];
if (typeof doNormalize === "boolean") {
options.doNormalize = doNormalize;
}
const doCenterCrop = config["do_center_crop"];
if (typeof doCenterCrop === "boolean") {
options.doCenterCrop = doCenterCrop;
}
if (config["resample"] !== void 0) {
options.resizeMode = "cover";
}
return new _ImagePreprocessor(options);
}
/**
* Load from HuggingFace Hub
*/
static async fromUrl(url) {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to load preprocessor config from ${url}`);
}
const config = await response.json();
return _ImagePreprocessor.fromConfig(config);
}
/**
* Load from HuggingFace Hub by model ID
*/
static async fromHuggingFace(modelId, options) {
const revision = options?.revision ?? "main";
const url = `https://huggingface.co/${modelId}/resolve/${revision}/preprocessor_config.json`;
return _ImagePreprocessor.fromUrl(url);
}
/**
* Initialize canvas (lazy)
*/
ensureCanvas() {
if (!this.canvas) {
if (typeof document !== "undefined") {
this.canvas = document.createElement("canvas");
this.ctx = this.canvas.getContext("2d");
} else {
throw new Error("ImagePreprocessor requires a browser environment");
}
}
}
/**
* Process an image
*/
async process(input) {
let imageData;
if (typeof input === "string") {
imageData = await this.loadFromUrl(input);
} else if (input instanceof Blob || input instanceof File) {
imageData = await this.loadFromBlob(input);
} else if (input instanceof ImageData) {
imageData = input;
} else {
imageData = this.toImageData(input);
}
let processed = imageData;
if (this.options.doResize) {
processed = this.resize(processed);
}
if (this.options.doCenterCrop) {
processed = this.centerCrop(processed);
}
return this.toTensor(processed);
}
/**
* Process multiple images (batch)
*/
async processBatch(inputs) {
const tensors = await Promise.all(inputs.map((input) => this.process(input)));
const batchSize = tensors.length;
const firstTensor = tensors[0];
if (!firstTensor) {
return new EdgeFlowTensor(new Float32Array(0), [0], "float32");
}
const channels = firstTensor.shape[0] ?? 3;
const height = firstTensor.shape[1] ?? this.options.height;
const width = firstTensor.shape[2] ?? this.options.width;
const batchData = new Float32Array(batchSize * channels * height * width);
for (let i = 0; i < tensors.length; i++) {
const t = tensors[i];
if (t) {
batchData.set(t.toFloat32Array(), i * channels * height * width);
}
}
return new EdgeFlowTensor(batchData, [batchSize, channels, height, width], "float32");
}
/**
* Load image from URL or base64
*/
async loadFromUrl(url) {
return new Promise((resolve, reject) => {
const img = new Image();
img.crossOrigin = "anonymous";
img.onload = () => {
resolve(this.toImageData(img));
};
img.onerror = () => {
reject(new Error(`Failed to load image from ${url}`));
};
img.src = url;
});
}
/**
* Load image from Blob/File
*/
async loadFromBlob(blob) {
const url = URL.createObjectURL(blob);
try {
return await this.loadFromUrl(url);
} finally {
URL.revokeObjectURL(url);
}
}
/**
* Center crop image
*/
centerCrop(imageData) {
const cropSize = this.options.cropSize;
let cropWidth;
let cropHeight;
if (typeof cropSize === "number") {
cropWidth = cropSize;
cropHeight = cropSize;
} else {
cropWidth = cropSize.width;
cropHeight = cropSize.height;
}
const srcX = Math.max(0, Math.floor((imageData.width - cropWidth) / 2));
const srcY = Math.max(0, Math.floor((imageData.height - cropHeight) / 2));
this.ensureCanvas();
const srcCanvas = document.createElement("canvas");
srcCanvas.width = imageData.width;
srcCanvas.height = imageData.height;
const srcCtx = srcCanvas.getContext("2d");
srcCtx.putImageData(imageData, 0, 0);
this.canvas.width = cropWidth;
this.canvas.height = cropHeight;
this.ctx.drawImage(srcCanvas, srcX, srcY, cropWidth, cropHeight, 0, 0, cropWidth, cropHeight);
return this.ctx.getImageData(0, 0, cropWidth, cropHeight);
}
/**
* Convert image element to ImageData
*/
toImageData(source) {
this.ensureCanvas();
const { width, height } = source;
this.canvas.width = width;
this.canvas.height = height;
this.ctx.drawImage(source, 0, 0);
return this.ctx.getImageData(0, 0, width, height);
}
/**
* Resize image data
*/
resize(imageData) {
const { width, height, resizeMode } = this.options;
this.ensureCanvas();
let srcX = 0, srcY = 0, srcW = imageData.width, srcH = imageData.height;
let dstX = 0, dstY = 0, dstW = width, dstH = height;
if (resizeMode === "contain") {
const scale = Math.min(width / imageData.width, height / imageData.height);
dstW = Math.round(imageData.width * scale);
dstH = Math.round(imageData.height * scale);
dstX = Math.round((width - dstW) / 2);
dstY = Math.round((height - dstH) / 2);
} else if (resizeMode === "cover") {
const scale = Math.max(width / imageData.width, height / imageData.height);
srcW = Math.round(width / scale);
srcH = Math.round(height / scale);
srcX = Math.round((imageData.width - srcW) / 2);
srcY = Math.round((imageData.height - srcH) / 2);
}
const srcCanvas = document.createElement("canvas");
srcCanvas.width = imageData.width;
srcCanvas.height = imageData.height;
const srcCtx = srcCanvas.getContext("2d");
srcCtx.putImageData(imageData, 0, 0);
this.canvas.width = width;
this.canvas.height = height;
if (resizeMode === "contain" || resizeMode === "pad") {
this.ctx.fillStyle = "black";
this.ctx.fillRect(0, 0, width, height);
}
this.ctx.drawImage(srcCanvas, srcX, srcY, srcW, srcH, dstX, dstY, dstW, dstH);
return this.ctx.getImageData(0, 0, width, height);
}
/**
* Convert ImageData to tensor
*/
toTensor(imageData) {
const { mean: mean2, std, grayscale, channelFormat, dtype, doRescale, rescaleFactor, doNormalize } = this.options;
const height = imageData.height;
const width = imageData.width;
const channels = grayscale ? 1 : 3;
const data = new Float32Array(channels * height * width);
const pixels = imageData.data;
for (let y = 0; y < height; y++) {
for (let x = 0; x < width; x++) {
const pixelIdx = (y * width + x) * 4;
if (grayscale) {
let gray = 0.299 * (pixels[pixelIdx] ?? 0) + 0.587 * (pixels[pixelIdx + 1] ?? 0) + 0.114 * (pixels[pixelIdx + 2] ?? 0);
if (doRescale) {
gray *= rescaleFactor;
}
if (doNormalize) {
gray = (gray - (mean2[0] ?? 0)) / (std[0] ?? 1);
}
const idx = y * width + x;
data[idx] = gray;
} else if (channelFormat === "CHW") {
for (let c = 0; c < 3; c++) {
let value = pixels[pixelIdx + c] ?? 0;
if (doRescale) {
value *= rescaleFactor;
}
if (doNormalize) {
value = (value - (mean2[c] ?? 0)) / (std[c] ?? 1);
}
const idx = c * height * width + y * width + x;
data[idx] = value;
}
} else {
for (let c = 0; c < 3; c++) {
let value = pixels[pixelIdx + c] ?? 0;
if (doRescale) {
value *= rescaleFactor;
}
if (doNormalize) {
value = (value - (mean2[c] ?? 0)) / (std[c] ?? 1);
}
const idx = y * width * 3 + x * 3 + c;
data[idx] = value;
}
}
}
}
const shape = channelFormat === "CHW" ? [channels, height, width] : [height, width, channels];
return new EdgeFlowTensor(data, shape, dtype);
}
/**
* Get current options
*/
getOptions() {
return { ...this.options };
}
};
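/**
* Usage sketch for ImagePreprocessor. The options mirror a HuggingFace
* `preprocessor_config.json`; the model id below is only an illustration of
* `fromHuggingFace`, which simply fetches that config file from the Hub.
*
* @example
* ```typescript
* // Explicit options (ImageNet-style normalization, CHW layout).
* const pre = new ImagePreprocessor({
*   width: 224,
*   height: 224,
*   mean: [0.485, 0.456, 0.406],
*   std: [0.229, 0.224, 0.225],
*   channelFormat: 'CHW',
* });
* const tensor = await pre.process('https://example.com/cat.jpg'); // shape [3, 224, 224]
*
* // Or derive the options from a model's published preprocessor config.
* const fromHub = await ImagePreprocessor.fromHuggingFace('Xenova/mobilevit-small');
* const batch = await fromHub.processBatch([fileA, fileB]); // shape [2, 3, H, W]
* ```
*/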
var DEFAULT_AUDIO_OPTIONS = {
sampleRate: 16e3,
nMels: 80,
nFft: 400,
hopLength: 160,
normalize: true,
maxDuration: 30
};
var AudioPreprocessor = class _AudioPreprocessor {
constructor(options = {}) {
__publicField(this, "options");
__publicField(this, "audioContext", null);
this.options = { ...DEFAULT_AUDIO_OPTIONS, ...options };
}
/**
* Load from HuggingFace feature_extractor config
*/
static fromConfig(config) {
const options = {};
const samplingRate = config["sampling_rate"];
if (typeof samplingRate === "number") {
options.sampleRate = samplingRate;
}
const featureSize = config["feature_size"];
if (typeof featureSize === "number") {
options.nMels = featureSize;
}
const nFft = config["n_fft"];
if (typeof nFft === "number") {
options.nFft = nFft;
}
const hopLength = config["hop_length"];
if (typeof hopLength === "number") {
options.hopLength = hopLength;
}
return new _AudioPreprocessor(options);
}
/**
* Load from HuggingFace Hub
*/
static async fromHuggingFace(modelId, options) {
const revision = options?.revision ?? "main";
const url = `https://huggingface.co/${modelId}/resolve/${revision}/preprocessor_config.json`;
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to load audio config from ${url}`);
}
const config = await response.json();
return _AudioPreprocessor.fromConfig(config);
}
/**
* Initialize audio context (lazy)
*/
ensureAudioContext() {
if (!this.audioContext) {
if (typeof AudioContext !== "undefined") {
this.audioContext = new AudioContext({ sampleRate: this.options.sampleRate });
} else {
throw new Error("AudioPreprocessor requires Web Audio API support");
}
}
}
/**
* Process audio data
*/
async process(input) {
let audioData;
if (typeof input === "string") {
audioData = await this.loadFromUrl(input);
} else if (input instanceof Blob || input instanceof File) {
audioData = await this.loadFromBlob(input);
} else if (input instanceof AudioBuffer) {
audioData = this.audioBufferToFloat32(input);
} else if (input instanceof Float32Array) {
audioData = input;
} else {
audioData = await this.decodeAudioData(input);
}
if (this.options.normalize) {
audioData = this.normalizeAudio(audioData);
}
const maxSamples = this.options.maxDuration * this.options.sampleRate;
if (audioData.length > maxSamples) {
audioData = audioData.slice(0, maxSamples);
}
const melSpec = this.computeMelSpectrogram(audioData);
return melSpec;
}
/**
* Process raw waveform (for models that don't need mel spectrogram)
*/
async processRaw(input) {
let audioData;
if (typeof input === "string") {
audioData = await this.loadFromUrl(input);
} else if (input instanceof Blob || input instanceof File) {
audioData = await this.loadFromBlob(input);
} else if (input instanceof AudioBuffer) {
audioData = this.audioBufferToFloat32(input);
} else if (input instanceof Float32Array) {
audioData = input;
} else {
audioData = await this.decodeAudioData(input);
}
if (this.options.normalize) {
audioData = this.normalizeAudio(audioData);
}
const maxSamples = this.options.maxDuration * this.options.sampleRate;
if (audioData.length > maxSamples) {
audioData = audioData.slice(0, maxSamples);
}
return new EdgeFlowTensor(audioData, [1, audioData.length], "float32");
}
/**
* Load audio from URL
*/
async loadFromUrl(url) {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to load audio from ${url}`);
}
const arrayBuffer = await response.arrayBuffer();
return this.decodeAudioData(arrayBuffer);
}
/**
* Load audio from Blob/File
*/
async loadFromBlob(blob) {
const arrayBuffer = await blob.arrayBuffer();
return this.decodeAudioData(arrayBuffer);
}
/**
* Decode audio data
*/
async decodeAudioData(data) {
this.ensureAudioContext();
const audioBuffer = await this.audioContext.decodeAudioData(data.slice(0));
return this.audioBufferToFloat32(audioBuffer);
}
/**
* Convert AudioBuffer to Float32Array
*/
audioBufferToFloat32(buffer) {
const channelData = buffer.getChannelData(0);
return new Float32Array(channelData);
}
/**
* Normalize audio
*/
normalizeAudio(data) {
let max = 0;
for (let i = 0; i < data.length; i++) {
const abs = Math.abs(data[i] ?? 0);
if (abs > max)
max = abs;
}
if (max > 0) {
const result = new Float32Array(data.length);
for (let i = 0; i < data.length; i++) {
result[i] = (data[i] ?? 0) / max;
}
return result;
}
return data;
}
/**
* Compute mel spectrogram (simplified implementation)
*/
computeMelSpectrogram(audio) {
const { nMels, nFft, hopLength } = this.options;
const numFrames = Math.floor((audio.length - nFft) / hopLength) + 1;
if (numFrames <= 0) {
return new EdgeFlowTensor(new Float32Array(nMels), [1, nMels], "float32");
}
const melSpec = new Float32Array(numFrames * nMels);
for (let frame = 0; frame < numFrames; frame++) {
const start = frame * hopLength;
for (let mel = 0; mel < nMels; mel++) {
let energy = 0;
const freqStart = Math.floor(mel / nMels * (nFft / 2));
const freqEnd = Math.floor((mel + 1) / nMels * (nFft / 2));
for (let i = freqStart; i < Math.min(freqEnd, nFft); i++) {
const sample = audio[start + i] ?? 0;
energy += sample * sample;
}
melSpec[frame * nMels + mel] = Math.log(energy + 1e-10);
}
}
return new EdgeFlowTensor(melSpec, [numFrames, nMels], "float32");
}
/**
* Dispose resources
*/
dispose() {
if (this.audioContext) {
this.audioContext.close();
this.audioContext = null;
}
}
};
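/**
* Usage sketch for AudioPreprocessor. `process()` returns the simplified
* log-mel features computed above, while `processRaw()` returns the (optionally
* normalized) waveform as a [1, numSamples] tensor for models that consume raw audio.
*
* @example
* ```typescript
* const audioPre = new AudioPreprocessor({ sampleRate: 16000, nMels: 80 });
*
* // Given a File from an <input type="file"> (decoded via the Web Audio API).
* const mel = await audioPre.process(file);     // shape [numFrames, 80]
* const wave = await audioPre.processRaw(file); // shape [1, numSamples]
*
* audioPre.dispose(); // closes the underlying AudioContext
* ```
*/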
function preprocessText(text, options = {}) {
const { lowercase = true, removePunctuation = false, normalizeWhitespace = true, maxLength } = options;
let result = text;
if (lowercase) {
result = result.toLowerCase();
}
if (removePunctuation) {
result = result.replace(/[^\w\s]/g, "");
}
if (normalizeWhitespace) {
result = result.replace(/\s+/g, " ").trim();
}
if (maxLength && result.length > maxLength) {
result = result.slice(0, maxLength);
}
return result;
}
function createImagePreprocessor(preset = "imagenet", options = {}) {
const presets = {
imagenet: {
width: 224,
height: 224,
mean: [0.485, 0.456, 0.406],
std: [0.229, 0.224, 0.225]
},
clip: {
width: 224,
height: 224,
mean: [0.48145466, 0.4578275, 0.40821073],
std: [0.26862954, 0.26130258, 0.27577711]
},
vit: {
width: 224,
height: 224,
mean: [0.5, 0.5, 0.5],
std: [0.5, 0.5, 0.5]
},
custom: {}
};
return new ImagePreprocessor({ ...presets[preset], ...options });
}
function createAudioPreprocessor(preset = "whisper", options = {}) {
const presets = {
whisper: {
sampleRate: 16e3,
nMels: 80,
nFft: 400,
hopLength: 160
},
wav2vec: {
sampleRate: 16e3,
normalize: true
},
custom: {}
};
return new AudioPreprocessor({ ...presets[preset], ...options });
}
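/**
* Usage sketch for the preprocessor factories above. Presets only seed the
* option objects (e.g. CLIP's mean/std versus the ImageNet defaults); any field
* can still be overridden per call.
*
* @example
* ```typescript
* const clipPre = createImagePreprocessor('clip', { width: 336, height: 336 });
* const whisperPre = createAudioPreprocessor('whisper');
*
* const cleaned = preprocessText('  Hello,   WORLD!  ', {
*   lowercase: true,
*   removePunctuation: true,
* }); // "hello world"
* ```
*/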
// dist/pipelines/image-classification.js
init_model_loader();
var DEFAULT_MODELS3 = {
model: "https://huggingface.co/Xenova/mobilevit-small/resolve/main/onnx/model_quantized.onnx"
};
var ImageClassificationPipeline = class extends BasePipeline {
constructor(config, labels, _numClasses = 1e3) {
super(config);
__publicField(this, "preprocessor", null);
__publicField(this, "onnxModel", null);
__publicField(this, "labels");
__publicField(this, "modelUrl");
this.labels = labels ?? IMAGENET_LABELS;
this.modelUrl = config.model !== "default" ? config.model : DEFAULT_MODELS3.model;
}
async initialize() {
await super.initialize();
if (!this.preprocessor) {
this.preprocessor = createImagePreprocessor("imagenet");
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setLabels(labels) {
this.labels = labels;
}
async run(input, options) {
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
await this.initialize();
const startTime = performance.now();
const results = [];
for (const image of inputs) {
const tensorInputs = await this.preprocess(image);
const outputs = await this.runModelInference(tensorInputs);
const result = await this.postprocess(outputs, options);
results.push(result);
}
const processingTime = performance.now() - startTime;
for (const result of results) {
result.processingTime = processingTime / results.length;
}
return isBatch ? results : results[0];
}
async preprocess(input) {
const image = Array.isArray(input) ? input[0] : input;
const tensor2 = await this.preprocessor.process(image);
if (tensor2.shape.length === 3) {
return [tensor2.reshape([1, ...tensor2.shape])];
}
return [tensor2];
}
async runModelInference(inputs) {
const outputs = await runInference(this.onnxModel, inputs);
return outputs;
}
async postprocess(outputs, options) {
const logits = outputs[0];
if (!logits) {
return { label: "unknown", score: 0 };
}
const probs = softmax(logits, -1);
const probsArray = probs.toFloat32Array();
let maxIdx = 0;
let maxScore = probsArray[0] ?? 0;
for (let i = 1; i < probsArray.length; i++) {
if ((probsArray[i] ?? 0) > maxScore) {
maxScore = probsArray[i] ?? 0;
maxIdx = i;
}
}
const label = options?.labels?.[maxIdx] ?? this.labels[maxIdx] ?? `class_${maxIdx}`;
return { label, score: maxScore };
}
};
function createImageClassificationPipeline(config = {}, labels) {
return new ImageClassificationPipeline({
task: "image-classification",
model: config.model ?? "default",
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization
}, labels);
}
registerPipeline("image-classification", (config) => new ImageClassificationPipeline(config));
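/**
* Usage sketch for image classification. With `model: 'default'` the quantized
* MobileViT checkpoint above is used together with the bundled ImageNet labels;
* custom checkpoints can supply their own label list.
*
* @example
* ```typescript
* const classifier = createImageClassificationPipeline();
* const result = await classifier.run('https://example.com/dog.jpg');
* console.log(result.label, result.score);
*
* // Custom labels for a fine-tuned model (URL is illustrative).
* const custom = createImageClassificationPipeline({ model: 'https://example.com/model.onnx' }, ['cat', 'dog']);
* ```
*/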
// dist/pipelines/text-generation.js
init_tensor();
var DEFAULT_LLM_MODELS = {
model: "https://huggingface.co/Xenova/TinyLlama-1.1B-Chat-v1.0/resolve/main/onnx/model_q4f16.onnx",
tokenizer: "https://huggingface.co/Xenova/TinyLlama-1.1B-Chat-v1.0/resolve/main/tokenizer.json"
};
var TextGenerationPipeline = class extends BasePipeline {
constructor(config) {
super(config ?? {
task: "text-generation",
model: "default"
});
__publicField(this, "tokenizer", null);
__publicField(this, "eosTokenId", 50256);
// GPT-2 default
__publicField(this, "llmModel", null);
__publicField(this, "modelsLoaded", false);
// Custom model URLs
__publicField(this, "modelUrl");
__publicField(this, "tokenizerUrl");
// ==========================================================================
// Chat / Conversation Support
// ==========================================================================
__publicField(this, "conversationHistory", []);
__publicField(this, "chatTemplateType", "chatml");
this.modelUrl = DEFAULT_LLM_MODELS.model;
this.tokenizerUrl = DEFAULT_LLM_MODELS.tokenizer;
}
/**
* Check if model is loaded
*/
get isModelLoaded() {
return this.modelsLoaded;
}
/**
* Set custom model URLs
*/
setModelUrls(model, tokenizer) {
this.modelUrl = model;
this.tokenizerUrl = tokenizer;
}
/**
* Load model and tokenizer with progress callback
*/
async loadModel(onProgress) {
if (this.modelsLoaded)
return;
onProgress?.({ stage: "tokenizer", loaded: 0, total: 100, progress: 0 });
try {
const tokenizerResponse = await fetch(this.tokenizerUrl);
if (!tokenizerResponse.ok) {
throw new Error(`Failed to fetch tokenizer: ${tokenizerResponse.status}`);
}
const tokenizerJson = await tokenizerResponse.json();
this.tokenizer = await Tokenizer.fromJSON(tokenizerJson);
const specialIds = this.tokenizer.getSpecialTokenIds();
this.eosTokenId = specialIds.eosTokenId ?? specialIds.sepTokenId ?? 2;
onProgress?.({ stage: "tokenizer", loaded: 100, total: 100, progress: 100 });
} catch (error) {
throw new Error(`Failed to load tokenizer: ${error}`);
}
onProgress?.({ stage: "model", loaded: 0, total: 100, progress: 0 });
const modelData = await this.fetchModelWithProgress(this.modelUrl, (loaded, total) => {
onProgress?.({
stage: "model",
loaded,
total,
progress: Math.round(loaded / total * 100)
});
});
this.llmModel = await loadModelFromBuffer(modelData, {
runtime: "wasm"
// Uses ONNXRuntime which auto-detects WebGPU internally
});
this.model = this.llmModel;
this.modelsLoaded = true;
}
/**
* Fetch model with progress tracking
*/
async fetchModelWithProgress(url, onProgress) {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to fetch model: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get("content-length");
const total = contentLength ? parseInt(contentLength, 10) : 0;
if (!response.body) {
const buffer2 = await response.arrayBuffer();
onProgress(buffer2.byteLength, buffer2.byteLength);
return buffer2;
}
const reader = response.body.getReader();
const chunks = [];
let loaded = 0;
while (true) {
const { done, value } = await reader.read();
if (done)
break;
chunks.push(value);
loaded += value.length;
onProgress(loaded, total || loaded);
}
const buffer = new Uint8Array(loaded);
let offset = 0;
for (const chunk of chunks) {
buffer.set(chunk, offset);
offset += chunk.length;
}
return buffer.buffer;
}
/**
* Initialize pipeline (override to skip default model loading)
*/
async initialize() {
if (this.isReady)
return;
this.isReady = true;
}
/**
* Set tokenizer
*/
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
const specialIds = tokenizer.getSpecialTokenIds();
this.eosTokenId = specialIds.eosTokenId ?? specialIds.sepTokenId ?? 50256;
}
/**
* Preprocess - not used for text generation (handled in generateSingle)
*/
async preprocess(input) {
const text = Array.isArray(input) ? input[0] ?? "" : input;
if (!this.tokenizer) {
return [new EdgeFlowTensor(new Float32Array([0]), [1], "float32")];
}
const encoded = this.tokenizer.encode(text, {
addSpecialTokens: false,
padding: "do_not_pad"
});
return [new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map((id) => BigInt(id))), [1, encoded.inputIds.length], "int64")];
}
/**
* Postprocess - not used for text generation (handled in generateSingle)
*/
async postprocess(_outputs, _options) {
return {
generatedText: "",
tokenIds: [],
numTokens: 0,
processingTime: 0
};
}
/**
* Generate text (non-streaming)
*/
async run(prompt, options) {
await this.initialize();
const prompts = Array.isArray(prompt) ? prompt : [prompt];
const results = await Promise.all(prompts.map((p) => this.generateSingle(p, options ?? {})));
return Array.isArray(prompt) ? results : results[0];
}
/**
* Generate text with streaming (async generator)
*/
async *stream(prompt, options = {}) {
const startTime = performance.now();
if (!this.tokenizer) {
throw new Error("Tokenizer not set. Call setTokenizer() first.");
}
const { maxNewTokens = 50, maxLength = 512, temperature = 1, topK = 0, topP = 1, repetitionPenalty = 1, stopSequences = [], doSample = true } = options;
const encoded = this.tokenizer.encode(prompt, {
addSpecialTokens: false,
padding: "do_not_pad",
truncation: false
});
let inputIds = [...encoded.inputIds];
const generatedIds = [];
let generatedText = "";
for (let i = 0; i < maxNewTokens; i++) {
if (inputIds.length >= maxLength)
break;
const nextTokenId = await this.generateNextToken(inputIds, temperature, topK, topP, repetitionPenalty, doSample);
if (nextTokenId === this.eosTokenId) {
yield {
token: "",
tokenId: nextTokenId,
generatedText,
done: true
};
break;
}
const token = this.tokenizer.decode([nextTokenId], true);
generatedIds.push(nextTokenId);
inputIds.push(nextTokenId);
generatedText += token;
if (options.onToken) {
options.onToken(token, nextTokenId);
}
let shouldStop = false;
for (const stopSeq of stopSequences) {
if (generatedText.endsWith(stopSeq)) {
generatedText = generatedText.slice(0, -stopSeq.length);
shouldStop = true;
break;
}
}
yield {
token,
tokenId: nextTokenId,
generatedText,
done: shouldStop
};
if (shouldStop)
break;
}
const endTime = performance.now();
console.log(`Generation completed in ${(endTime - startTime).toFixed(2)}ms`);
}
/**
* Generate a single sequence (non-streaming)
*/
async generateSingle(prompt, options) {
const startTime = performance.now();
if (!this.tokenizer) {
throw new Error("Tokenizer not set. Call setTokenizer() first.");
}
const { maxNewTokens = 50, maxLength = 512, temperature = 1, topK = 0, topP = 1, repetitionPenalty = 1, stopSequences = [], doSample = true, returnFullText = false } = options;
const encoded = this.tokenizer.encode(prompt, {
addSpecialTokens: false,
padding: "do_not_pad",
truncation: false
});
let inputIds = [...encoded.inputIds];
const generatedIds = [];
for (let i = 0; i < maxNewTokens; i++) {
if (inputIds.length >= maxLength)
break;
const nextTokenId = await this.generateNextToken(inputIds, temperature, topK, topP, repetitionPenalty, doSample);
if (nextTokenId === this.eosTokenId)
break;
generatedIds.push(nextTokenId);
inputIds.push(nextTokenId);
if (options.onToken) {
const token = this.tokenizer.decode([nextTokenId], true);
options.onToken(token, nextTokenId);
}
const currentText = this.tokenizer.decode(generatedIds, true);
let shouldStop = false;
for (const stopSeq of stopSequences) {
if (currentText.endsWith(stopSeq)) {
shouldStop = true;
break;
}
}
if (shouldStop)
break;
}
const generatedText = this.tokenizer.decode(generatedIds, true);
const endTime = performance.now();
return {
generatedText,
fullText: returnFullText ? prompt + generatedText : void 0,
tokenIds: generatedIds,
numTokens: generatedIds.length,
processingTime: endTime - startTime
};
}
/**
* Generate next token using the model
*/
async generateNextToken(inputIds, temperature, topK, topP, repetitionPenalty, doSample) {
if (!this.model) {
throw new Error("Model not loaded");
}
const seqLen = inputIds.length;
const inputs = /* @__PURE__ */ new Map();
inputs.set("input_ids", new EdgeFlowTensor(BigInt64Array.from(inputIds.map((id) => BigInt(id))), [1, seqLen], "int64"));
inputs.set("attention_mask", new EdgeFlowTensor(BigInt64Array.from(inputIds.map(() => BigInt(1))), [1, seqLen], "int64"));
inputs.set("position_ids", new EdgeFlowTensor(BigInt64Array.from(Array.from({ length: seqLen }, (_, i) => BigInt(i))), [1, seqLen], "int64"));
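// KV-cache inputs sized for the default TinyLlama-1.1B checkpoint (22 layers,
// 4 key/value heads, head dim 64). The zero-length past tensors mean no cache
// is carried over between steps, so every new token reruns the full prefix.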
const numLayers = 22;
const numKVHeads = 4;
const headDim = 64;
for (let i = 0; i < numLayers; i++) {
inputs.set(`past_key_values.${i}.key`, new EdgeFlowTensor(new Float32Array(0), [1, numKVHeads, 0, headDim], "float32"));
inputs.set(`past_key_values.${i}.value`, new EdgeFlowTensor(new Float32Array(0), [1, numKVHeads, 0, headDim], "float32"));
}
const outputs = await runInferenceNamed(this.model, inputs);
if (!outputs || outputs.length === 0) {
throw new Error("Model returned no outputs");
}
const logits = outputs[0];
const logitsData = logits.toFloat32Array();
const vocabSize = logits.shape[logits.shape.length - 1] ?? 50257;
const lastPositionLogits = new Float32Array(vocabSize);
const offset = (inputIds.length - 1) * vocabSize;
for (let i = 0; i < vocabSize; i++) {
lastPositionLogits[i] = logitsData[offset + i] ?? 0;
}
if (repetitionPenalty !== 1) {
for (const prevId of inputIds) {
if (prevId < vocabSize) {
const score = lastPositionLogits[prevId] ?? 0;
lastPositionLogits[prevId] = score > 0 ? score / repetitionPenalty : score * repetitionPenalty;
}
}
}
if (temperature !== 1) {
for (let i = 0; i < vocabSize; i++) {
lastPositionLogits[i] = (lastPositionLogits[i] ?? 0) / temperature;
}
}
const logitsTensor = new EdgeFlowTensor(lastPositionLogits, [vocabSize], "float32");
const probs = softmax(logitsTensor).toFloat32Array();
if (doSample) {
return this.sample(probs, topK, topP);
} else {
return this.greedy(probs);
}
}
/**
* Greedy decoding (argmax)
*/
greedy(probs) {
let maxIdx = 0;
let maxProb = probs[0] ?? 0;
for (let i = 1; i < probs.length; i++) {
if ((probs[i] ?? 0) > maxProb) {
maxProb = probs[i] ?? 0;
maxIdx = i;
}
}
return maxIdx;
}
/**
* Sample from probability distribution with top-k/top-p filtering
*/
sample(probs, topK, topP) {
const indices = Array.from({ length: probs.length }, (_, i) => i);
indices.sort((a, b) => (probs[b] ?? 0) - (probs[a] ?? 0));
let candidateIndices = indices;
if (topK > 0 && topK < probs.length) {
candidateIndices = indices.slice(0, topK);
}
if (topP < 1) {
let cumulativeProb = 0;
const filtered = [];
for (const idx of candidateIndices) {
filtered.push(idx);
cumulativeProb += probs[idx] ?? 0;
if (cumulativeProb >= topP)
break;
}
candidateIndices = filtered;
}
let totalProb = 0;
for (const idx of candidateIndices) {
totalProb += probs[idx] ?? 0;
}
const r = Math.random() * totalProb;
let cumulative = 0;
for (const idx of candidateIndices) {
cumulative += probs[idx] ?? 0;
if (cumulative >= r) {
return idx;
}
}
return candidateIndices[0] ?? 0;
}
/**
* Set the chat template type
*/
setChatTemplate(templateType) {
this.chatTemplateType = templateType;
}
/**
* Apply chat template to messages
*/
applyChatTemplate(messages, options) {
const templateType = options?.templateType ?? this.chatTemplateType;
switch (templateType) {
case "chatml":
return this.applyChatMLTemplate(messages);
case "llama2":
return this.applyLlama2Template(messages);
case "llama3":
return this.applyLlama3Template(messages);
case "mistral":
return this.applyMistralTemplate(messages);
case "phi3":
return this.applyPhi3Template(messages);
case "alpaca":
return this.applyAlpacaTemplate(messages);
case "vicuna":
return this.applyVicunaTemplate(messages);
case "custom":
return this.applyCustomTemplate(messages, options?.customTemplate ?? {});
default:
return this.applyChatMLTemplate(messages);
}
}
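/**
* Template sketch: the formatters below turn a message array into a single
* prompt string. With the default ChatML template, for example:
*
* @example
* ```typescript
* const prompt = generator.applyChatTemplate([
*   { role: 'system', content: 'You are a helpful assistant.' },
*   { role: 'user', content: 'What is WebGPU?' },
* ]);
* // "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n..." + "<|im_start|>assistant\n"
*
* generator.setChatTemplate('llama3'); // switch the format used by chat()/chatStream()
* ```
*/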
/**
* ChatML template (used by many models including Qwen, Yi)
*/
applyChatMLTemplate(messages) {
let prompt = "";
for (const msg of messages) {
prompt += `<|im_start|>${msg.role}
${msg.content}<|im_end|>
`;
}
prompt += "<|im_start|>assistant\n";
return prompt;
}
/**
* Llama 2 template
*/
applyLlama2Template(messages) {
let prompt = "";
let systemMsg = "";
for (const msg of messages) {
if (msg.role === "system") {
systemMsg = msg.content;
} else if (msg.role === "user") {
if (systemMsg) {
prompt += `[INST] <<SYS>>
${systemMsg}
<</SYS>>

${msg.content} [/INST]`;
systemMsg = "";
} else {
prompt += `[INST] ${msg.content} [/INST]`;
}
} else if (msg.role === "assistant") {
prompt += ` ${msg.content} `;
}
}
return prompt;
}
/**
* Llama 3 template
*/
applyLlama3Template(messages) {
let prompt = "<|begin_of_text|>";
for (const msg of messages) {
prompt += `<|start_header_id|>${msg.role}<|end_header_id|>
${msg.content}<|eot_id|>`;
}
prompt += "<|start_header_id|>assistant<|end_header_id|>\n\n";
return prompt;
}
/**
* Mistral template
*/
applyMistralTemplate(messages) {
let prompt = "";
for (const msg of messages) {
if (msg.role === "user") {
prompt += `[INST] ${msg.content} [/INST]`;
} else if (msg.role === "assistant") {
prompt += ` ${msg.content} `;
} else if (msg.role === "system") {
prompt += `[INST] ${msg.content}
`;
}
}
return prompt;
}
/**
* Phi-3 template
*/
applyPhi3Template(messages) {
let prompt = "";
for (const msg of messages) {
prompt += `<|${msg.role}|>
${msg.content}<|end|>
`;
}
prompt += "<|assistant|>\n";
return prompt;
}
/**
* Alpaca template
*/
applyAlpacaTemplate(messages) {
let prompt = "";
let instruction = "";
let input = "";
for (const msg of messages) {
if (msg.role === "system") {
instruction = msg.content;
} else if (msg.role === "user") {
input = msg.content;
}
}
if (instruction) {
prompt = `### Instruction:
${instruction}
`;
}
if (input) {
prompt += `### Input:
${input}
`;
}
prompt += "### Response:\n";
return prompt;
}
/**
* Vicuna template
*/
applyVicunaTemplate(messages) {
let prompt = "";
for (const msg of messages) {
if (msg.role === "system") {
prompt += `${msg.content}
`;
} else if (msg.role === "user") {
prompt += `USER: ${msg.content}
`;
} else if (msg.role === "assistant") {
prompt += `ASSISTANT: ${msg.content}
`;
}
}
prompt += "ASSISTANT:";
return prompt;
}
/**
* Custom template
*/
applyCustomTemplate(messages, template) {
const { systemPrefix = "", systemSuffix = "\n", userPrefix = "User: ", userSuffix = "\n", assistantPrefix = "Assistant: ", assistantSuffix = "\n", separator = "" } = template;
let prompt = "";
for (let i = 0; i < messages.length; i++) {
const msg = messages[i];
if (i > 0)
prompt += separator;
switch (msg.role) {
case "system":
prompt += `${systemPrefix}${msg.content}${systemSuffix}`;
break;
case "user":
prompt += `${userPrefix}${msg.content}${userSuffix}`;
break;
case "assistant":
prompt += `${assistantPrefix}${msg.content}${assistantSuffix}`;
break;
}
}
prompt += assistantPrefix;
return prompt;
}
/**
* Chat with the model
*
* @example
* ```typescript
* const generator = await pipeline('text-generation', 'model');
*
* // Single turn
* const response = await generator.chat('Hello, how are you?');
*
* // Multi-turn with history
* const response1 = await generator.chat('What is AI?');
* const response2 = await generator.chat('Can you give an example?');
*
* // With system prompt
* const response = await generator.chat('Hello', {
* systemPrompt: 'You are a helpful assistant.',
* });
* ```
*/
async chat(userMessage, options) {
if (options?.systemPrompt && (this.conversationHistory.length === 0 || this.conversationHistory[0]?.role !== "system")) {
this.conversationHistory.unshift({
role: "system",
content: options.systemPrompt
});
}
this.conversationHistory.push({
role: "user",
content: userMessage
});
const prompt = this.applyChatTemplate(this.conversationHistory, options);
const result = await this.run(prompt, {
...options,
stopSequences: [
...options?.stopSequences ?? [],
"<|im_end|>",
"<|end|>",
"<|eot_id|>",
"</s>",
"\n\nUser:",
"\n\nHuman:"
]
});
const response = Array.isArray(result) ? result[0] : result;
this.conversationHistory.push({
role: "assistant",
content: response.generatedText.trim()
});
return response;
}
/**
* Stream chat response
*/
async *chatStream(userMessage, options) {
if (options?.systemPrompt && (this.conversationHistory.length === 0 || this.conversationHistory[0]?.role !== "system")) {
this.conversationHistory.unshift({
role: "system",
content: options.systemPrompt
});
}
this.conversationHistory.push({
role: "user",
content: userMessage
});
const prompt = this.applyChatTemplate(this.conversationHistory, options);
let fullResponse = "";
for await (const event of this.stream(prompt, {
...options,
stopSequences: [
...options?.stopSequences ?? [],
"<|im_end|>",
"<|end|>",
"<|eot_id|>",
"</s>"
]
})) {
fullResponse = event.generatedText;
yield event;
}
this.conversationHistory.push({
role: "assistant",
content: fullResponse.trim()
});
}
/**
* Get conversation history
*/
getConversationHistory() {
return [...this.conversationHistory];
}
/**
* Set conversation history
*/
setConversationHistory(messages) {
this.conversationHistory = [...messages];
}
/**
* Clear conversation history
*/
clearConversation() {
this.conversationHistory = [];
}
/**
* Remove last exchange (user message + assistant response)
*/
undoLastExchange() {
if (this.conversationHistory.length > 0 && this.conversationHistory[this.conversationHistory.length - 1]?.role === "assistant") {
this.conversationHistory.pop();
}
if (this.conversationHistory.length > 0 && this.conversationHistory[this.conversationHistory.length - 1]?.role === "user") {
this.conversationHistory.pop();
}
}
};
function createTextGenerationPipeline(config) {
return new TextGenerationPipeline(config);
}
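/**
* Usage sketch for text generation. `initialize()` intentionally skips model
* loading, so call `loadModel()` (optionally with a progress callback) before
* `run()`, `stream()` or `chat()`; the default URLs point at the quantized
* TinyLlama checkpoint above and can be swapped with `setModelUrls()`.
*
* @example
* ```typescript
* const generator = createTextGenerationPipeline();
* await generator.loadModel((p) => console.log(`${p.stage}: ${p.progress}%`));
*
* // Non-streaming generation.
* const out = await generator.run('Once upon a time', { maxNewTokens: 40, temperature: 0.8, topP: 0.9 });
* console.log(out.generatedText, out.numTokens);
*
* // Token-by-token streaming.
* let output = '';
* for await (const event of generator.stream('Write a haiku about the sea', { maxNewTokens: 32 })) {
*   output += event.token;
*   if (event.done) break;
* }
* ```
*/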
// dist/pipelines/object-detection.js
init_tensor();
init_model_loader();
var DEFAULT_MODELS4 = {
model: "https://huggingface.co/Xenova/yolos-tiny/resolve/main/onnx/model_quantized.onnx"
};
var COCO_LABELS = [
"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic light",
"fire hydrant",
"stop sign",
"parking meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports ball",
"kite",
"baseball bat",
"baseball glove",
"skateboard",
"surfboard",
"tennis racket",
"bottle",
"wine glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted plant",
"bed",
"dining table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy bear",
"hair drier",
"toothbrush"
];
var ObjectDetectionPipeline = class extends BasePipeline {
constructor(config, labels) {
super(config ?? {
task: "object-detection",
model: "default"
});
__publicField(this, "preprocessor");
__publicField(this, "onnxModel", null);
__publicField(this, "labels");
__publicField(this, "modelUrl");
this.labels = labels ?? COCO_LABELS;
this.modelUrl = config?.model && config.model !== "default" ? config.model : DEFAULT_MODELS4.model;
this.preprocessor = new ImagePreprocessor({
width: 640,
height: 640,
mean: [0.485, 0.456, 0.406],
std: [0.229, 0.224, 0.225],
channelFormat: "CHW"
});
}
async initialize() {
await super.initialize();
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setLabels(labels) {
this.labels = labels;
}
async run(input, options) {
await this.initialize();
const tensorInputs = await this.preprocess(input);
const outputs = await this.runModelInference(tensorInputs);
return this.postprocess(outputs, options);
}
async preprocess(input) {
const inputs = Array.isArray(input) ? input : [input];
if (inputs.length === 1) {
const tensor2 = await this.preprocessor.process(inputs[0]);
return [new EdgeFlowTensor(tensor2.toFloat32Array(), [1, ...tensor2.shape], "float32")];
}
return [await this.preprocessor.processBatch(inputs)];
}
async runModelInference(inputs) {
const outputs = await runInference(this.onnxModel, inputs);
return outputs;
}
async postprocess(outputs, options) {
const opts = options ?? {};
const threshold = opts.threshold ?? 0.5;
const topK = opts.topK ?? 100;
const nms = opts.nms ?? true;
const iouThreshold = opts.iouThreshold ?? 0.5;
if (!outputs[0]) {
return [];
}
const outputData = outputs[0].toFloat32Array();
const shape = [...outputs[0].shape];
const detections = this.parseDetections(outputData, shape, threshold);
let filtered = nms ? this.nonMaxSuppression(detections, iouThreshold) : detections;
filtered.sort((a, b) => b.score - a.score);
filtered = filtered.slice(0, topK);
return filtered;
}
parseDetections(data, shape, threshold) {
const detections = [];
const numBoxes = shape[1] ?? 0;
const boxSize = shape[2] ?? 0;
if (boxSize >= 5) {
const numClasses = boxSize - 5;
for (let i = 0; i < numBoxes; i++) {
const offset = i * boxSize;
const objectness = data[offset + 4] ?? 0;
if (objectness < threshold)
continue;
let maxClassScore = 0;
let maxClassIdx = 0;
for (let c = 0; c < numClasses; c++) {
const score = data[offset + 5 + c] ?? 0;
if (score > maxClassScore) {
maxClassScore = score;
maxClassIdx = c;
}
}
const confidence = objectness * maxClassScore;
if (confidence < threshold)
continue;
const x = data[offset] ?? 0;
const y = data[offset + 1] ?? 0;
const w = data[offset + 2] ?? 0;
const h = data[offset + 3] ?? 0;
detections.push({
label: this.labels[maxClassIdx] ?? `class_${maxClassIdx}`,
score: confidence,
classId: maxClassIdx,
box: {
x: Math.max(0, x - w / 2),
y: Math.max(0, y - h / 2),
width: w,
height: h
},
boxNormalized: {
x: Math.max(0, x - w / 2),
y: Math.max(0, y - h / 2),
width: w,
height: h
}
});
}
} else if (boxSize === 4) {
for (let i = 0; i < numBoxes; i++) {
const offset = i * boxSize;
const x1 = data[offset] ?? 0;
const y1 = data[offset + 1] ?? 0;
const x2 = data[offset + 2] ?? 0;
const y2 = data[offset + 3] ?? 0;
detections.push({
label: this.labels[0] ?? "object",
score: 1,
classId: 0,
box: {
x: x1,
y: y1,
width: x2 - x1,
height: y2 - y1
},
boxNormalized: {
x: x1,
y: y1,
width: x2 - x1,
height: y2 - y1
}
});
}
}
return detections;
}
nonMaxSuppression(detections, iouThreshold) {
if (detections.length === 0)
return [];
const sorted = [...detections].sort((a, b) => b.score - a.score);
const selected = [];
const active = new Array(sorted.length).fill(true);
for (let i = 0; i < sorted.length; i++) {
if (!active[i])
continue;
const current = sorted[i];
selected.push(current);
for (let j = i + 1; j < sorted.length; j++) {
if (!active[j])
continue;
const other = sorted[j];
if (current.classId !== other.classId)
continue;
const iou = this.computeIoU(current.box, other.box);
if (iou > iouThreshold) {
active[j] = false;
}
}
}
return selected;
}
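/**
* Intersection-over-Union of two axis-aligned boxes given as
* { x, y, width, height }; returns 0 when the union is empty.
*/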
computeIoU(a, b) {
const xOverlap = Math.max(0, Math.min(a.x + a.width, b.x + b.width) - Math.max(a.x, b.x));
const yOverlap = Math.max(0, Math.min(a.y + a.height, b.y + b.height) - Math.max(a.y, b.y));
const intersection = xOverlap * yOverlap;
const aArea = a.width * a.height;
const bArea = b.width * b.height;
const union = aArea + bArea - intersection;
return union > 0 ? intersection / union : 0;
}
};
registerPipeline("object-detection", (config) => new ObjectDetectionPipeline(config));
// dist/pipelines/automatic-speech-recognition.js
init_tensor();
init_model_loader();
var DEFAULT_MODELS5 = {
encoder: "https://huggingface.co/Xenova/whisper-tiny/resolve/main/onnx/encoder_model_quantized.onnx",
decoder: "https://huggingface.co/Xenova/whisper-tiny/resolve/main/onnx/decoder_model_merged_quantized.onnx",
tokenizer: "https://huggingface.co/Xenova/whisper-tiny/resolve/main/tokenizer.json"
};
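// Whisper special-token ids (from the multilingual Whisper tokenizer):
// start-of-transcript, translate/transcribe task tokens, end-of-text,
// no-timestamps marker and the English language token.
// MAX_DECODER_TOKENS caps the greedy decoding loop.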
var SOT_TOKEN = 50258;
var TRANSLATE_TOKEN = 50358;
var TRANSCRIBE_TOKEN = 50359;
var EOT_TOKEN = 50257;
var NO_TIMESTAMPS_TOKEN = 50363;
var EN_TOKEN = 50259;
var MAX_DECODER_TOKENS = 448;
var AutomaticSpeechRecognitionPipeline = class extends BasePipeline {
constructor(config) {
super(config ?? {
task: "automatic-speech-recognition",
model: "default"
});
__publicField(this, "audioPreprocessor");
__publicField(this, "tokenizer", null);
__publicField(this, "encoderModel", null);
__publicField(this, "decoderModel", null);
__publicField(this, "encoderUrl");
__publicField(this, "decoderUrl");
__publicField(this, "tokenizerUrl");
this.encoderUrl = DEFAULT_MODELS5.encoder;
this.decoderUrl = DEFAULT_MODELS5.decoder;
this.tokenizerUrl = DEFAULT_MODELS5.tokenizer;
this.audioPreprocessor = new AudioPreprocessor({
sampleRate: 16e3,
nMels: 80,
nFft: 400,
hopLength: 160,
maxDuration: 30
});
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.encoderModel) {
const data = await loadModelData(this.encoderUrl, { cache: this.config.cache ?? true });
this.encoderModel = await loadModelFromBuffer(data);
}
if (!this.decoderModel) {
const data = await loadModelData(this.decoderUrl, { cache: this.config.cache ?? true });
this.decoderModel = await loadModelFromBuffer(data);
}
}
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
}
async run(input, options) {
await this.initialize();
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
const opts = options ?? {};
const results = [];
for (const audio of inputs) {
const result = await this.transcribeSingle(audio, opts);
results.push(result);
}
return isBatch ? results : results[0];
}
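/**
* Transcribe one clip: compute the mel spectrogram, run the encoder once,
* greedily decode tokens conditioned on the encoder hidden states, and
* detokenize. Heuristic timestamps are attached when returnTimestamps is set.
*/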
async transcribeSingle(audio, options) {
const startTime = performance.now();
const melTensor = await this.audioPreprocessor.process(audio);
const melInput = new EdgeFlowTensor(melTensor.toFloat32Array(), [1, ...melTensor.shape], "float32");
const encoderOutputs = await runInference(this.encoderModel, [melInput]);
const encoderHidden = encoderOutputs[0];
const task = options.task ?? "transcribe";
const initialTokens = this.buildInitialTokens(task, options.language);
const generatedTokens = await this.autoregressiveDecode(encoderHidden, initialTokens);
const text = this.tokenizer.decode(generatedTokens, true);
const result = {
text: text.trim(),
processingTime: performance.now() - startTime
};
if (options.returnTimestamps) {
result.chunks = this.extractTimestamps(generatedTokens, text);
}
return result;
}
buildInitialTokens(task, language) {
const tokens = [SOT_TOKEN];
tokens.push(language ? this.getLanguageToken(language) : EN_TOKEN);
tokens.push(task === "translate" ? TRANSLATE_TOKEN : TRANSCRIBE_TOKEN);
tokens.push(NO_TIMESTAMPS_TOKEN);
return tokens;
}
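/**
* Map an ISO 639-1 language code to its Whisper language token id; unknown
* codes fall back to English (EN_TOKEN).
*/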
getLanguageToken(language) {
const langMap = {
en: 50259,
zh: 50260,
de: 50261,
es: 50262,
ru: 50263,
ko: 50264,
fr: 50265,
ja: 50266,
pt: 50267,
tr: 50268,
pl: 50269,
ca: 50270,
nl: 50271,
ar: 50272,
sv: 50273,
it: 50274,
id: 50275,
hi: 50276,
fi: 50277,
vi: 50278
};
return langMap[language.toLowerCase()] ?? EN_TOKEN;
}
/**
* Greedy autoregressive decoder loop, similar to text-generation.
* Feeds the encoder hidden states plus the growing token sequence to the
* decoder at each step, takes the argmax token, and stops at EOT or after
* MAX_DECODER_TOKENS steps.
*/
async autoregressiveDecode(encoderHidden, initialTokens) {
const tokens = [...initialTokens];
for (let step = 0; step < MAX_DECODER_TOKENS; step++) {
const decoderInputIds = new EdgeFlowTensor(BigInt64Array.from(tokens.map((t) => BigInt(t))), [1, tokens.length], "int64");
const namedInputs = /* @__PURE__ */ new Map();
namedInputs.set("input_ids", decoderInputIds);
namedInputs.set("encoder_hidden_states", encoderHidden);
const decoderOutputs = await runInferenceNamed(this.decoderModel, namedInputs);
const logits = decoderOutputs[0].toFloat32Array();
const vocabSize = logits.length / tokens.length;
const lastTokenLogits = logits.slice((tokens.length - 1) * vocabSize);
let bestId = 0;
let bestVal = lastTokenLogits[0] ?? -Infinity;
for (let i = 1; i < lastTokenLogits.length; i++) {
if ((lastTokenLogits[i] ?? -Infinity) > bestVal) {
bestVal = lastTokenLogits[i] ?? -Infinity;
bestId = i;
}
}
if (bestId === EOT_TOKEN)
break;
tokens.push(bestId);
}
return tokens.slice(initialTokens.length);
}
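/**
* Approximate timestamps: the decoded text is grouped into ~5-word chunks and
* timed with a fixed 2.5 words-per-second rate; the model's own timestamp
* tokens are not used (decoding runs with NO_TIMESTAMPS_TOKEN).
*/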
extractTimestamps(_tokenIds, text) {
const words = text.split(/\s+/).filter((w) => w.length > 0);
const chunks = [];
const wordsPerSecond = 2.5;
let chunkText = "";
let chunkStart = 0;
for (let i = 0; i < words.length; i++) {
chunkText += (chunkText ? " " : "") + words[i];
if ((i + 1) % 5 === 0 || i === words.length - 1) {
const duration = chunkText.split(/\s+/).length / wordsPerSecond;
chunks.push({
text: chunkText,
start: chunkStart,
end: chunkStart + duration
});
chunkStart = chunkStart + duration;
chunkText = "";
}
}
return chunks;
}
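/**
* Transcribe long audio by splitting it into overlapping windows (default
* 30 s chunks with 5 s overlap at 16 kHz), transcribing each window, shifting
* any timestamps by the chunk offset, and joining the texts. Overlapping text
* is not deduplicated.
*/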
async processLongAudio(audio, options = {}) {
const chunkDuration = options.chunkDuration ?? 30;
const chunkOverlap = options.chunkOverlap ?? 5;
const rawTensor = await this.audioPreprocessor.processRaw(audio);
const audioData = rawTensor.toFloat32Array();
const sampleRate = 16e3;
const chunkSamples = chunkDuration * sampleRate;
const overlapSamples = chunkOverlap * sampleRate;
const stepSamples = chunkSamples - overlapSamples;
const chunks = [];
for (let start = 0; start < audioData.length; start += stepSamples) {
const end = Math.min(start + chunkSamples, audioData.length);
const chunkAudio = audioData.slice(start, end);
const chunkResult = await this.run(new Float32Array(chunkAudio), options);
if (chunkResult.chunks) {
const timeOffset = start / sampleRate;
chunkResult.chunks = chunkResult.chunks.map((c) => ({
...c,
start: c.start + timeOffset,
end: c.end + timeOffset
}));
}
chunks.push(chunkResult);
}
const mergedText = chunks.map((c) => c.text).join(" ");
const mergedChunks = chunks.flatMap((c) => c.chunks ?? []);
return {
text: mergedText,
chunks: mergedChunks
};
}
async preprocess(input) {
const inputs = Array.isArray(input) ? input : [input];
const tensors = await Promise.all(inputs.map((audio) => this.audioPreprocessor.process(audio)));
if (tensors.length === 1) {
const t = tensors[0];
return [new EdgeFlowTensor(t.toFloat32Array(), [1, ...t.shape], "float32")];
}
return tensors;
}
async postprocess(outputs, options) {
const opts = options ?? {};
const returnTimestamps = opts.returnTimestamps ?? false;
if (!outputs[0]) {
return { text: "" };
}
const outputData = outputs[0].toFloat32Array();
const shape = outputs[0].shape;
const text = this.decodeOutput(outputData, shape);
const result = { text };
if (returnTimestamps) {
result.chunks = this.extractTimestamps([], text);
}
return result;
}
decodeOutput(data, shape) {
const seqLen = shape[1] ?? data.length;
const vocabSize = shape[2] ?? 1;
const tokenIds = [];
if (vocabSize > 1) {
for (let i = 0; i < seqLen; i++) {
const offset = i * vocabSize;
let maxIdx = 0;
let maxVal = data[offset] ?? -Infinity;
for (let j = 1; j < vocabSize; j++) {
if ((data[offset + j] ?? -Infinity) > maxVal) {
maxVal = data[offset + j] ?? -Infinity;
maxIdx = j;
}
}
tokenIds.push(maxIdx);
}
} else {
for (let i = 0; i < data.length; i++) {
tokenIds.push(Math.round(data[i] ?? 0));
}
}
if (this.tokenizer) {
return this.tokenizer.decode(tokenIds, true);
}
return tokenIds.join(" ");
}
};
registerPipeline("automatic-speech-recognition", (config) => new AutomaticSpeechRecognitionPipeline(config));
// dist/pipelines/zero-shot-classification.js
init_tensor();
init_model_loader();
var DEFAULT_MODELS6 = {
model: "https://huggingface.co/Xenova/nli-deberta-v3-small/resolve/main/onnx/model_quantized.onnx",
tokenizer: "https://huggingface.co/Xenova/nli-deberta-v3-small/resolve/main/tokenizer.json"
};
var ENTAILMENT_IDX = 2;
var ZeroShotClassificationPipeline = class extends BasePipeline {
constructor(config) {
super(config ?? {
task: "zero-shot-classification",
model: "default"
});
__publicField(this, "tokenizer", null);
__publicField(this, "onnxModel", null);
__publicField(this, "hypothesisTemplate", "This text is about {label}.");
__publicField(this, "modelUrl");
__publicField(this, "tokenizerUrl");
this.modelUrl = config?.model && config.model !== "default" ? config.model : DEFAULT_MODELS6.model;
this.tokenizerUrl = DEFAULT_MODELS6.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
}
async classify(text, candidateLabels, options) {
return this.run({ text, candidateLabels }, options);
}
async run(input, options) {
await this.initialize();
const { text, candidateLabels } = input;
const opts = options ?? {};
const texts = Array.isArray(text) ? text : [text];
const template = opts.hypothesisTemplate ?? this.hypothesisTemplate;
const multiLabel = opts.multiLabel ?? false;
const results = await Promise.all(texts.map((t) => this.classifySingle(t, candidateLabels, template, multiLabel)));
return Array.isArray(text) ? results : results[0];
}
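/**
* NLI-style zero-shot classification: each candidate label is substituted
* into the hypothesis template and scored against the text. Entailment
* logits are sigmoid-normalised when multiLabel is true, otherwise
* softmax-normalised across labels, then sorted descending.
*/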
async classifySingle(text, candidateLabels, template, multiLabel) {
const startTime = performance.now();
const hypotheses = candidateLabels.map((label) => template.replace("{label}", label));
const scores = [];
for (const hypothesis of hypotheses) {
const score = await this.scoreHypothesis(text, hypothesis);
scores.push(score);
}
let normalizedScores;
if (multiLabel) {
normalizedScores = scores.map((s) => 1 / (1 + Math.exp(-s)));
} else {
const tensor2 = new EdgeFlowTensor(new Float32Array(scores), [scores.length], "float32");
normalizedScores = Array.from(softmax(tensor2).toFloat32Array());
}
const indexed = candidateLabels.map((label, i) => ({
label,
score: normalizedScores[i] ?? 0
}));
indexed.sort((a, b) => b.score - a.score);
return {
sequence: text,
labels: indexed.map((i) => i.label),
scores: indexed.map((i) => i.score),
processingTime: performance.now() - startTime
};
}
/**
* Score a single premise/hypothesis pair with the NLI ONNX model.
* Returns the raw entailment logit for the pair.
*/
async scoreHypothesis(premise, hypothesis) {
const encoded = this.tokenizer.encode(premise, {
textPair: hypothesis,
addSpecialTokens: true,
maxLength: 512,
truncation: true,
returnAttentionMask: true
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map((id) => BigInt(id))), [1, encoded.inputIds.length], "int64");
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map((m) => BigInt(m))), [1, encoded.attentionMask.length], "int64");
const namedInputs = /* @__PURE__ */ new Map();
namedInputs.set("input_ids", inputIds);
namedInputs.set("attention_mask", attentionMask);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
const logits = outputs[0].toFloat32Array();
return logits[ENTAILMENT_IDX] ?? 0;
}
async preprocess(input) {
const { text, candidateLabels } = input;
const firstText = Array.isArray(text) ? text[0] ?? "" : text;
const firstLabel = candidateLabels[0] ?? "";
const encoded = this.tokenizer.encode(firstText, {
textPair: this.hypothesisTemplate.replace("{label}", firstLabel),
addSpecialTokens: true,
maxLength: 512
});
return [new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map((id) => BigInt(id))), [1, encoded.inputIds.length], "int64")];
}
async postprocess(_outputs, _options) {
return {
sequence: "",
labels: [],
scores: []
};
}
};
registerPipeline("zero-shot-classification", (config) => new ZeroShotClassificationPipeline(config));
// dist/pipelines/question-answering.js
init_tensor();
init_model_loader();
var DEFAULT_MODELS7 = {
model: "https://huggingface.co/Xenova/distilbert-base-cased-distilled-squad/resolve/main/onnx/model_quantized.onnx",
tokenizer: "https://huggingface.co/Xenova/distilbert-base-cased-distilled-squad/resolve/main/tokenizer.json"
};
var QuestionAnsweringPipeline = class extends BasePipeline {
constructor(config) {
super(config ?? {
task: "question-answering",
model: "default"
});
__publicField(this, "tokenizer", null);
__publicField(this, "onnxModel", null);
__publicField(this, "modelUrl");
__publicField(this, "tokenizerUrl");
this.modelUrl = config?.model && config.model !== "default" ? config.model : DEFAULT_MODELS7.model;
this.tokenizerUrl = DEFAULT_MODELS7.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
}
async run(input, options) {
await this.initialize();
const inputs = Array.isArray(input) ? input : [input];
const results = await Promise.all(inputs.map((i) => this.answerQuestion(i, options ?? {})));
return Array.isArray(input) ? results : results[0];
}
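/**
* Extractive QA: encode the question/context pair, softmax the start and end
* logits, pick the span (at most maxAnswerLength tokens) that maximises
* startProb * endProb, and decode the span tokens back to text.
*/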
async answerQuestion(input, options) {
const startTime = performance.now();
const { question, context } = input;
const maxAnswerLength = options.maxAnswerLength ?? 30;
const encoded = this.tokenizer.encode(question, {
textPair: context,
addSpecialTokens: true,
maxLength: 512,
truncation: true,
returnAttentionMask: true,
returnTokenTypeIds: true
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map((id) => BigInt(id))), [1, encoded.inputIds.length], "int64");
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map((m) => BigInt(m))), [1, encoded.attentionMask.length], "int64");
const namedInputs = /* @__PURE__ */ new Map();
namedInputs.set("input_ids", inputIds);
namedInputs.set("attention_mask", attentionMask);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
if (outputs.length < 2) {
return { answer: "", score: 0, start: 0, end: 0, processingTime: performance.now() - startTime };
}
const startLogits = outputs[0].toFloat32Array();
const endLogits = outputs[1].toFloat32Array();
const seqLen = startLogits.length;
const startProbs = softmax(new EdgeFlowTensor(new Float32Array(startLogits), [seqLen], "float32")).toFloat32Array();
const endProbs = softmax(new EdgeFlowTensor(new Float32Array(endLogits), [seqLen], "float32")).toFloat32Array();
let bestStartIdx = 0;
let bestEndIdx = 0;
let bestScore = 0;
for (let s = 0; s < seqLen; s++) {
for (let e = s; e < Math.min(s + maxAnswerLength, seqLen); e++) {
const score = (startProbs[s] ?? 0) * (endProbs[e] ?? 0);
if (score > bestScore) {
bestScore = score;
bestStartIdx = s;
bestEndIdx = e;
}
}
}
const answerTokenIds = encoded.inputIds.slice(bestStartIdx, bestEndIdx + 1);
const answer = this.tokenizer.decode(answerTokenIds, true);
const charStart = this.tokenOffsetToCharOffset(context, question, encoded.inputIds, bestStartIdx);
const charEnd = this.tokenOffsetToCharOffset(context, question, encoded.inputIds, bestEndIdx) + 1;
return {
answer: answer || "",
score: bestScore,
start: charStart,
end: charEnd,
processingTime: performance.now() - startTime
};
}
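/**
* Heuristic token-to-character mapping: decodes the sequence up to the token
* index and searches the context for its last word. Approximate by design;
* falls back to offset 0 when the word is not found.
*/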
tokenOffsetToCharOffset(context, _question, inputIds, tokenIdx) {
const decoded = this.tokenizer.decode(inputIds.slice(0, tokenIdx + 1), true);
const contextStart = context.indexOf(decoded.trim().split(" ").pop() ?? "");
return contextStart >= 0 ? contextStart : 0;
}
async preprocess(input) {
const qaInput = Array.isArray(input) ? input[0] : input;
const encoded = this.tokenizer.encode(qaInput.question, {
textPair: qaInput.context,
addSpecialTokens: true,
maxLength: 512,
truncation: true,
returnAttentionMask: true,
returnTokenTypeIds: true
});
return [
new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map((id) => BigInt(id))), [1, encoded.inputIds.length], "int64"),
new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map((m) => BigInt(m))), [1, encoded.attentionMask.length], "int64")
];
}
async postprocess(outputs, _options) {
if (outputs.length < 2) {
return { answer: "", score: 0, start: 0, end: 0 };
}
const startLogits = outputs[0].toFloat32Array();
const endLogits = outputs[1].toFloat32Array();
const seqLen = startLogits.length;
const startProbs = softmax(new EdgeFlowTensor(startLogits, [seqLen], "float32")).toFloat32Array();
const endProbs = softmax(new EdgeFlowTensor(endLogits, [seqLen], "float32")).toFloat32Array();
let bestStart = 0;
let bestEnd = 0;
let bestScore = 0;
for (let start = 0; start < seqLen; start++) {
for (let end = start; end < Math.min(start + 30, seqLen); end++) {
const score = (startProbs[start] ?? 0) * (endProbs[end] ?? 0);
if (score > bestScore) {
bestScore = score;
bestStart = start;
bestEnd = end;
}
}
}
return {
answer: "",
score: bestScore,
start: bestStart,
end: bestEnd
};
}
};
registerPipeline("question-answering", (config) => new QuestionAnsweringPipeline(config));
// dist/pipelines/image-segmentation.js
init_tensor();
var DEFAULT_SAM_MODELS = {
encoder: "https://huggingface.co/Xenova/slimsam-77-uniform/resolve/main/onnx/vision_encoder_quantized.onnx",
decoder: "https://huggingface.co/Xenova/slimsam-77-uniform/resolve/main/onnx/prompt_encoder_mask_decoder_quantized.onnx"
};
var ImageSegmentationPipeline = class extends BasePipeline {
constructor(config) {
super(config);
__publicField(this, "encoderModel", null);
__publicField(this, "decoderModel", null);
__publicField(this, "imageEmbedding", null);
__publicField(this, "imagePositionalEmbedding", null);
__publicField(this, "currentImageSize", null);
__publicField(this, "resizedImageSize", null);
__publicField(this, "inputSize", 1024);
// SAM default input size
__publicField(this, "modelsLoaded", false);
// Custom model URLs
__publicField(this, "encoderUrl");
__publicField(this, "decoderUrl");
this.encoderUrl = DEFAULT_SAM_MODELS.encoder;
this.decoderUrl = DEFAULT_SAM_MODELS.decoder;
}
/**
* Check if models are loaded
*/
get isModelsLoaded() {
return this.modelsLoaded;
}
/**
* Set custom model URLs
*/
setModelUrls(encoder, decoder) {
this.encoderUrl = encoder;
this.decoderUrl = decoder;
}
/**
* Load both encoder and decoder models with progress callback
*/
async loadModels(onProgress) {
if (this.modelsLoaded)
return;
onProgress?.({ model: "encoder", loaded: 0, total: 100, progress: 0 });
const encoderData = await this.fetchModelWithProgress(this.encoderUrl, (loaded, total) => {
onProgress?.({
model: "encoder",
loaded,
total,
progress: Math.round(loaded / total * 100)
});
});
this.encoderModel = await loadModelFromBuffer(encoderData, {
runtime: "wasm"
// Uses ONNXRuntime which auto-detects WebGPU internally
});
onProgress?.({ model: "decoder", loaded: 0, total: 100, progress: 0 });
const decoderData = await this.fetchModelWithProgress(this.decoderUrl, (loaded, total) => {
onProgress?.({
model: "decoder",
loaded,
total,
progress: Math.round(loaded / total * 100)
});
});
this.decoderModel = await loadModelFromBuffer(decoderData, {
runtime: "wasm"
// Uses ONNXRuntime which auto-detects WebGPU internally
});
this.modelsLoaded = true;
}
/**
* Fetch model with progress tracking
*/
async fetchModelWithProgress(url, onProgress) {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to fetch model: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get("content-length");
const total = contentLength ? parseInt(contentLength, 10) : 0;
if (!response.body) {
const buffer2 = await response.arrayBuffer();
onProgress(buffer2.byteLength, buffer2.byteLength);
return buffer2;
}
const reader = response.body.getReader();
const chunks = [];
let loaded = 0;
while (true) {
const { done, value } = await reader.read();
if (done)
break;
chunks.push(value);
loaded += value.length;
onProgress(loaded, total || loaded);
}
const buffer = new Uint8Array(loaded);
let offset = 0;
for (const chunk of chunks) {
buffer.set(chunk, offset);
offset += chunk.length;
}
return buffer.buffer;
}
/**
* Initialize pipeline (override to skip default model loading)
*/
async initialize() {
if (this.isReady)
return;
this.isReady = true;
}
/**
* Load encoder model (processes the image once)
*/
async loadEncoder(modelUrl) {
this.encoderModel = await loadModel(modelUrl, {
runtime: "wasm"
});
}
/**
* Load decoder model (processes prompts to generate masks)
*/
async loadDecoder(modelUrl) {
this.decoderModel = await loadModel(modelUrl, {
runtime: "wasm"
});
}
/**
* Set and encode the image (call once per image)
*/
async setImage(image) {
if (!this.modelsLoaded) {
throw new Error("Models not loaded. Call loadModels() first.");
}
const imageData = await this.loadImage(image);
this.currentImageSize = {
width: imageData.width,
height: imageData.height
};
const { tensor: inputTensor, resizedSize } = this.preprocessImage(imageData);
this.resizedImageSize = resizedSize;
if (this.encoderModel) {
const outputs = await runInference(this.encoderModel, [inputTensor]);
this.imageEmbedding = outputs[0];
this.imagePositionalEmbedding = outputs[1];
console.log("[SAM] Encoder outputs:", outputs.length);
console.log("[SAM] image_embeddings shape:", this.imageEmbedding.shape);
if (this.imagePositionalEmbedding) {
console.log("[SAM] image_positional_embeddings shape:", this.imagePositionalEmbedding.shape);
}
} else {
throw new Error("Encoder model not loaded");
}
}
/**
* Segment the image with given prompts
*/
async segment(options = {}) {
if (!this.imageEmbedding || !this.currentImageSize || !this.resizedImageSize) {
throw new Error("No image set. Call setImage() first.");
}
if (!this.decoderModel) {
throw new Error("Decoder model not loaded");
}
const startTime = performance.now();
const { points = [], boxes = [], maskThreshold = 0, returnAllMasks = false } = options;
const decoderInputs = this.prepareDecoderInputs(points, boxes);
decoderInputs.set("image_embeddings", this.imageEmbedding);
if (this.imagePositionalEmbedding) {
decoderInputs.set("image_positional_embeddings", this.imagePositionalEmbedding);
} else {
throw new Error("image_positional_embeddings not available from encoder");
}
const outputs = await runInferenceNamed(this.decoderModel, decoderInputs);
const masks = outputs[0];
const scores = outputs[1];
const result = this.postprocessMasks(masks, scores, maskThreshold, returnAllMasks);
result.processingTime = performance.now() - startTime;
return result;
}
/**
* Run segmentation (implements BasePipeline interface)
*/
async run(input, options) {
await this.setImage(input);
return this.segment(options);
}
/**
* Load image from various sources
*/
async loadImage(input) {
if (typeof input === "string") {
return this.loadImageFromUrl(input);
} else if (input instanceof HTMLImageElement) {
return this.imageElementToImageData(input);
} else if (input instanceof HTMLCanvasElement) {
return this.canvasToImageData(input);
} else if (input instanceof ImageData) {
return input;
} else if (typeof ImageBitmap !== "undefined" && input instanceof ImageBitmap) {
return this.imageBitmapToImageData(input);
}
throw new Error("Unsupported image input type");
}
/**
* Load image from URL
*/
async loadImageFromUrl(url) {
return new Promise((resolve, reject) => {
const img = new Image();
img.crossOrigin = "anonymous";
img.onload = () => {
const canvas = document.createElement("canvas");
canvas.width = img.width;
canvas.height = img.height;
const ctx = canvas.getContext("2d");
ctx.drawImage(img, 0, 0);
resolve(ctx.getImageData(0, 0, img.width, img.height));
};
img.onerror = reject;
img.src = url;
});
}
/**
* Convert HTMLImageElement to ImageData
*/
imageElementToImageData(img) {
const canvas = document.createElement("canvas");
canvas.width = img.naturalWidth || img.width;
canvas.height = img.naturalHeight || img.height;
const ctx = canvas.getContext("2d");
ctx.drawImage(img, 0, 0);
return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
/**
* Convert canvas to ImageData
*/
canvasToImageData(canvas) {
const ctx = canvas.getContext("2d");
return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
/**
* Convert ImageBitmap to ImageData
*/
imageBitmapToImageData(bitmap) {
const canvas = document.createElement("canvas");
canvas.width = bitmap.width;
canvas.height = bitmap.height;
const ctx = canvas.getContext("2d");
ctx.drawImage(bitmap, 0, 0);
return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
/**
* Preprocess image for SAM: scale the longest side to inputSize, pad to a
* square filled with the pixel mean, normalise each channel with SAM's
* mean/std, and return an NCHW float32 tensor plus the resized dimensions.
*/
preprocessImage(imageData) {
const { width, height } = imageData;
const scale = this.inputSize / Math.max(width, height);
const newWidth = Math.round(width * scale);
const newHeight = Math.round(height * scale);
const canvas = document.createElement("canvas");
canvas.width = this.inputSize;
canvas.height = this.inputSize;
const ctx = canvas.getContext("2d");
ctx.fillStyle = `rgb(123.675, 116.28, 103.53)`;
ctx.fillRect(0, 0, this.inputSize, this.inputSize);
const tempCanvas = document.createElement("canvas");
tempCanvas.width = width;
tempCanvas.height = height;
const tempCtx = tempCanvas.getContext("2d");
tempCtx.putImageData(imageData, 0, 0);
ctx.drawImage(tempCanvas, 0, 0, newWidth, newHeight);
const resizedData = ctx.getImageData(0, 0, this.inputSize, this.inputSize);
const tensorData = new Float32Array(3 * this.inputSize * this.inputSize);
const mean2 = [123.675, 116.28, 103.53];
const std = [58.395, 57.12, 57.375];
for (let i = 0; i < this.inputSize * this.inputSize; i++) {
const pixelIdx = i * 4;
tensorData[i] = (resizedData.data[pixelIdx] - mean2[0]) / std[0];
tensorData[this.inputSize * this.inputSize + i] = (resizedData.data[pixelIdx + 1] - mean2[1]) / std[1];
tensorData[2 * this.inputSize * this.inputSize + i] = (resizedData.data[pixelIdx + 2] - mean2[2]) / std[2];
}
return {
tensor: new EdgeFlowTensor(tensorData, [1, 3, this.inputSize, this.inputSize], "float32"),
resizedSize: { width: newWidth, height: newHeight }
};
}
/**
* Prepare decoder prompt inputs for SlimSAM.
*
* Builds the prompt tensors consumed by prompt_encoder_mask_decoder:
* - input_points: [1, 1, numPoints, 2] float32, point/box-corner coordinates
*   scaled to the resized image
* - input_labels: [1, 1, numPoints] int64 (1 = foreground point, 2/3 = box corners)
* If no prompts are given, a single foreground point at the image centre is used.
* image_embeddings and image_positional_embeddings are added by segment().
*/
prepareDecoderInputs(points, boxes) {
const { width: resizedW, height: resizedH } = this.resizedImageSize;
const scaleX = resizedW;
const scaleY = resizedH;
const allPoints = [];
const allLabels = [];
for (const point of points) {
allPoints.push(point.x * scaleX, point.y * scaleY);
allLabels.push(point.label);
}
for (const box of boxes) {
allPoints.push(box.x1 * scaleX, box.y1 * scaleY);
allLabels.push(2);
allPoints.push(box.x2 * scaleX, box.y2 * scaleY);
allLabels.push(3);
}
if (allPoints.length === 0) {
allPoints.push(resizedW / 2, resizedH / 2);
allLabels.push(1);
}
const numPoints = allLabels.length;
const inputs = /* @__PURE__ */ new Map();
inputs.set("input_points", new EdgeFlowTensor(new Float32Array(allPoints), [1, 1, numPoints, 2], "float32"));
inputs.set("input_labels", new EdgeFlowTensor(BigInt64Array.from(allLabels.map((l) => BigInt(l))), [1, 1, numPoints], "int64"));
return inputs;
}
/**
* Post-process masks from decoder output
*/
postprocessMasks(masks, scores, threshold, returnAllMasks) {
const { width, height } = this.currentImageSize;
const scoresData = scores.toFloat32Array();
const masksData = masks.toFloat32Array();
const numMasks = scoresData.length;
const maskShape = masks.shape;
const maskH = maskShape[2] ?? height;
const maskW = maskShape[3] ?? width;
let bestIdx = 0;
let bestScore = scoresData[0] ?? 0;
for (let i = 1; i < numMasks; i++) {
if ((scoresData[i] ?? 0) > bestScore) {
bestScore = scoresData[i] ?? 0;
bestIdx = i;
}
}
const outputMask = this.resizeMask(masksData, bestIdx, maskW, maskH, width, height, threshold);
const result = {
mask: outputMask,
width,
height,
score: bestScore
};
if (returnAllMasks && numMasks > 1) {
result.allMasks = [];
for (let m = 0; m < numMasks; m++) {
const mask = this.resizeMask(masksData, m, maskW, maskH, width, height, threshold);
result.allMasks.push({
mask,
score: scoresData[m] ?? 0
});
}
}
return result;
}
/**
* Resize mask from model output size to original image size
*/
resizeMask(masksData, maskIdx, srcW, srcH, dstW, dstH, threshold) {
const outputMask = new Uint8Array(dstW * dstH);
const maskOffset = maskIdx * srcW * srcH;
for (let y = 0; y < dstH; y++) {
for (let x = 0; x < dstW; x++) {
const srcX = x / dstW * srcW;
const srcY = y / dstH * srcH;
const x0 = Math.floor(srcX);
const x1 = Math.min(x0 + 1, srcW - 1);
const y0 = Math.floor(srcY);
const y1 = Math.min(y0 + 1, srcH - 1);
const xFrac = srcX - x0;
const yFrac = srcY - y0;
const v00 = masksData[maskOffset + y0 * srcW + x0] ?? 0;
const v01 = masksData[maskOffset + y0 * srcW + x1] ?? 0;
const v10 = masksData[maskOffset + y1 * srcW + x0] ?? 0;
const v11 = masksData[maskOffset + y1 * srcW + x1] ?? 0;
const value = v00 * (1 - xFrac) * (1 - yFrac) + v01 * xFrac * (1 - yFrac) + v10 * (1 - xFrac) * yFrac + v11 * xFrac * yFrac;
const sigmoid2 = 1 / (1 + Math.exp(-value));
outputMask[y * dstW + x] = sigmoid2 > threshold ? 255 : 0;
}
}
return outputMask;
}
/**
* Clear the current image embedding
*/
clearImage() {
this.imageEmbedding = null;
this.imagePositionalEmbedding = null;
this.currentImageSize = null;
this.resizedImageSize = null;
}
/**
* Preprocess (required by BasePipeline)
*/
async preprocess(input) {
const imageData = await this.loadImage(input);
const { tensor: tensor2 } = this.preprocessImage(imageData);
return [tensor2];
}
/**
* Postprocess (required by BasePipeline)
*/
async postprocess(_outputs, _options) {
return {
mask: new Uint8Array(0),
width: 0,
height: 0,
score: 0
};
}
/**
* Dispose resources
*/
dispose() {
super.dispose();
this.encoderModel?.dispose();
this.decoderModel?.dispose();
this.imageEmbedding = null;
this.imagePositionalEmbedding = null;
this.currentImageSize = null;
this.resizedImageSize = null;
this.modelsLoaded = false;
}
};
function createImageSegmentationPipeline(config = {}) {
return new ImageSegmentationPipeline({
task: "image-segmentation",
model: config.model ?? "slimsam",
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization
});
}
registerPipeline("image-segmentation", (config) => new ImageSegmentationPipeline(config));
// dist/pipelines/index.js
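/**
* Pipeline factory: registers all backends, builds a config from the options,
* dispatches on the task name to a built-in pipeline class (or a plugin entry
* registered via registerPlugin()), and initializes it before returning.
*
* Illustrative usage (result shape assumed):
*   const classifier = await pipeline("text-classification");
*   const result = await classifier.run("edgeFlow makes on-device ML easy");
*/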
async function pipeline(task, options) {
registerAllBackends();
const config = {
task,
model: options?.model ?? "default",
runtime: options?.runtime,
cache: options?.cache ?? true,
quantization: options?.quantization
};
let pipelineInstance;
switch (task) {
case "text-classification":
pipelineInstance = new TextClassificationPipeline(config, options?.labels);
break;
case "sentiment-analysis":
pipelineInstance = new SentimentAnalysisPipeline(config);
break;
case "feature-extraction":
pipelineInstance = new FeatureExtractionPipeline(config);
break;
case "image-classification":
pipelineInstance = new ImageClassificationPipeline(config, options?.labels);
break;
case "text-generation":
pipelineInstance = new TextGenerationPipeline(config);
break;
case "object-detection":
pipelineInstance = new ObjectDetectionPipeline(config, options?.labels);
break;
case "automatic-speech-recognition":
pipelineInstance = new AutomaticSpeechRecognitionPipeline(config);
break;
case "zero-shot-classification":
pipelineInstance = new ZeroShotClassificationPipeline(config);
break;
case "question-answering":
pipelineInstance = new QuestionAnsweringPipeline(config);
break;
case "image-segmentation":
pipelineInstance = new ImageSegmentationPipeline(config);
break;
default: {
const pluginEntry = getPluginPipeline(task);
if (pluginEntry) {
pipelineInstance = pluginEntry.factory(config);
break;
}
throw new Error(`Unknown pipeline task: "${task}". Register a plugin with registerPlugin() to add custom pipeline tasks.`);
}
}
await pipelineInstance.initialize();
return pipelineInstance;
}
async function createPipelines(tasks, options) {
const pipelines = await Promise.all(tasks.map((task) => pipeline(task, options)));
const result = {};
for (let i = 0; i < tasks.length; i++) {
const task = tasks[i];
result[task] = pipelines[i];
}
return result;
}
// dist/core/composer.js
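/**
* compose() chains pipeline stages: each stage may transform the running
* value before its pipeline runs, and per-stage timings are collected.
* parallel() runs all stages concurrently on the same input. Pipelines are
* created lazily on first run() and released via dispose().
*
* Illustrative usage (stage shapes assumed):
*   const flow = compose([
*     { task: "automatic-speech-recognition" },
*     { task: "text-classification", transform: (r) => r.text }
*   ]);
*   const { output, stageTimes } = await flow.run(audioData);
*/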
function compose(stages) {
if (stages.length === 0) {
throw new Error("[edgeFlow.js] compose() requires at least one stage");
}
let pipelineInstances = null;
async function ensureInitialised() {
if (pipelineInstances)
return pipelineInstances;
pipelineInstances = await Promise.all(stages.map((stage) => pipeline(stage.task, {
model: stage.model,
...stage.options
})));
return pipelineInstances;
}
return {
get length() {
return stages.length;
},
async run(input) {
const instances = await ensureInitialised();
const stageResults = [];
const stageTimes = [];
let current = input;
const wallStart = performance.now();
for (let i = 0; i < stages.length; i++) {
const stage = stages[i];
const inst = instances[i];
if (stage.transform) {
current = stage.transform(current);
}
const t0 = performance.now();
current = await inst.run(current, stage.runOptions);
stageTimes.push(performance.now() - t0);
stageResults.push(current);
}
return {
output: current,
stages: stageResults,
totalTime: performance.now() - wallStart,
stageTimes
};
},
dispose() {
if (pipelineInstances) {
for (const inst of pipelineInstances) {
if (inst && typeof inst.dispose === "function") {
inst.dispose();
}
}
pipelineInstances = null;
}
}
};
}
function parallel(stages) {
if (stages.length === 0) {
throw new Error("[edgeFlow.js] parallel() requires at least one stage");
}
let pipelineInstances = null;
async function ensureInitialised() {
if (pipelineInstances)
return pipelineInstances;
pipelineInstances = await Promise.all(stages.map((s) => pipeline(s.task, {
model: s.model,
...s.options
})));
return pipelineInstances;
}
return {
async run(input) {
const instances = await ensureInitialised();
const t0 = performance.now();
const outputs = await Promise.all(stages.map((stage, i) => {
const stageInput = stage.transform ? stage.transform(input) : input;
return instances[i].run(stageInput, stage.runOptions);
}));
return { outputs, totalTime: performance.now() - t0 };
},
dispose() {
if (pipelineInstances) {
for (const inst of pipelineInstances) {
if (inst && typeof inst.dispose === "function") {
inst.dispose();
}
}
pipelineInstances = null;
}
}
};
}
// dist/utils/index.js
init_model_loader();
// dist/utils/hub.js
init_model_loader();
init_types();
var DEFAULT_ENDPOINT = "https://huggingface.co";
var DEFAULT_REVISION = "main";
var ONNX_MODEL_FILES = [
"model.onnx",
"model_quantized.onnx",
"model_int8.onnx",
"model_uint8.onnx",
"model_fp16.onnx",
"onnx/model.onnx",
"onnx/model_quantized.onnx"
];
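/**
* Candidate ONNX filenames are probed in priority order; file URLs follow the
* Hugging Face layout {endpoint}/{modelId}/resolve/{revision}/{subfolder}{filename},
* e.g. https://huggingface.co/Xenova/gpt2/resolve/main/onnx/model_quantized.onnx.
*/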
function buildFileUrl(modelId, filename, options = {}) {
const endpoint = options.endpoint ?? DEFAULT_ENDPOINT;
const revision = options.revision ?? DEFAULT_REVISION;
const subfolder = options.subfolder ? `${options.subfolder}/` : "";
return `${endpoint}/${modelId}/resolve/${revision}/${subfolder}${filename}`;
}
async function fetchWithAuth(url, token) {
const headers = {};
if (token) {
headers["Authorization"] = `Bearer ${token}`;
}
const response = await fetch(url, { headers });
return response;
}
async function fileExists(modelId, filename, options = {}) {
const url = buildFileUrl(modelId, filename, options);
try {
const response = await fetchWithAuth(url, options.token);
return response.ok || response.status === 302;
} catch {
return false;
}
}
async function findOnnxModel(modelId, options = {}) {
for (const filename of ONNX_MODEL_FILES) {
if (await fileExists(modelId, filename, options)) {
return filename;
}
}
return null;
}
async function downloadFile(modelId, filename, options = {}) {
const url = buildFileUrl(modelId, filename, options);
return loadModelData(url, {
cache: options.cache ?? true,
forceDownload: options.forceDownload ?? false,
onProgress: options.onProgress ? (progress) => {
options.onProgress({
file: filename,
fileIndex: 1,
totalFiles: 1,
fileProgress: progress,
overallProgress: progress.percent
});
} : void 0
});
}
async function downloadJson(modelId, filename, options = {}) {
const url = buildFileUrl(modelId, filename, options);
if (options.cache !== false && !options.forceDownload) {
const cached = await isModelCached(url);
if (cached) {
const data = await loadModelData(url, { cache: true });
const text = new TextDecoder().decode(data);
return JSON.parse(text);
}
}
const response = await fetchWithAuth(url, options.token);
if (!response.ok) {
throw new EdgeFlowError(`Failed to download ${filename} from ${modelId}: ${response.status}`, ErrorCodes.MODEL_NOT_FOUND);
}
return response.json();
}
async function downloadTokenizer(modelId, options = {}) {
const url = buildFileUrl(modelId, "tokenizer.json", options);
return Tokenizer.fromUrl(url);
}
async function downloadConfig(modelId, options = {}) {
return downloadJson(modelId, "config.json", options);
}
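/**
* Download a complete model bundle from the Hub in three steps: the first
* ONNX file found, tokenizer.json, and config.json. Tokenizer and config are
* optional; overall progress is reported as each step completes.
*/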
async function downloadModel(modelId, options = {}) {
const files = {};
const totalSteps = 3;
let currentStep = 0;
const reportProgress = (file, progress) => {
if (options.onProgress) {
const baseProgress = currentStep / totalSteps * 100;
const stepProgress = progress.percent / totalSteps;
options.onProgress({
file,
fileIndex: currentStep + 1,
totalFiles: totalSteps,
fileProgress: progress,
overallProgress: baseProgress + stepProgress
});
}
};
console.log(`\u{1F50D} Finding ONNX model in ${modelId}...`);
const modelFile = await findOnnxModel(modelId, options);
if (!modelFile) {
throw new EdgeFlowError(`No ONNX model found in ${modelId}. Please ensure the model has an ONNX file.`, ErrorCodes.MODEL_NOT_FOUND, { modelId, triedFiles: ONNX_MODEL_FILES });
}
files.model = modelFile;
console.log(`\u{1F4E6} Downloading model: ${modelFile}`);
const modelData = await downloadFile(modelId, modelFile, {
...options,
onProgress: (p) => reportProgress(modelFile, p.fileProgress)
});
currentStep = 1;
let tokenizer;
try {
console.log(`\u{1F4DD} Downloading tokenizer...`);
files.tokenizer = "tokenizer.json";
tokenizer = await downloadTokenizer(modelId, options);
console.log(`\u2713 Tokenizer loaded`);
} catch (error) {
console.warn(`\u26A0\uFE0F No tokenizer found for ${modelId}`);
}
currentStep = 2;
let config;
try {
console.log(`\u2699\uFE0F Downloading config...`);
files.config = "config.json";
config = await downloadConfig(modelId, options);
console.log(`\u2713 Config loaded`);
} catch (error) {
console.warn(`\u26A0\uFE0F No config found for ${modelId}`);
}
currentStep = 3;
if (options.onProgress) {
options.onProgress({
file: "complete",
fileIndex: totalSteps,
totalFiles: totalSteps,
fileProgress: { loaded: 1, total: 1, percent: 100, speed: 0, eta: 0 },
overallProgress: 100
});
}
console.log(`\u2705 Model bundle downloaded: ${modelId}`);
return {
modelId,
modelData,
tokenizer,
config,
files
};
}
async function fromHub(modelId, options = {}) {
return downloadModel(modelId, options);
}
async function modelExists(modelId, options = {}) {
try {
const modelFile = await findOnnxModel(modelId, options);
return modelFile !== null;
} catch {
return false;
}
}
async function getModelInfo(modelId, options = {}) {
const [onnxFile, hasTokenizer, config] = await Promise.all([
findOnnxModel(modelId, options),
fileExists(modelId, "tokenizer.json", options),
downloadConfig(modelId, options).catch(() => void 0)
]);
return {
hasOnnx: onnxFile !== null,
onnxFile: onnxFile ?? void 0,
hasTokenizer,
hasConfig: config !== void 0,
config
};
}
var POPULAR_MODELS = {
// Text Classification / Sentiment
"sentiment-analysis": "Xenova/distilbert-base-uncased-finetuned-sst-2-english",
"text-classification": "Xenova/distilbert-base-uncased-finetuned-sst-2-english",
// Feature Extraction
"feature-extraction": "Xenova/all-MiniLM-L6-v2",
"sentence-similarity": "Xenova/all-MiniLM-L6-v2",
// Question Answering
"question-answering": "Xenova/distilbert-base-cased-distilled-squad",
// Token Classification
"ner": "Xenova/bert-base-NER",
"token-classification": "Xenova/bert-base-NER",
// Text Generation
"text-generation": "Xenova/gpt2",
// Translation
"translation-en-fr": "Xenova/t5-small",
"translation-en-de": "Xenova/t5-small",
// Summarization
"summarization": "Xenova/distilbart-cnn-6-6",
// Fill Mask
"fill-mask": "Xenova/bert-base-uncased",
// Image Classification
"image-classification": "Xenova/vit-base-patch16-224",
// Object Detection
"object-detection": "Xenova/detr-resnet-50",
// Image Segmentation
"image-segmentation": "Xenova/segformer-b0-finetuned-ade-512-512",
// Zero-shot Classification
"zero-shot-classification": "Xenova/mobilebert-uncased-mnli",
// Speech Recognition
"automatic-speech-recognition": "Xenova/whisper-tiny.en",
// Text-to-Speech
"text-to-speech": "Xenova/speecht5_tts"
};
function getDefaultModel(task) {
return POPULAR_MODELS[task];
}
async function fromTask(task, options = {}) {
const modelId = getDefaultModel(task);
return downloadModel(modelId, options);
}
// dist/tools/benchmark.js
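/**
* Measure an async function: run warmupRuns untimed iterations, then `runs`
* timed iterations, racing each call against `timeout`. Reports avg, median,
* min/max, std dev, p95/p99 (from the sorted times) and throughput in
* ops/sec; failed runs are counted but excluded from the timings.
*/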
async function benchmark(fn, options = {}) {
const { warmupRuns = 3, runs = 10, verbose = false, timeout = 3e4, name = "benchmark" } = options;
const times = [];
let failedRuns = 0;
if (verbose)
console.log(`[${name}] Running ${warmupRuns} warmup iterations...`);
for (let i = 0; i < warmupRuns; i++) {
try {
await Promise.race([
Promise.resolve(fn()),
new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), timeout))
]);
} catch {
}
}
if (verbose)
console.log(`[${name}] Running ${runs} measured iterations...`);
for (let i = 0; i < runs; i++) {
try {
const start = performance.now();
await Promise.race([
Promise.resolve(fn()),
new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), timeout))
]);
const end = performance.now();
times.push(end - start);
if (verbose)
console.log(` Run ${i + 1}: ${(end - start).toFixed(2)}ms`);
} catch (error) {
failedRuns++;
if (verbose)
console.log(` Run ${i + 1}: FAILED - ${error}`);
}
}
if (times.length === 0) {
throw new Error(`All ${runs} runs failed`);
}
const sorted = [...times].sort((a, b) => a - b);
const sum2 = times.reduce((a, b) => a + b, 0);
const avg = sum2 / times.length;
const variance = times.reduce((sum3, t) => sum3 + Math.pow(t - avg, 2), 0) / times.length;
const stdDev = Math.sqrt(variance);
const result = {
name,
avgTime: avg,
medianTime: sorted[Math.floor(sorted.length / 2)] ?? 0,
minTime: sorted[0] ?? 0,
maxTime: sorted[sorted.length - 1] ?? 0,
stdDev,
p95: sorted[Math.floor(sorted.length * 0.95)] ?? sorted[sorted.length - 1] ?? 0,
p99: sorted[Math.floor(sorted.length * 0.99)] ?? sorted[sorted.length - 1] ?? 0,
throughput: 1e3 / avg,
times,
totalRuns: runs,
failedRuns
};
if (verbose) {
console.log(`
[${name}] Results:`);
console.log(` Avg: ${result.avgTime.toFixed(2)}ms`);
console.log(` Median: ${result.medianTime.toFixed(2)}ms`);
console.log(` Min: ${result.minTime.toFixed(2)}ms`);
console.log(` Max: ${result.maxTime.toFixed(2)}ms`);
console.log(` Std Dev: ${result.stdDev.toFixed(2)}ms`);
console.log(` P95: ${result.p95.toFixed(2)}ms`);
console.log(` Throughput: ${result.throughput.toFixed(2)} ops/sec`);
}
return result;
}
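/**
* Benchmark a baseline and a comparison function with the same options and
* report speedup and percent difference; differences under 5% of the baseline
* average are declared a tie.
*/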
async function compareBenchmarks(baseline, comparison, options = {}) {
const baselineResult = await benchmark(baseline, {
...options,
name: options.name ? `${options.name} (baseline)` : "baseline"
});
const comparisonResult = await benchmark(comparison, {
...options,
name: options.name ? `${options.name} (comparison)` : "comparison"
});
const speedup = baselineResult.avgTime / comparisonResult.avgTime;
const percentFaster = (baselineResult.avgTime - comparisonResult.avgTime) / baselineResult.avgTime * 100;
let winner;
if (Math.abs(percentFaster) < 5) {
winner = "tie";
} else if (percentFaster > 0) {
winner = "comparison";
} else {
winner = "baseline";
}
return {
baseline: baselineResult,
comparison: comparisonResult,
speedup,
percentFaster,
winner
};
}
async function benchmarkSuite(suite, options = {}) {
const results = {};
for (const [name, fn] of Object.entries(suite)) {
console.log(`
=== ${name} ===`);
results[name] = await benchmark(fn, { ...options, name, verbose: true });
}
return results;
}
function formatBenchmarkResult(result) {
return `
\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510
\u2502 ${result.name.padEnd(39)} \u2502
\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524
\u2502 Avg Time: ${result.avgTime.toFixed(2).padStart(10)}ms \u2502
\u2502 Median: ${result.medianTime.toFixed(2).padStart(10)}ms \u2502
\u2502 Min Time: ${result.minTime.toFixed(2).padStart(10)}ms \u2502
\u2502 Max Time: ${result.maxTime.toFixed(2).padStart(10)}ms \u2502
\u2502 Std Dev: ${result.stdDev.toFixed(2).padStart(10)}ms \u2502
\u2502 P95: ${result.p95.toFixed(2).padStart(10)}ms \u2502
\u2502 P99: ${result.p99.toFixed(2).padStart(10)}ms \u2502
\u2502 Throughput: ${result.throughput.toFixed(2).padStart(10)} ops/sec \u2502
\u2502 Runs: ${result.totalRuns.toString().padStart(10)} (${result.failedRuns} failed) \u2502
\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518
`.trim();
}
function formatComparisonResult(result) {
const arrow = result.percentFaster > 0 ? "\u2191" : result.percentFaster < 0 ? "\u2193" : "=";
const winnerText = result.winner === "comparison" ? "Comparison is faster!" : result.winner === "baseline" ? "Baseline is faster!" : "Results are similar";
return `
\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510
\u2502 BENCHMARK COMPARISON \u2502
\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524
\u2502 Baseline: ${result.baseline.avgTime.toFixed(2).padStart(10)}ms \u2502
\u2502 Comparison: ${result.comparison.avgTime.toFixed(2).padStart(10)}ms \u2502
\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524
\u2502 Speedup: ${result.speedup.toFixed(2).padStart(10)}x \u2502
\u2502 Difference: ${arrow} ${Math.abs(result.percentFaster).toFixed(1).padStart(8)}% \u2502
\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524
\u2502 Winner: ${winnerText.padEnd(42)} \u2502
\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518
`.trim();
}
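/**
* Rough memory benchmark using the non-standard performance.memory API
* (Chromium only); readings are 0 on other engines, so treat the numbers as
* indicative rather than precise.
*/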
async function benchmarkMemory(fn, options = {}) {
const { name = "memory-benchmark", runs = 5 } = options;
const getMemory = () => {
if (typeof performance !== "undefined" && "memory" in performance) {
return performance.memory.usedJSHeapSize;
}
return 0;
};
const memoryReadings = [];
const initialMemory = getMemory();
for (let i = 0; i < runs; i++) {
await fn();
memoryReadings.push(getMemory());
}
const peakMemory = Math.max(...memoryReadings);
const avgMemory = memoryReadings.reduce((a, b) => a + b, 0) / memoryReadings.length;
const memoryDelta = avgMemory - initialMemory;
return {
name,
peakMemory,
avgMemory,
memoryDelta
};
}
// dist/core/index.js
init_types();
init_tensor();
// dist/tools/quantization.js
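/**
* Compute affine quantization parameters for a tensor.
* Symmetric: scale = absMax / qmax, zeroPoint = 0.
* Asymmetric: scale = (max - min) / (qmax - qmin), zeroPoint = round(qmin - min / scale).
* With perChannel, one scale/zeroPoint pair is computed per slice along channelAxis.
*/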
function calculateQuantParams(data, bits, symmetric, perChannel, channelAxis = 0, shape = []) {
const qmin = symmetric ? -(1 << bits - 1) : 0;
const qmax = symmetric ? (1 << bits - 1) - 1 : (1 << bits) - 1;
if (perChannel && shape.length > 1) {
const numChannels = shape[channelAxis] ?? 1;
const scales = new Float32Array(numChannels);
const zeroPoints = new Int32Array(numChannels);
const channelSize = data.length / numChannels;
let globalMin = Infinity;
let globalMax = -Infinity;
for (let c = 0; c < numChannels; c++) {
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
const val = data[idx] ?? 0;
min = Math.min(min, val);
max = Math.max(max, val);
}
globalMin = Math.min(globalMin, min);
globalMax = Math.max(globalMax, max);
if (symmetric) {
const absMax = Math.max(Math.abs(min), Math.abs(max));
scales[c] = absMax / qmax;
zeroPoints[c] = 0;
} else {
scales[c] = (max - min) / (qmax - qmin);
zeroPoints[c] = Math.round(qmin - min / (scales[c] || 1));
}
if (scales[c] === 0)
scales[c] = 1;
}
return { scale: scales, zeroPoint: zeroPoints, min: globalMin, max: globalMax };
} else {
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
min = Math.min(min, val);
max = Math.max(max, val);
}
let scale;
let zeroPoint;
if (symmetric) {
const absMax = Math.max(Math.abs(min), Math.abs(max));
scale = absMax / qmax;
zeroPoint = 0;
} else {
scale = (max - min) / (qmax - qmin);
zeroPoint = Math.round(qmin - min / (scale || 1));
}
if (scale === 0)
scale = 1;
return { scale, zeroPoint, min, max };
}
}
function quantizeToInt8(data, scale, zeroPoint, perChannel, channelSize = data.length) {
const result = new Int8Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
const val = data[idx] ?? 0;
result[idx] = Math.max(-128, Math.min(127, Math.round(val / s + zp)));
}
}
} else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
result[i] = Math.max(-128, Math.min(127, Math.round(val / s + zp)));
}
}
return result;
}
function quantizeToUint8(data, scale, zeroPoint, perChannel, channelSize = data.length) {
const result = new Uint8Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
const val = data[idx] ?? 0;
result[idx] = Math.max(0, Math.min(255, Math.round(val / s + zp)));
}
}
} else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
result[i] = Math.max(0, Math.min(255, Math.round(val / s + zp)));
}
}
return result;
}
function quantizeToInt4(data, scale, zeroPoint) {
const packedLength = Math.ceil(data.length / 2);
const result = new Uint8Array(packedLength);
for (let i = 0; i < data.length; i += 2) {
const val1 = data[i] ?? 0;
const val2 = data[i + 1] ?? 0;
const q1 = Math.max(0, Math.min(15, Math.round(val1 / scale + zeroPoint + 8)));
const q2 = Math.max(0, Math.min(15, Math.round(val2 / scale + zeroPoint + 8)));
result[i >> 1] = q1 << 4 | q2;
}
return result;
}
function quantizeToFloat16(data) {
const result = new Uint16Array(data.length);
for (let i = 0; i < data.length; i++) {
result[i] = float32ToFloat16(data[i] ?? 0);
}
return result;
}
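/**
* Convert an IEEE 754 float32 to a float16 bit pattern: handles subnormal
* underflow and overflow to infinity; the mantissa is truncated (no rounding)
* and NaN inputs collapse to infinity.
*/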
function float32ToFloat16(value) {
const float32View = new Float32Array(1);
const int32View = new Int32Array(float32View.buffer);
float32View[0] = value;
const f = int32View[0];
const sign = f >> 16 & 32768;
const exponent = (f >> 23 & 255) - 127 + 15;
const mantissa = f & 8388607;
if (exponent <= 0) {
if (exponent < -10) {
return sign;
}
const m = (mantissa | 8388608) >> 1 - exponent;
return sign | m >> 13;
} else if (exponent >= 31) {
return sign | 31744;
}
return sign | exponent << 10 | mantissa >> 13;
}
function dequantizeInt8(data, scale, zeroPoint, perChannel = false, channelSize = data.length) {
const result = new Float32Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
result[idx] = ((data[idx] ?? 0) - zp) * s;
}
}
} else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
result[i] = ((data[i] ?? 0) - zp) * s;
}
}
return result;
}
function dequantizeUint8(data, scale, zeroPoint, perChannel = false, channelSize = data.length) {
const result = new Float32Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
result[idx] = ((data[idx] ?? 0) - zp) * s;
}
}
} else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
result[i] = ((data[i] ?? 0) - zp) * s;
}
}
return result;
}
function float16ToFloat32(value) {
const sign = (value & 32768) >> 15;
const exponent = (value & 31744) >> 10;
const mantissa = value & 1023;
if (exponent === 0) {
if (mantissa === 0) {
return sign === 0 ? 0 : -0;
}
return (sign === 0 ? 1 : -1) * Math.pow(2, -14) * (mantissa / 1024);
} else if (exponent === 31) {
if (mantissa === 0) {
return sign === 0 ? Infinity : -Infinity;
}
return NaN;
}
return (sign === 0 ? 1 : -1) * Math.pow(2, exponent - 15) * (1 + mantissa / 1024);
}
function dequantizeFloat16(data) {
const result = new Float32Array(data.length);
for (let i = 0; i < data.length; i++) {
result[i] = float16ToFloat32(data[i] ?? 0);
}
return result;
}
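/**
* Simplified weight extraction: treats the entire model buffer as a single
* float32 tensor named "model_weights" rather than parsing the ONNX graph,
* so quantization statistics are computed over the raw buffer as a whole.
*/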
function parseModelWeights(modelData) {
const weights = [];
const float32Array = new Float32Array(modelData);
weights.push({
name: "model_weights",
data: float32Array,
shape: [float32Array.length],
dtype: "float32"
});
return weights;
}
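/**
* Serialize a quantized model to a binary buffer: a 20-byte header (version,
* quantization-type index, original size as two uint32 words, weight count)
* followed, per weight, by length-prefixed name/shape/dtype/originalDtype,
* optional scale and zeroPoint arrays (per-channel values are expected as
* plain JS arrays), a 64-bit data byte length, and the raw weight bytes.
*/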
function serializeQuantizedModel(model) {
const encoder = new TextEncoder();
let totalSize = 20;
for (const weight of model.weights) {
const nameBytes = encoder.encode(weight.name);
const dtypeBytes = encoder.encode(weight.dtype);
const origDtypeBytes = encoder.encode(weight.originalDtype);
totalSize += 4 + nameBytes.length;
totalSize += 4 + weight.shape.length * 4;
totalSize += 4 + dtypeBytes.length;
totalSize += 4 + origDtypeBytes.length;
totalSize += 1;
if (weight.scale !== void 0) {
totalSize += Array.isArray(weight.scale) ? 4 + weight.scale.length * 4 : 4;
}
totalSize += 1;
if (weight.zeroPoint !== void 0) {
totalSize += Array.isArray(weight.zeroPoint) ? 4 + weight.zeroPoint.length * 4 : 4;
}
totalSize += 8 + weight.data.byteLength;
}
const buffer = new ArrayBuffer(totalSize);
const view = new DataView(buffer);
const uint8 = new Uint8Array(buffer);
let offset = 0;
view.setUint32(offset, model.version, true);
offset += 4;
view.setUint32(offset, ["int8", "uint8", "int4", "float16", "dynamic"].indexOf(model.quantizationType), true);
offset += 4;
view.setUint32(offset, model.originalSize & 4294967295, true);
offset += 4;
view.setUint32(offset, model.originalSize / 4294967296 >>> 0, true);
offset += 4;
view.setUint32(offset, model.weights.length, true);
offset += 4;
for (const weight of model.weights) {
const nameBytes = encoder.encode(weight.name);
const dtypeBytes = encoder.encode(weight.dtype);
const origDtypeBytes = encoder.encode(weight.originalDtype);
view.setUint32(offset, nameBytes.length, true);
offset += 4;
uint8.set(nameBytes, offset);
offset += nameBytes.length;
view.setUint32(offset, weight.shape.length, true);
offset += 4;
for (const dim of weight.shape) {
view.setInt32(offset, dim, true);
offset += 4;
}
view.setUint32(offset, dtypeBytes.length, true);
offset += 4;
uint8.set(dtypeBytes, offset);
offset += dtypeBytes.length;
view.setUint32(offset, origDtypeBytes.length, true);
offset += 4;
uint8.set(origDtypeBytes, offset);
offset += origDtypeBytes.length;
if (weight.scale !== void 0) {
view.setUint8(offset, 1);
offset += 1;
if (Array.isArray(weight.scale)) {
view.setUint32(offset, weight.scale.length, true);
offset += 4;
for (const s of weight.scale) {
view.setFloat32(offset, s, true);
offset += 4;
}
} else {
view.setUint32(offset, 1, true);
offset += 4;
view.setFloat32(offset, weight.scale, true);
offset += 4;
}
} else {
view.setUint8(offset, 0);
offset += 1;
}
if (weight.zeroPoint !== void 0) {
view.setUint8(offset, 1);
offset += 1;
if (Array.isArray(weight.zeroPoint)) {
view.setUint32(offset, weight.zeroPoint.length, true);
offset += 4;
for (const zp of weight.zeroPoint) {
view.setInt32(offset, zp, true);
offset += 4;
}
} else {
view.setUint32(offset, 1, true);
offset += 4;
view.setInt32(offset, weight.zeroPoint, true);
offset += 4;
}
} else {
view.setUint8(offset, 0);
offset += 1;
}
const dataLow = weight.data.byteLength & 4294967295;
const dataHigh = weight.data.byteLength / 4294967296 >>> 0;
view.setUint32(offset, dataLow, true);
offset += 4;
view.setUint32(offset, dataHigh, true);
offset += 4;
uint8.set(new Uint8Array(weight.data), offset);
offset += weight.data.byteLength;
}
return buffer;
}
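// Binary layout written by serializeQuantizedModel (all integers little-endian),
// as implemented above:
//   header    : version (u32), index into ["int8","uint8","int4","float16","dynamic"] (u32),
//               originalSize as low/high u32 pair, weight count (u32)
//   per weight: name length (u32) + UTF-8 name,
//               rank (u32) + one i32 per shape dim,
//               dtype length (u32) + UTF-8 dtype,
//               original-dtype length (u32) + UTF-8 original dtype,
//               scale flag (u8) [+ count (u32) + f32 values],
//               zero-point flag (u8) [+ count (u32) + i32 values],
//               data byte length as low/high u32 pair + raw data bytes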
async function quantizeModel(modelData, options) {
const { type, skipPatterns = [], perChannel = false, symmetric = true, onProgress, minTensorSize = 100 } = options;
const originalSize = modelData.byteLength;
const layerStats = [];
let tensorsQuantized = 0;
let tensorsSkipped = 0;
onProgress?.({ stage: "analyzing", current: 0, total: 1, percent: 0 });
const weights = parseModelWeights(modelData);
const quantizedWeights = [];
let totalParams = 0;
let quantizedParams = 0;
const scales = [];
for (let i = 0; i < weights.length; i++) {
const weight = weights[i];
const percent = (i + 1) / weights.length * 100;
onProgress?.({
stage: "quantizing",
current: i + 1,
total: weights.length,
percent,
layerName: weight.name
});
totalParams += weight.data.length;
const shouldSkip = weight.data.length < minTensorSize || skipPatterns.some((pattern) => {
if (typeof pattern === "string") {
return weight.name.includes(pattern);
}
return pattern.test(weight.name);
});
if (shouldSkip) {
tensorsSkipped++;
layerStats.push({
name: weight.name,
originalDtype: weight.dtype,
quantizedDtype: weight.dtype,
originalSize: weight.data.byteLength,
quantizedSize: weight.data.byteLength,
scale: 1,
zeroPoint: 0,
minValue: Math.min(...weight.data),
maxValue: Math.max(...weight.data),
skipped: true,
skipReason: weight.data.length < minTensorSize ? "Tensor too small" : "Matched skip pattern"
});
quantizedWeights.push({
name: weight.name,
data: weight.data.buffer.slice(0),
shape: weight.shape,
dtype: weight.dtype,
originalDtype: weight.dtype
});
continue;
}
const bits = type === "int4" ? 4 : 8;
const params = calculateQuantParams(weight.data, bits, symmetric, perChannel, 0, weight.shape);
let quantizedData2;
let quantizedDtype;
switch (type) {
case "int8":
const int8Data = quantizeToInt8(weight.data, params.scale, params.zeroPoint, perChannel, perChannel ? weight.data.length / (weight.shape[0] ?? 1) : weight.data.length);
quantizedData2 = int8Data.buffer.slice(0);
quantizedDtype = "int8";
break;
case "uint8":
const uint8Data = quantizeToUint8(weight.data, params.scale, params.zeroPoint, perChannel, perChannel ? weight.data.length / (weight.shape[0] ?? 1) : weight.data.length);
quantizedData2 = uint8Data.buffer.slice(0);
quantizedDtype = "uint8";
break;
case "int4":
const int4Data = quantizeToInt4(weight.data, params.scale, params.zeroPoint);
quantizedData2 = int4Data.buffer.slice(0);
quantizedDtype = "int4";
break;
case "float16":
const fp16Data = quantizeToFloat16(weight.data);
quantizedData2 = fp16Data.buffer.slice(0);
quantizedDtype = "float16";
break;
case "dynamic":
default:
const dynData = quantizeToInt8(weight.data, params.scale, params.zeroPoint, perChannel, perChannel ? weight.data.length / (weight.shape[0] ?? 1) : weight.data.length);
quantizedData2 = dynData.buffer.slice(0);
quantizedDtype = "int8";
break;
}
tensorsQuantized++;
quantizedParams += weight.data.length;
const scaleValue = params.scale instanceof Float32Array ? Array.from(params.scale) : params.scale;
const zpValue = params.zeroPoint instanceof Int32Array ? Array.from(params.zeroPoint) : params.zeroPoint;
if (typeof scaleValue === "number") {
scales.push(scaleValue);
} else {
scales.push(...scaleValue);
}
layerStats.push({
name: weight.name,
originalDtype: weight.dtype,
quantizedDtype,
originalSize: weight.data.byteLength,
quantizedSize: quantizedData2.byteLength,
scale: scaleValue,
zeroPoint: zpValue,
minValue: params.min,
maxValue: params.max,
skipped: false
});
quantizedWeights.push({
name: weight.name,
data: quantizedData2,
shape: weight.shape,
dtype: quantizedDtype,
originalDtype: weight.dtype,
scale: scaleValue,
zeroPoint: zpValue
});
}
onProgress?.({ stage: "packing", current: 0, total: 1, percent: 0 });
const quantizedModel = {
version: 1,
quantizationType: type,
originalSize,
weights: quantizedWeights
};
const quantizedData = serializeQuantizedModel(quantizedModel);
onProgress?.({ stage: "complete", current: 1, total: 1, percent: 100 });
const avgScale = scales.length > 0 ? scales.reduce((a, b) => a + b, 0) / scales.length : 1;
const minScale = scales.length > 0 ? Math.min(...scales) : 1;
const maxScale = scales.length > 0 ? Math.max(...scales) : 1;
const bitsReduction = type === "int4" ? 8 : type === "float16" ? 2 : 4;
const errorEstimate = avgScale / bitsReduction;
return {
data: quantizedData,
originalSize,
quantizedSize: quantizedData.byteLength,
compressionRatio: originalSize / quantizedData.byteLength,
tensorsQuantized,
tensorsSkipped,
layerStats,
stats: {
totalParameters: totalParams,
quantizedParameters: quantizedParams,
averageScale: avgScale,
minScale,
maxScale,
errorEstimate
}
};
}
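// Usage sketch (illustrative; the fetched model path is a placeholder):
//
//   const buffer = await fetch("/models/encoder.bin").then((r) => r.arrayBuffer());
//   const result = await quantizeModel(buffer, {
//     type: "int8",
//     skipPatterns: [/layernorm/i, "bias"],
//     onProgress: (p) => console.log(`${p.stage} ${p.percent.toFixed(0)}%`),
//   });
//   console.log(result.compressionRatio.toFixed(2), result.stats.errorEstimate);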
function quantizeTensor(tensor2, type, options = {}) {
const { symmetric = true, perChannel = false } = options;
const data = tensor2.toFloat32Array();
const shape = tensor2.shape;
const bits = type === "int4" ? 4 : 8;
const params = calculateQuantParams(data, bits, symmetric, perChannel, 0, shape);
let quantizedData;
let dtype;
switch (type) {
case "int8":
quantizedData = quantizeToInt8(data, params.scale, params.zeroPoint, perChannel);
dtype = "int32";
break;
case "uint8":
quantizedData = quantizeToUint8(data, params.scale, params.zeroPoint, perChannel);
dtype = "int32";
break;
case "float16":
quantizedData = quantizeToFloat16(data);
dtype = "float32";
break;
default:
quantizedData = quantizeToInt8(data, params.scale, params.zeroPoint, perChannel);
dtype = "int32";
}
const scaleValue = params.scale instanceof Float32Array ? Array.from(params.scale) : params.scale;
const zpValue = params.zeroPoint instanceof Int32Array ? Array.from(params.zeroPoint) : params.zeroPoint;
return {
tensor: new EdgeFlowTensor(Array.from(quantizedData), shape, dtype),
scale: scaleValue,
zeroPoint: zpValue
};
}
function dequantizeTensor(tensor2, scale, zeroPoint, type) {
const data = tensor2.toArray();
const shape = tensor2.shape;
let dequantizedData;
const scaleArr = Array.isArray(scale) ? new Float32Array(scale) : scale;
const zpArr = Array.isArray(zeroPoint) ? new Int32Array(zeroPoint) : zeroPoint;
const perChannel = Array.isArray(scale);
switch (type) {
case "int8":
dequantizedData = dequantizeInt8(new Int8Array(data.map(Number)), scaleArr, zpArr, perChannel);
break;
case "uint8":
dequantizedData = dequantizeUint8(new Uint8Array(data.map(Number)), scaleArr, zpArr, perChannel);
break;
case "float16":
dequantizedData = dequantizeFloat16(new Uint16Array(data.map(Number)));
break;
default:
dequantizedData = dequantizeInt8(new Int8Array(data.map(Number)), scaleArr, zpArr, perChannel);
}
return new EdgeFlowTensor(Array.from(dequantizedData), shape, "float32");
}
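// Round-trip sketch (illustrative; assumes the exported tensor(data, shape) helper
// builds a float32 EdgeFlowTensor): quantizeTensor returns the quantized tensor
// together with the scale/zero-point needed to invert it.
//
//   const t = tensor([0.1, -0.5, 0.9, 0.0], [4]);
//   const { tensor: q, scale, zeroPoint } = quantizeTensor(t, "int8");
//   const restored = dequantizeTensor(q, scale, zeroPoint, "int8");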
function pruneTensor(tensor2, options = {}) {
const { ratio = 0.5, method = "magnitude", threshold } = options;
const data = tensor2.toFloat32Array();
const shape = tensor2.shape;
const mask = new Float32Array(data.length);
const prunedData = new Float32Array(data.length);
let prunedCount = 0;
if (method === "magnitude") {
const absValues = Array.from(data).map(Math.abs).sort((a, b) => a - b);
const thresholdIndex = Math.floor(absValues.length * ratio);
const computedThreshold = threshold ?? (absValues[thresholdIndex] ?? 0);
for (let i = 0; i < data.length; i++) {
if (Math.abs(data[i] ?? 0) > computedThreshold) {
mask[i] = 1;
prunedData[i] = data[i] ?? 0;
} else {
mask[i] = 0;
prunedData[i] = 0;
prunedCount++;
}
}
} else if (method === "random") {
for (let i = 0; i < data.length; i++) {
if (Math.random() > ratio) {
mask[i] = 1;
prunedData[i] = data[i] ?? 0;
} else {
mask[i] = 0;
prunedData[i] = 0;
prunedCount++;
}
}
}
return {
tensor: new EdgeFlowTensor(Array.from(prunedData), shape, "float32"),
mask: new EdgeFlowTensor(Array.from(mask), shape, "float32"),
sparsity: prunedCount / data.length
};
}
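// Pruning sketch (illustrative; weightsTensor is a placeholder): magnitude pruning
// zeroes the smallest `ratio` fraction of weights and returns the binary mask
// alongside the pruned tensor.
//
//   const { tensor: sparse, mask, sparsity } = pruneTensor(weightsTensor, {
//     ratio: 0.5,
//     method: "magnitude",
//   });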
async function pruneModel(modelData, options = {}) {
const { onProgress } = options;
onProgress?.({ current: 0, total: 1, percent: 0 });
const weights = parseModelWeights(modelData);
let totalParams = 0;
let prunedParams = 0;
for (const weight of weights) {
totalParams += weight.data.length;
const tensor2 = new EdgeFlowTensor(Array.from(weight.data), weight.shape, "float32");
const { sparsity } = pruneTensor(tensor2, options);
prunedParams += Math.floor(weight.data.length * sparsity);
}
onProgress?.({ current: 1, total: 1, percent: 100 });
return {
data: modelData,
// In a real implementation, we'd create a sparse format
originalSize: modelData.byteLength,
prunedSize: modelData.byteLength,
// Would be smaller with sparse format
sparsity: prunedParams / totalParams,
parametersPruned: prunedParams,
totalParameters: totalParams
};
}
async function analyzeModel(modelData) {
const weights = parseModelWeights(modelData);
const totalSize = modelData.byteLength;
const dtypeBreakdown = {};
let totalParams = 0;
const tensorInfos = [];
for (const weight of weights) {
totalParams += weight.data.length;
const bytesPerElement = weight.dtype === "float32" ? 4 : weight.dtype === "float16" ? 2 : weight.dtype === "int8" ? 1 : 4;
const size = weight.data.length * bytesPerElement;
if (!dtypeBreakdown[weight.dtype]) {
dtypeBreakdown[weight.dtype] = { count: 0, size: 0 };
}
dtypeBreakdown[weight.dtype].count++;
dtypeBreakdown[weight.dtype].size += size;
tensorInfos.push({
name: weight.name,
size,
shape: weight.shape
});
}
tensorInfos.sort((a, b) => b.size - a.size);
const largestTensors = tensorInfos.slice(0, 10);
const estimatedQuantizedSizes = {
int8: Math.ceil(totalSize / 4),
uint8: Math.ceil(totalSize / 4),
int4: Math.ceil(totalSize / 8),
float16: Math.ceil(totalSize / 2),
dynamic: Math.ceil(totalSize / 4)
};
let recommendedQuantization = "dynamic";
if (totalSize > 500 * 1024 * 1024) {
recommendedQuantization = "int4";
} else if (totalSize > 100 * 1024 * 1024) {
recommendedQuantization = "int8";
} else if (totalSize > 50 * 1024 * 1024) {
recommendedQuantization = "float16";
}
return {
totalSize,
tensorCount: weights.length,
totalParameters: totalParams,
dtypeBreakdown,
largestTensors,
estimatedMemory: totalParams * 4,
// Assuming float32 at runtime
recommendedQuantization,
estimatedQuantizedSizes
};
}
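// Usage sketch (illustrative): analyzeModel recommends a quantization type from the
// raw size (>500 MB -> int4, >100 MB -> int8, >50 MB -> float16, otherwise dynamic).
//
//   const report = await analyzeModel(buffer);
//   console.log(report.recommendedQuantization, report.estimatedQuantizedSizes);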
async function exportModel(modelData, options) {
const { format, quantize: quantize2 } = options;
let data = modelData;
if (quantize2) {
const result = await quantizeModel(modelData, { type: quantize2 });
data = result.data;
}
switch (format) {
case "edgeflow":
return data;
case "onnx":
return data;
case "tflite":
return data;
default:
return data;
}
}
// dist/tools/debugger.js
function calculateTensorStats(data) {
const arr = data instanceof Float32Array ? data : new Float32Array(data);
let min = Infinity;
let max = -Infinity;
let sum2 = 0;
let zeros2 = 0;
let nans = 0;
let infinities = 0;
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (isNaN(val)) {
nans++;
continue;
}
if (!isFinite(val)) {
infinities++;
continue;
}
min = Math.min(min, val);
max = Math.max(max, val);
sum2 += val;
if (val === 0)
zeros2++;
}
const validCount = arr.length - nans - infinities;
const mean2 = validCount > 0 ? sum2 / validCount : 0;
let varianceSum = 0;
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
varianceSum += Math.pow(val - mean2, 2);
}
}
const std = validCount > 0 ? Math.sqrt(varianceSum / validCount) : 0;
return {
min: min === Infinity ? 0 : min,
max: max === -Infinity ? 0 : max,
mean: mean2,
std,
zeros: zeros2,
nans,
infinities,
sparsity: zeros2 / arr.length
};
}
function createHistogram(data, bins = 50) {
const arr = data instanceof Float32Array ? data : new Float32Array(data);
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
min = Math.min(min, val);
max = Math.max(max, val);
}
}
if (min === Infinity || max === -Infinity || min === max) {
return { bins: [min || 0], counts: [arr.length], binEdges: [min || 0, max || 0] };
}
const binWidth = (max - min) / bins;
const counts = new Array(bins).fill(0);
const binEdges = new Array(bins + 1);
for (let i = 0; i <= bins; i++) {
binEdges[i] = min + i * binWidth;
}
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
const binIndex = Math.min(Math.floor((val - min) / binWidth), bins - 1);
counts[binIndex]++;
}
}
return {
bins: binEdges.slice(0, -1).map((e, i) => (e + binEdges[i + 1]) / 2),
counts,
binEdges
};
}
function inspectTensor(tensor2, name = "tensor", options = {}) {
const { histogram = true, maxSample = 10 } = options;
const data = tensor2.toFloat32Array();
const shape = tensor2.shape;
const size = tensor2.size;
const sampleIndices = [];
const step = Math.max(1, Math.floor(size / maxSample));
for (let i = 0; i < size && sampleIndices.length < maxSample; i += step) {
sampleIndices.push(i);
}
const sample = sampleIndices.map((i) => data[i] ?? 0);
const bytesPerElement = tensor2.dtype === "float32" ? 4 : tensor2.dtype === "int32" ? 4 : tensor2.dtype === "int64" ? 8 : 4;
const memoryBytes = size * bytesPerElement;
return {
name,
shape,
dtype: tensor2.dtype,
size,
memoryBytes,
stats: calculateTensorStats(data),
sample,
histogram: histogram ? createHistogram(data) : void 0
};
}
function formatTensorInspection(inspection) {
const { name, shape, dtype, size, memoryBytes, stats, sample } = inspection;
const lines = [
`\u250C\u2500 Tensor: ${name} \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`,
`\u2502 Shape: [${shape.join(", ")}]`,
`\u2502 Dtype: ${dtype}`,
`\u2502 Size: ${size.toLocaleString()} elements`,
`\u2502 Memory: ${formatBytes(memoryBytes)}`,
`\u251C\u2500 Statistics \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`,
`\u2502 Min: ${stats.min.toFixed(6)}`,
`\u2502 Max: ${stats.max.toFixed(6)}`,
`\u2502 Mean: ${stats.mean.toFixed(6)}`,
`\u2502 Std: ${stats.std.toFixed(6)}`,
`\u2502 Sparsity: ${(stats.sparsity * 100).toFixed(2)}%`
];
if (stats.nans > 0) {
lines.push(`\u2502 \u26A0\uFE0F NaN values: ${stats.nans}`);
}
if (stats.infinities > 0) {
lines.push(`\u2502 \u26A0\uFE0F Infinity values: ${stats.infinities}`);
}
lines.push(`\u251C\u2500 Sample Values \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`);
lines.push(`\u2502 [${sample.map((v) => v.toFixed(4)).join(", ")}]`);
lines.push(`\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`);
return lines.join("\n");
}
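// Inspection sketch (illustrative; `logits` is a placeholder tensor): inspectTensor
// gathers stats and a histogram, and formatTensorInspection renders them as the
// boxed report built above.
//
//   const report = inspectTensor(logits, "logits", { maxSample: 8 });
//   console.log(formatTensorInspection(report));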
function formatBytes(bytes) {
if (bytes < 1024)
return `${bytes} B`;
if (bytes < 1024 * 1024)
return `${(bytes / 1024).toFixed(2)} KB`;
if (bytes < 1024 * 1024 * 1024)
return `${(bytes / (1024 * 1024)).toFixed(2)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
}
var EdgeFlowDebugger = class {
constructor(config = {}) {
__publicField(this, "config");
__publicField(this, "events", []);
__publicField(this, "traces", []);
__publicField(this, "performanceMetrics");
__publicField(this, "listeners", /* @__PURE__ */ new Map());
__publicField(this, "isEnabled", true);
this.config = {
logging: config.logging ?? true,
logLevel: config.logLevel ?? "info",
inspectTensors: config.inspectTensors ?? true,
maxDisplayValues: config.maxDisplayValues ?? 10,
trackPerformance: config.trackPerformance ?? true,
logger: config.logger ?? this.defaultLogger.bind(this)
};
this.performanceMetrics = {
inferenceCount: 0,
totalInferenceTime: 0,
averageInferenceTime: 0,
minInferenceTime: Infinity,
maxInferenceTime: 0,
peakMemoryUsage: 0,
currentMemoryUsage: 0,
tensorAllocations: 0,
tensorDeallocations: 0
};
}
/**
* Default logger
*/
defaultLogger(level, message, data) {
const timestamp = (/* @__PURE__ */ new Date()).toISOString();
const prefix = `[edgeFlow.js ${timestamp}] [${level.toUpperCase()}]`;
switch (level) {
case "debug":
console.debug(prefix, message, data ?? "");
break;
case "info":
console.info(prefix, message, data ?? "");
break;
case "warn":
console.warn(prefix, message, data ?? "");
break;
case "error":
console.error(prefix, message, data ?? "");
break;
default:
console.log(prefix, message, data ?? "");
}
}
/**
* Log a message
*/
log(level, message, data) {
if (!this.isEnabled || !this.config.logging)
return;
const levels = ["debug", "info", "warn", "error"];
const configLevel = levels.indexOf(this.config.logLevel);
const msgLevel = levels.indexOf(level);
if (msgLevel >= configLevel) {
this.config.logger(level, message, data);
}
}
/**
* Add debug event
*/
addEvent(event) {
this.events.push(event);
const listeners = this.listeners.get(event.type) ?? [];
for (const listener of listeners) {
listener(event);
}
if (this.events.length > 1e3) {
this.events = this.events.slice(-1e3);
}
}
/**
* Enable debugger
*/
enable() {
this.isEnabled = true;
this.log("info", "Debugger enabled");
}
/**
* Disable debugger
*/
disable() {
this.isEnabled = false;
}
/**
* Subscribe to events
*/
on(type, callback) {
const listeners = this.listeners.get(type) ?? [];
listeners.push(callback);
this.listeners.set(type, listeners);
return () => {
const idx = listeners.indexOf(callback);
if (idx !== -1)
listeners.splice(idx, 1);
};
}
/**
* Inspect and log a tensor
*/
inspectTensor(tensor2, name = "tensor") {
const inspection = inspectTensor(tensor2, name, {
histogram: true,
maxSample: this.config.maxDisplayValues
});
if (this.config.inspectTensors) {
this.log("debug", `Tensor: ${name}`, inspection);
this.addEvent({
type: "tensor",
timestamp: Date.now(),
message: `Inspected tensor: ${name}`,
data: inspection
});
if (inspection.stats.nans > 0) {
this.log("warn", `Tensor "${name}" contains ${inspection.stats.nans} NaN values`);
}
if (inspection.stats.infinities > 0) {
this.log("warn", `Tensor "${name}" contains ${inspection.stats.infinities} Infinity values`);
}
}
return inspection;
}
/**
* Start tracing an inference
*/
startTrace(modelId) {
const id = `trace_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
const trace = {
id,
modelId,
timestamp: Date.now(),
inputs: [],
outputs: [],
duration: 0,
memoryUsed: 0,
operations: []
};
this.traces.push(trace);
this.log("debug", `Started trace: ${id} for model: ${modelId}`);
return id;
}
/**
* Add input to trace
*/
traceInput(traceId, tensor2, name) {
const trace = this.traces.find((t) => t.id === traceId);
if (!trace)
return;
trace.inputs.push(inspectTensor(tensor2, name));
}
/**
* Add output to trace
*/
traceOutput(traceId, tensor2, name) {
const trace = this.traces.find((t) => t.id === traceId);
if (!trace)
return;
trace.outputs.push(inspectTensor(tensor2, name));
}
/**
* Add operation to trace
*/
traceOperation(traceId, operation) {
const trace = this.traces.find((t) => t.id === traceId);
if (!trace)
return;
trace.operations.push(operation);
}
/**
* End trace
*/
endTrace(traceId) {
const trace = this.traces.find((t) => t.id === traceId);
if (!trace)
return;
trace.duration = Date.now() - trace.timestamp;
this.performanceMetrics.inferenceCount++;
this.performanceMetrics.totalInferenceTime += trace.duration;
this.performanceMetrics.averageInferenceTime = this.performanceMetrics.totalInferenceTime / this.performanceMetrics.inferenceCount;
this.performanceMetrics.minInferenceTime = Math.min(this.performanceMetrics.minInferenceTime, trace.duration);
this.performanceMetrics.maxInferenceTime = Math.max(this.performanceMetrics.maxInferenceTime, trace.duration);
this.log("info", `Trace completed: ${traceId}`, {
duration: `${trace.duration}ms`,
inputs: trace.inputs.length,
outputs: trace.outputs.length,
operations: trace.operations.length
});
this.addEvent({
type: "inference",
timestamp: Date.now(),
message: `Inference completed in ${trace.duration}ms`,
data: trace
});
return trace;
}
/**
* Record tensor allocation
*/
recordAllocation(tensor2) {
if (!this.config.trackPerformance)
return;
this.performanceMetrics.tensorAllocations++;
const memory = tensor2.size * 4;
this.performanceMetrics.currentMemoryUsage += memory;
this.performanceMetrics.peakMemoryUsage = Math.max(this.performanceMetrics.peakMemoryUsage, this.performanceMetrics.currentMemoryUsage);
}
/**
* Record tensor deallocation
*/
recordDeallocation(tensor2) {
if (!this.config.trackPerformance)
return;
this.performanceMetrics.tensorDeallocations++;
const memory = tensor2.size * 4;
this.performanceMetrics.currentMemoryUsage -= memory;
}
/**
* Get performance metrics
*/
getPerformanceMetrics() {
return { ...this.performanceMetrics };
}
/**
* Get all events
*/
getEvents() {
return [...this.events];
}
/**
* Get all traces
*/
getTraces() {
return [...this.traces];
}
/**
* Get trace by ID
*/
getTrace(traceId) {
return this.traces.find((t) => t.id === traceId);
}
/**
* Clear all data
*/
clear() {
this.events = [];
this.traces = [];
this.performanceMetrics = {
inferenceCount: 0,
totalInferenceTime: 0,
averageInferenceTime: 0,
minInferenceTime: Infinity,
maxInferenceTime: 0,
peakMemoryUsage: 0,
currentMemoryUsage: 0,
tensorAllocations: 0,
tensorDeallocations: 0
};
}
/**
* Export debug data
*/
export() {
return {
events: this.getEvents(),
traces: this.getTraces(),
metrics: this.getPerformanceMetrics(),
timestamp: Date.now()
};
}
/**
* Generate summary report
*/
generateReport() {
const metrics = this.getPerformanceMetrics();
const traces = this.getTraces();
const lines = [
"\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557",
"\u2551 edgeFlow.js Debug Report \u2551",
"\u2560\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2563",
"\u2551 Performance Metrics \u2551",
"\u255F\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2562",
`\u2551 Total Inferences: ${metrics.inferenceCount.toString().padStart(10)} \u2551`,
`\u2551 Average Time: ${metrics.averageInferenceTime.toFixed(2).padStart(10)}ms \u2551`,
`\u2551 Min Time: ${(metrics.minInferenceTime === Infinity ? 0 : metrics.minInferenceTime).toFixed(2).padStart(10)}ms \u2551`,
`\u2551 Max Time: ${metrics.maxInferenceTime.toFixed(2).padStart(10)}ms \u2551`,
`\u2551 Peak Memory: ${formatBytes(metrics.peakMemoryUsage).padStart(10)} \u2551`,
`\u2551 Current Memory: ${formatBytes(metrics.currentMemoryUsage).padStart(10)} \u2551`,
`\u2551 Tensor Allocations: ${metrics.tensorAllocations.toString().padStart(10)} \u2551`,
`\u2551 Tensor Deallocations: ${metrics.tensorDeallocations.toString().padStart(10)} \u2551`,
"\u255F\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2562",
"\u2551 Recent Traces \u2551",
"\u255F\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2562"
];
const recentTraces = traces.slice(-5);
for (const trace of recentTraces) {
lines.push(`\u2551 ${trace.id.slice(0, 20).padEnd(20)} | ${trace.duration.toFixed(2).padStart(8)}ms | ${trace.modelId.slice(0, 20).padEnd(20)} \u2551`);
}
if (recentTraces.length === 0) {
lines.push("\u2551 No traces recorded \u2551");
}
lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
return lines.join("\n");
}
};
var globalDebugger = null;
function getDebugger(config) {
if (!globalDebugger || config) {
globalDebugger = new EdgeFlowDebugger(config);
}
return globalDebugger;
}
function enableDebugging(config) {
const debugger_ = getDebugger(config);
debugger_.enable();
return debugger_;
}
function disableDebugging() {
globalDebugger?.disable();
}
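// Debugging sketch (illustrative; inputTensor/outputTensor are placeholders):
// enableDebugging returns the shared EdgeFlowDebugger; a trace wraps one inference
// and feeds the metrics behind generateReport().
//
//   const dbg = enableDebugging({ logLevel: "debug" });
//   const traceId = dbg.startTrace("sentiment-model");
//   dbg.traceInput(traceId, inputTensor, "input");
//   // ... run inference ...
//   dbg.traceOutput(traceId, outputTensor, "logits");
//   dbg.endTrace(traceId);
//   console.log(dbg.generateReport());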
function createAsciiHistogram(histogram, width = 50, height = 10) {
const { counts, binEdges } = histogram;
const maxCount = Math.max(...counts);
if (maxCount === 0)
return "No data to display";
const lines = [];
const scaled = counts.map((c) => Math.round(c / maxCount * height));
for (let row = height; row > 0; row--) {
let line = row === height ? `${maxCount.toString().padStart(6)} \u2502` : " \u2502";
for (let col = 0; col < width && col < scaled.length; col++) {
line += (scaled[col] ?? 0) >= row ? "\u2588" : " ";
}
lines.push(line);
}
lines.push(" \u2514" + "\u2500".repeat(Math.min(width, scaled.length)));
const minLabel = (binEdges[0] ?? 0).toFixed(2);
const maxLabel = (binEdges[binEdges.length - 1] ?? 0).toFixed(2);
lines.push(` ${minLabel}${" ".repeat(Math.max(0, Math.min(width, scaled.length) - minLabel.length - maxLabel.length))}${maxLabel}`);
return lines.join("\n");
}
function createTensorHeatmap(tensor2, width = 40) {
const shape = tensor2.shape;
if (shape.length !== 2) {
return "Heatmap only supports 2D tensors";
}
const [rows, cols] = shape;
if (rows === void 0 || cols === void 0) {
return "Invalid tensor shape";
}
const data = tensor2.toFloat32Array();
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
min = Math.min(min, val);
max = Math.max(max, val);
}
}
const range = max - min;
const chars = [" ", "\u2591", "\u2592", "\u2593", "\u2588"];
const lines = [];
const scaleX = Math.max(1, Math.ceil(cols / width));
const displayCols = Math.min(cols, width);
for (let r = 0; r < rows; r++) {
let line = "";
for (let c = 0; c < displayCols; c++) {
const idx = r * cols + Math.min(c * scaleX, cols - 1); // clamp so column downsampling stays within the current row
const val = data[idx] ?? 0;
const normalized = range > 0 ? (val - min) / range : 0;
const charIdx = Math.floor(normalized * (chars.length - 1));
line += chars[charIdx];
}
lines.push(line);
}
return lines.join("\n");
}
function visualizeModelArchitecture(layers) {
const lines = [];
lines.push("\u250C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510");
lines.push("\u2502 Model Architecture \u2502");
lines.push("\u251C\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2524");
for (let i = 0; i < layers.length; i++) {
const layer = layers[i];
const inputStr = `[${layer.inputShape.join("\xD7")}]`;
const outputStr = `[${layer.outputShape.join("\xD7")}]`;
lines.push(`\u2502 ${(i + 1).toString().padStart(2)}. ${layer.name.padEnd(20)} \u2502 ${layer.type.padEnd(15)} \u2502`);
lines.push(`\u2502 ${inputStr.padEnd(15)} \u2192 ${outputStr.padEnd(15)} \u2502`);
if (i < layers.length - 1) {
lines.push("\u2502 \u2193 \u2502");
}
}
lines.push("\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518");
return lines.join("\n");
}
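// Visualization sketch (illustrative layer names and shapes): each entry needs
// name, type, inputShape and outputShape.
//
//   console.log(visualizeModelArchitecture([
//     { name: "embedding", type: "Embedding", inputShape: [1, 128], outputShape: [1, 128, 384] },
//     { name: "encoder", type: "Transformer", inputShape: [1, 128, 384], outputShape: [1, 128, 384] },
//   ]));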
// dist/tools/monitor.js
var PerformanceMonitor = class {
constructor(config = {}) {
__publicField(this, "config");
__publicField(this, "samples", []);
__publicField(this, "isRunning", false);
__publicField(this, "intervalId", null);
__publicField(this, "alerts", []);
__publicField(this, "alertListeners", []);
__publicField(this, "sampleListeners", []);
// Inference tracking
__publicField(this, "inferenceCount", 0);
__publicField(this, "inferenceTimes", []);
__publicField(this, "queueLength", 0);
__publicField(this, "activeCount", 0);
// FPS tracking
__publicField(this, "frameCount", 0);
__publicField(this, "lastFrameTime", 0);
__publicField(this, "fps", 0);
__publicField(this, "rafId", null);
// Memory tracking
__publicField(this, "tensorMemory", 0);
__publicField(this, "cacheMemory", 0);
this.config = {
enabled: config.enabled ?? true,
sampleInterval: config.sampleInterval ?? 1e3,
historySize: config.historySize ?? 60,
monitorMemory: config.monitorMemory ?? true,
monitorFPS: config.monitorFPS ?? true,
collectors: config.collectors ?? []
};
}
/**
* Start monitoring
*/
start() {
if (this.isRunning)
return;
this.isRunning = true;
this.intervalId = setInterval(() => {
this.collectSample();
}, this.config.sampleInterval);
if (this.config.monitorFPS && typeof requestAnimationFrame !== "undefined") {
this.lastFrameTime = performance.now();
this.frameCount = 0;
this.monitorFPS();
}
}
/**
* Stop monitoring
*/
stop() {
this.isRunning = false;
if (this.intervalId) {
clearInterval(this.intervalId);
this.intervalId = null;
}
if (this.rafId) {
cancelAnimationFrame(this.rafId);
this.rafId = null;
}
}
/**
* Monitor FPS
*/
monitorFPS() {
if (!this.isRunning)
return;
this.frameCount++;
const now = performance.now();
const elapsed = now - this.lastFrameTime;
if (elapsed >= 1e3) {
this.fps = Math.round(this.frameCount * 1e3 / elapsed);
this.frameCount = 0;
this.lastFrameTime = now;
}
this.rafId = requestAnimationFrame(() => this.monitorFPS());
}
/**
* Collect a performance sample
*/
collectSample() {
const now = Date.now();
const avgTime = this.inferenceTimes.length > 0 ? this.inferenceTimes.reduce((a, b) => a + b, 0) / this.inferenceTimes.length : 0;
const minTime = this.inferenceTimes.length > 0 ? Math.min(...this.inferenceTimes) : 0;
const maxTime = this.inferenceTimes.length > 0 ? Math.max(...this.inferenceTimes) : 0;
const throughput = this.inferenceCount / (this.config.sampleInterval / 1e3);
const inference = {
count: this.inferenceCount,
avgTime,
minTime,
maxTime,
throughput,
queueLength: this.queueLength,
activeCount: this.activeCount
};
const memory = this.collectMemoryMetrics();
const system = this.collectSystemMetrics();
const custom = {};
for (const collector of this.config.collectors) {
try {
Object.assign(custom, collector());
} catch {
}
}
const sample = {
timestamp: now,
inference,
memory,
system,
custom
};
this.samples.push(sample);
if (this.samples.length > this.config.historySize) {
this.samples.shift();
}
this.checkAlerts(sample);
for (const listener of this.sampleListeners) {
listener(sample);
}
this.inferenceCount = 0;
this.inferenceTimes = [];
}
/**
* Collect memory metrics
*/
collectMemoryMetrics() {
let usedHeap = 0;
let totalHeap = 0;
let heapLimit = 0;
if (typeof performance !== "undefined" && "memory" in performance) {
const memory = performance.memory;
usedHeap = memory.usedJSHeapSize;
totalHeap = memory.totalJSHeapSize;
heapLimit = memory.jsHeapSizeLimit;
}
return {
usedHeap,
totalHeap,
heapLimit,
heapUsage: heapLimit > 0 ? usedHeap / heapLimit : 0,
tensorMemory: this.tensorMemory,
cacheMemory: this.cacheMemory
};
}
/**
* Collect system metrics
*/
collectSystemMetrics() {
const lastSample = this.samples[this.samples.length - 1];
const deltaTime = lastSample ? Date.now() - lastSample.timestamp : this.config.sampleInterval;
let webgpuAvailable = false;
if (typeof navigator !== "undefined" && "gpu" in navigator) {
webgpuAvailable = true;
}
let webnnAvailable = false;
if (typeof navigator !== "undefined" && "ml" in navigator) {
webnnAvailable = true;
}
return {
fps: this.fps,
cpuUsage: this.estimateCPUUsage(),
deltaTime,
userAgent: typeof navigator !== "undefined" ? navigator.userAgent : "unknown",
webgpuAvailable,
webnnAvailable
};
}
/**
* Estimate CPU usage based on inference times
*/
estimateCPUUsage() {
if (this.inferenceTimes.length === 0)
return 0;
const totalTime = this.inferenceTimes.reduce((a, b) => a + b, 0);
return Math.min(1, totalTime / this.config.sampleInterval);
}
/**
* Check alerts
*/
checkAlerts(sample) {
for (const alert of this.alerts) {
const value = this.getMetricValue(sample, alert.metric);
if (value === void 0)
continue;
let triggered = false;
switch (alert.operator) {
case ">":
triggered = value > alert.threshold;
break;
case "<":
triggered = value < alert.threshold;
break;
case ">=":
triggered = value >= alert.threshold;
break;
case "<=":
triggered = value <= alert.threshold;
break;
case "==":
triggered = value === alert.threshold;
break;
case "!=":
triggered = value !== alert.threshold;
break;
}
if (triggered) {
const event = {
config: alert,
value,
timestamp: sample.timestamp
};
for (const listener of this.alertListeners) {
listener(event);
}
}
}
}
/**
* Get metric value from sample
*/
getMetricValue(sample, metric) {
const parts = metric.split(".");
let value = sample;
for (const part of parts) {
if (value && typeof value === "object" && part in value) {
value = value[part];
} else {
return void 0;
}
}
return typeof value === "number" ? value : void 0;
}
/**
* Record an inference
*/
recordInference(duration) {
this.inferenceCount++;
this.inferenceTimes.push(duration);
}
/**
* Update queue length
*/
updateQueueLength(length) {
this.queueLength = length;
}
/**
* Update active count
*/
updateActiveCount(count) {
this.activeCount = count;
}
/**
* Update tensor memory
*/
updateTensorMemory(bytes) {
this.tensorMemory = bytes;
}
/**
* Update cache memory
*/
updateCacheMemory(bytes) {
this.cacheMemory = bytes;
}
/**
* Add an alert
*/
addAlert(config) {
this.alerts.push(config);
}
/**
* Remove an alert
*/
removeAlert(metric) {
this.alerts = this.alerts.filter((a) => a.metric !== metric);
}
/**
* Subscribe to alerts
*/
onAlert(callback) {
this.alertListeners.push(callback);
return () => {
const idx = this.alertListeners.indexOf(callback);
if (idx !== -1)
this.alertListeners.splice(idx, 1);
};
}
/**
* Subscribe to samples
*/
onSample(callback) {
this.sampleListeners.push(callback);
return () => {
const idx = this.sampleListeners.indexOf(callback);
if (idx !== -1)
this.sampleListeners.splice(idx, 1);
};
}
/**
* Get current sample
*/
getCurrentSample() {
return this.samples[this.samples.length - 1];
}
/**
* Get all samples
*/
getSamples() {
return [...this.samples];
}
/**
* Get samples in time range
*/
getSamplesInRange(startTime, endTime) {
return this.samples.filter((s) => s.timestamp >= startTime && s.timestamp <= endTime);
}
/**
* Get summary statistics
*/
getSummary() {
if (this.samples.length === 0) {
return {
avgInferenceTime: 0,
avgThroughput: 0,
avgMemoryUsage: 0,
avgFPS: 0,
totalInferences: 0,
uptime: 0
};
}
const avgInferenceTime = this.samples.reduce((sum2, s) => sum2 + s.inference.avgTime, 0) / this.samples.length;
const avgThroughput = this.samples.reduce((sum2, s) => sum2 + s.inference.throughput, 0) / this.samples.length;
const avgMemoryUsage = this.samples.reduce((sum2, s) => sum2 + s.memory.heapUsage, 0) / this.samples.length;
const avgFPS = this.samples.reduce((sum2, s) => sum2 + s.system.fps, 0) / this.samples.length;
const totalInferences = this.samples.reduce((sum2, s) => sum2 + s.inference.count, 0);
const firstSample = this.samples[0];
const lastSample = this.samples[this.samples.length - 1];
const uptime = lastSample.timestamp - firstSample.timestamp;
return {
avgInferenceTime,
avgThroughput,
avgMemoryUsage,
avgFPS,
totalInferences,
uptime
};
}
/**
* Clear all data
*/
clear() {
this.samples = [];
this.inferenceCount = 0;
this.inferenceTimes = [];
this.queueLength = 0;
this.activeCount = 0;
this.tensorMemory = 0;
this.cacheMemory = 0;
}
/**
* Export data
*/
export() {
return {
samples: this.getSamples(),
summary: this.getSummary(),
config: this.config,
timestamp: Date.now()
};
}
};
function generateDashboardHTML(monitor) {
const summary = monitor.getSummary();
const samples = monitor.getSamples();
const lastSample = samples[samples.length - 1];
const formatBytes2 = (bytes) => {
if (bytes < 1024)
return `${bytes} B`;
if (bytes < 1024 * 1024)
return `${(bytes / 1024).toFixed(1)} KB`;
if (bytes < 1024 * 1024 * 1024)
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
};
const formatDuration = (ms) => {
if (ms < 1e3)
return `${ms.toFixed(0)}ms`;
if (ms < 6e4)
return `${(ms / 1e3).toFixed(1)}s`;
return `${(ms / 6e4).toFixed(1)}m`;
};
return `
edgeFlow.js Performance Dashboard
${summary.totalInferences.toLocaleString()}
${summary.avgInferenceTime.toFixed(1)}ms
${summary.avgThroughput.toFixed(1)}ops/s
${Math.round(summary.avgFPS)}
${formatBytes2(lastSample?.memory.usedHeap ?? 0)}
0.6 ? "yellow" : "green"}"
style="width: ${(summary.avgMemoryUsage * 100).toFixed(0)}%">
${formatBytes2(lastSample?.memory.tensorMemory ?? 0)}
${formatBytes2(lastSample?.memory.cacheMemory ?? 0)}
${lastSample?.inference.queueLength ?? 0}
${generateChartPath(samples)}
Time
Inferences
Avg Time
Throughput
Memory
FPS
${samples.slice(-10).reverse().map((s) => `
${new Date(s.timestamp).toLocaleTimeString()}
${s.inference.count}
${s.inference.avgTime.toFixed(2)}ms
${s.inference.throughput.toFixed(1)}/s
${formatBytes2(s.memory.usedHeap)}
${s.system.fps}
`).join("")}
Generated at ${(/* @__PURE__ */ new Date()).toLocaleString()} | edgeFlow.js Performance Monitor
`.trim();
}
function generateChartPath(samples) {
if (samples.length < 2)
return "";
const width = 600;
const height = 180;
const padding = 10;
const times = samples.map((s) => s.inference.avgTime);
const maxTime = Math.max(...times, 1);
const points = samples.map((s, i) => {
const x = padding + i / (samples.length - 1) * (width - 2 * padding);
const y = height - padding - s.inference.avgTime / maxTime * (height - 2 * padding);
return `${x},${y}`;
});
const linePath = `M ${points.join(" L ")}`;
const areaPath = `M ${padding},${height - padding} L ${points.join(" L ")} L ${width - padding},${height - padding} Z`;
const gridLines = [];
for (let i = 0; i <= 4; i++) {
const y = padding + i / 4 * (height - 2 * padding);
gridLines.push(` `);
}
return `
${gridLines.join("\n")}
`;
}
function generateAsciiDashboard(monitor) {
const summary = monitor.getSummary();
const samples = monitor.getSamples();
const lastSample = samples[samples.length - 1];
const formatBytes2 = (bytes) => {
if (bytes < 1024)
return `${bytes} B`;
if (bytes < 1024 * 1024)
return `${(bytes / 1024).toFixed(1)} KB`;
if (bytes < 1024 * 1024 * 1024)
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
};
const bar = (value, max, width = 20) => {
const filled = Math.round(value / max * width);
return "\u2588".repeat(filled) + "\u2591".repeat(width - filled);
};
const lines = [
"\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557",
"\u2551 edgeFlow.js Performance Monitor Dashboard \u2551",
"\u2560\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2563",
"\u2551 \u2551",
`\u2551 Total Inferences: ${summary.totalInferences.toString().padStart(10)} \u2551`,
`\u2551 Avg Inference: ${summary.avgInferenceTime.toFixed(2).padStart(10)}ms \u2551`,
`\u2551 Throughput: ${summary.avgThroughput.toFixed(2).padStart(10)} ops/s \u2551`,
`\u2551 Avg FPS: ${Math.round(summary.avgFPS).toString().padStart(10)} \u2551`,
"\u2551 \u2551",
"\u255F\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2562",
"\u2551 Memory Usage \u2551",
`\u2551 Heap: ${bar(summary.avgMemoryUsage, 1)} ${(summary.avgMemoryUsage * 100).toFixed(0).padStart(3)}% \u2551`,
`\u2551 Used: ${formatBytes2(lastSample?.memory.usedHeap ?? 0).padStart(10)} \u2551`,
`\u2551 Tensor: ${formatBytes2(lastSample?.memory.tensorMemory ?? 0).padStart(10)} \u2551`,
`\u2551 Cache: ${formatBytes2(lastSample?.memory.cacheMemory ?? 0).padStart(10)} \u2551`,
"\u2551 \u2551",
"\u255F\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2562",
"\u2551 Inference Time History (last 30 samples) \u2551",
"\u2551 \u2551"
];
const recentSamples = samples.slice(-30);
if (recentSamples.length > 0) {
const times = recentSamples.map((s) => s.inference.avgTime);
const maxTime = Math.max(...times, 1);
const chartHeight = 5;
for (let row = chartHeight; row > 0; row--) {
let line = "\u2551 ";
for (const time of times) {
const height = Math.ceil(time / maxTime * chartHeight);
line += height >= row ? "\u2593" : " ";
}
lines.push(line.padEnd(76) + "\u2551");
}
lines.push("\u2551 " + "\u2500".repeat(30) + " \u2551");
}
lines.push("\u2551 \u2551");
lines.push(`\u2551 Last updated: ${(/* @__PURE__ */ new Date()).toLocaleString().padEnd(40)} \u2551`);
lines.push("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
return lines.join("\n");
}
var globalMonitor = null;
function getMonitor(config) {
if (!globalMonitor || config) {
globalMonitor = new PerformanceMonitor(config);
}
return globalMonitor;
}
function startMonitoring(config) {
const monitor = getMonitor(config);
monitor.start();
return monitor;
}
function stopMonitoring() {
globalMonitor?.stop();
}
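// Monitoring sketch (illustrative): the monitor samples on an interval; feed it
// inference durations and subscribe to alerts on any sampled metric path.
//
//   const monitor = startMonitoring({ sampleInterval: 1000, historySize: 120 });
//   monitor.addAlert({ metric: "memory.heapUsage", operator: ">", threshold: 0.8 });
//   monitor.onAlert((e) => console.warn("alert:", e.config.metric, e.value));
//   monitor.recordInference(12.5);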
// dist/tools/index.js
async function quantize(model, options) {
const modelData = model instanceof ArrayBuffer ? model : await getModelData(model);
const originalSize = modelData.byteLength;
let quantizedData;
let layersQuantized = 0;
let layersSkipped = 0;
switch (options.method) {
case "int8":
({ data: quantizedData, layersQuantized, layersSkipped } = quantizeInt8(modelData, options));
break;
case "uint8":
({ data: quantizedData, layersQuantized, layersSkipped } = quantizeUint8(modelData, options));
break;
case "float16":
({ data: quantizedData, layersQuantized, layersSkipped } = quantizeFloat16(modelData, options));
break;
case "int4":
({ data: quantizedData, layersQuantized, layersSkipped } = quantizeInt4(modelData, options));
break;
default:
quantizedData = modelData;
}
return {
modelData: quantizedData,
originalSize,
quantizedSize: quantizedData.byteLength,
compressionRatio: originalSize / quantizedData.byteLength,
stats: {
layersQuantized,
layersSkipped
}
};
}
async function getModelData(_model) {
return new ArrayBuffer(0);
}
function quantizeInt8(data, _options) {
const input = new Float32Array(data);
const output = new Int8Array(input.length);
let max = 0;
for (let i = 0; i < input.length; i++) {
const abs = Math.abs(input[i] ?? 0);
if (abs > max)
max = abs;
}
const scale = max / 127;
for (let i = 0; i < input.length; i++) {
output[i] = Math.round((input[i] ?? 0) / scale);
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0
};
}
function quantizeUint8(data, _options) {
const input = new Float32Array(data);
const output = new Uint8Array(input.length);
let min = Infinity, max = -Infinity;
for (let i = 0; i < input.length; i++) {
const val = input[i] ?? 0;
if (val < min)
min = val;
if (val > max)
max = val;
}
const scale = (max - min) / 255;
for (let i = 0; i < input.length; i++) {
output[i] = Math.round(((input[i] ?? 0) - min) / scale);
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0
};
}
function quantizeFloat16(data, _options) {
const input = new Float32Array(data);
const output = new Uint16Array(input.length);
for (let i = 0; i < input.length; i++) {
output[i] = float32ToFloat162(input[i] ?? 0);
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0
};
}
function quantizeInt4(data, _options) {
const input = new Float32Array(data);
const output = new Uint8Array(Math.ceil(input.length / 2));
let max = 0;
for (let i = 0; i < input.length; i++) {
const abs = Math.abs(input[i] ?? 0);
if (abs > max)
max = abs;
}
const scale = max / 7;
for (let i = 0; i < input.length; i += 2) {
const val1 = Math.round((input[i] ?? 0) / scale) + 8;
const val2 = Math.round((input[i + 1] ?? 0) / scale) + 8;
output[i / 2] = (val1 & 15) << 4 | val2 & 15;
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0
};
}
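// int4 packing (as implemented above): each value is scaled into [-7, 7],
// shifted by +8 into [1, 15], and two 4-bit values share one byte with the
// first value in the high nibble and the second in the low nibble.
// Worked example (illustrative): the values +max and -max quantize to 15 and 1,
// packing as (15 << 4) | 1 = 0xF1.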
function float32ToFloat162(value) {
const floatView = new Float32Array(1);
const int32View = new Int32Array(floatView.buffer);
floatView[0] = value;
const x = int32View[0] ?? 0;
let bits = x >> 16 & 32768;
let m = x >> 12 & 2047;
const e = x >> 23 & 255;
if (e < 103) {
return bits;
}
if (e > 142) {
bits |= 31744;
bits |= (e === 255 ? 0 : 1) && x & 8388607;
return bits;
}
if (e < 113) {
m |= 2048;
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
return bits;
}
bits |= e - 112 << 10 | m >> 1;
bits += m & 1;
return bits;
}
async function prune(model, options) {
const modelData = model instanceof ArrayBuffer ? model : await getModelData(model);
const weights = new Float32Array(modelData);
const total = weights.length;
const magnitudes = weights.map(Math.abs);
const sorted = [...magnitudes].sort((a, b) => a - b);
const thresholdIdx = Math.floor(options.sparsity * sorted.length);
const threshold = sorted[thresholdIdx] ?? 0;
let pruned = 0;
for (let i = 0; i < weights.length; i++) {
if (Math.abs(weights[i] ?? 0) < threshold) {
weights[i] = 0;
pruned++;
}
}
return {
modelData: weights.buffer,
actualSparsity: pruned / total,
parametersPruned: pruned,
totalParameters: total
};
}
async function analyzeModel2(model) {
const size = model instanceof ArrayBuffer ? model.byteLength : model.metadata.sizeBytes;
const estimatedParams = Math.floor(size / 4);
return {
totalParameters: estimatedParams,
sizeBytes: size,
layers: [],
estimatedFlops: estimatedParams * 2,
// Rough estimate
memoryRequirements: {
weights: size,
activations: size * 0.1,
// Rough estimate
total: size * 1.1
}
};
}
async function benchmark2(runFn, options = {}) {
const { warmupRuns = 3, runs = 10 } = options;
for (let i = 0; i < warmupRuns; i++) {
await runFn();
}
const times = [];
for (let i = 0; i < runs; i++) {
const start = performance.now();
await runFn();
times.push(performance.now() - start);
}
const sum2 = times.reduce((a, b) => a + b, 0);
const avgTime = sum2 / times.length;
const minTime = Math.min(...times);
const maxTime = Math.max(...times);
const squaredDiffs = times.map((t) => Math.pow(t - avgTime, 2));
const avgSquaredDiff = squaredDiffs.reduce((a, b) => a + b, 0) / times.length;
const stdDev = Math.sqrt(avgSquaredDiff);
return {
avgTime,
minTime,
maxTime,
stdDev,
throughput: 1e3 / avgTime,
times
};
}
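// Benchmark sketch (illustrative; exported as `benchmark`, and `classifier` is a
// placeholder pipeline): pass any async callable; warmup runs are excluded from
// the timing statistics.
//
//   const stats = await benchmark2(() => classifier.run("Sample text"), {
//     warmupRuns: 3,
//     runs: 20,
//   });
//   console.log(`${stats.avgTime.toFixed(2)}ms +/- ${stats.stdDev.toFixed(2)}ms`);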
async function exportModel2(model, format) {
const modelData = model instanceof ArrayBuffer ? model : await getModelData(model);
switch (format) {
case "json":
const array = new Float32Array(modelData);
return JSON.stringify(Array.from(array));
case "binary":
case "onnx":
default:
return modelData;
}
}
// dist/index.js
async function isSupported() {
const runtimes = await getAvailableRuntimes();
return Array.from(runtimes.values()).some((v) => v);
}
async function getBestRuntimeType() {
const runtimes = await getAvailableRuntimes();
if (runtimes.get("webgpu"))
return "webgpu";
if (runtimes.get("webnn"))
return "webnn";
if (runtimes.get("wasm"))
return "wasm";
return null;
}
async function preload(models) {
const cache = new ModelDownloadCache();
await Promise.all(models.map(async (url) => {
if (!await cache.get(url)) {
const response = await fetch(url);
if (response.ok) {
await cache.put(url, response);
}
}
}));
}
var VERSION = "0.1.0";
async function getInfo() {
const runtimes = await getAvailableRuntimes();
return {
version: VERSION,
runtimes: {
webgpu: runtimes.get("webgpu") ?? false,
webnn: runtimes.get("webnn") ?? false,
wasm: runtimes.get("wasm") ?? false,
auto: true
},
features: [
"concurrent-execution",
"batch-processing",
"memory-management",
"model-caching",
"quantization"
]
};
}
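// Capability-check sketch (illustrative):
//
//   if (await isSupported()) {
//     console.log("best runtime:", await getBestRuntimeType());
//     console.log(await getInfo());
//   }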
export {
AudioPreprocessor,
BasePipeline,
Cache,
EMOTION_LABELS,
EdgeFlowDebugger,
EdgeFlowError,
EdgeFlowTensor,
ErrorCodes,
FeatureExtractionPipeline,
IMAGENET_LABELS,
ImageClassificationPipeline,
ImagePreprocessor,
ImageSegmentationPipeline,
InferenceCache,
InferenceScheduler,
LoadedModelImpl,
MemoryManager,
MemoryScope,
ModelCache,
ModelDownloadCache,
POPULAR_MODELS,
PerformanceMonitor,
RuntimeManager,
SENTIMENT_LABELS,
SentimentAnalysisPipeline,
TextClassificationPipeline,
TextGenerationPipeline,
Tokenizer,
TransformersAdapterRuntime,
VERSION,
WASMRuntime,
WebGPURuntime,
WebNNRuntime,
add,
analyzeModel2 as analyzeModel,
analyzeModel as analyzeModelDetailed,
arange,
argmax,
benchmark2 as benchmark,
benchmarkMemory,
benchmarkSuite,
cancelPreload,
clearModelCache,
compareBenchmarks,
compose,
concat,
configureScheduler,
createAsciiHistogram,
createAudioPreprocessor,
createBasicTokenizer,
createCache,
createFeatureExtractionPipeline,
createImageClassificationPipeline,
createImagePreprocessor,
createImageSegmentationPipeline,
createPipelines,
createSentimentAnalysisPipeline,
createTensorHeatmap,
createTextClassificationPipeline,
createTextGenerationPipeline,
createWASMRuntime,
createWebGPURuntime,
createWebNNRuntime,
deleteCachedModel,
dequantizeFloat16,
dequantizeInt8,
dequantizeTensor,
dequantizeUint8,
disableDebugging,
div,
downloadConfig,
downloadModel,
downloadTokenizer,
enableDebugging,
exportModel2 as exportModel,
exportModel as exportModelAdvanced,
eye,
float16ToFloat32,
formatBenchmarkResult,
formatComparisonResult,
formatTensorInspection,
fromHub,
fromTask,
full,
gc,
generateAsciiDashboard,
generateDashboardHTML,
getAvailableRuntimes,
getBestRuntime,
getBestRuntimeType,
getCachedModel,
getDebugger,
getDefaultModel,
getDeviceProfile,
getInfo,
getMemoryManager,
getMemoryStats,
getModelCacheStats,
getModelInfo,
getMonitor,
getPipelineFactory,
getPluginMiddleware,
getPluginPipeline,
getPreloadStatus,
getPreloadedModel,
getRuntimeManager,
getScheduler,
getTransformersAdapter,
inspectTensor,
isModelCached,
isSupported,
linspace,
listPlugins,
loadModel,
loadModelData,
loadModelFromBuffer,
loadTokenizer,
loadTokenizerFromHub,
matmul,
mean,
modelExists,
mul,
ones,
parallel,
pipeline,
preload,
preloadModel,
preloadModels,
preprocessText,
prune,
pruneModel,
pruneTensor,
quantize,
quantizeModel,
quantizeTensor,
randn,
random,
recommendModelVariant,
recommendQuantization,
registerAllBackends,
registerPipeline,
registerPlugin,
registerRuntime,
release,
relu,
resetDeviceProfile,
runBatchInference,
benchmark as runBenchmark,
runInference,
setScheduler,
sigmoid,
softmax,
startMonitoring,
stopMonitoring,
sub,
sum,
tanh,
tensor,
unregisterPlugin,
useTransformersBackend,
visualizeModelArchitecture,
withMemoryScope,
withMemoryScopeSync,
zeros
};
//# sourceMappingURL=edgeflow.browser.js.map
================================================
FILE: dist/index.d.ts
================================================
/**
* edgeFlow.js
*
* Lightweight, high-performance browser ML inference framework
* with native concurrency support.
*
* @example
* ```typescript
* import { pipeline } from 'edgeflow';
*
* // Create a sentiment analysis pipeline
* const sentiment = await pipeline('sentiment-analysis');
*
* // Run inference
* const result = await sentiment.run('I love this product!');
* console.log(result); // { label: 'positive', score: 0.98 }
*
* // Batch processing
* const results = await sentiment.run([
* 'This is amazing!',
* 'This is terrible.'
* ]);
*
* // Concurrent execution with different models
* const classifier = await pipeline('text-classification');
* const extractor = await pipeline('feature-extraction');
*
* const [classification, features] = await Promise.all([
* classifier.run('Sample text'),
* extractor.run('Sample text')
* ]);
* ```
*
* @packageDocumentation
*/
export type { DataType, TypedArray, Shape, Tensor, RuntimeType, RuntimeCapabilities, Runtime, ModelFormat, QuantizationType, ModelMetadata, ModelIOSpec, ModelLoadOptions, LoadedModel, TaskPriority, TaskStatus, InferenceTask, SchedulerOptions, MemoryStats, MemoryPoolConfig, PipelineTask, PipelineConfig, PipelineOptions, TokenizerConfig, TokenizedOutput, EventType, EdgeFlowEvent, EventListener, ErrorCode, } from './core/types.js';
export { EdgeFlowError, ErrorCodes } from './core/types.js';
export { EdgeFlowTensor, tensor, zeros, ones, full, random, randn, arange, linspace, eye, add, sub, mul, div, matmul, softmax, relu, sigmoid, tanh, sum, mean, argmax, concat, } from './core/tensor.js';
export { InferenceScheduler, getScheduler, setScheduler, configureScheduler, } from './core/scheduler.js';
export { MemoryManager, MemoryScope, ModelCache, withMemoryScope, withMemoryScopeSync, getMemoryManager, getMemoryStats, release, gc, } from './core/memory.js';
export { registerPlugin, getPluginPipeline, getPluginMiddleware, listPlugins, unregisterPlugin, type EdgeFlowPlugin, type PluginPipelineEntry, type PluginBackendEntry, type PluginMiddleware, } from './core/plugin.js';
export { getDeviceProfile, recommendQuantization, recommendModelVariant, resetDeviceProfile, type DeviceProfile, type DeviceTier, type ModelRecommendation, } from './core/device-profiler.js';
export { compose, parallel, type CompositionStage, type CompositionResult, type ComposedPipeline, } from './core/composer.js';
export { RuntimeManager, LoadedModelImpl, loadModel, loadModelFromBuffer, runInference, runBatchInference, getRuntimeManager, registerRuntime, getBestRuntime, getAvailableRuntimes, } from './core/runtime.js';
export { WebGPURuntime, createWebGPURuntime, WebNNRuntime, createWebNNRuntime, WASMRuntime, createWASMRuntime, registerAllBackends, TransformersAdapterRuntime, useTransformersBackend, getTransformersAdapter, type TransformersAdapterOptions, type TransformersPipelineFactory, } from './backends/index.js';
export { pipeline, createPipelines, BasePipeline, registerPipeline, getPipelineFactory, SENTIMENT_LABELS, EMOTION_LABELS, IMAGENET_LABELS, type PipelineResult, type TextClassificationResult, type FeatureExtractionResult, type ImageClassificationResult, type ObjectDetectionResult, TextClassificationPipeline, SentimentAnalysisPipeline, FeatureExtractionPipeline, ImageClassificationPipeline, TextGenerationPipeline, ImageSegmentationPipeline, createTextClassificationPipeline, createSentimentAnalysisPipeline, createFeatureExtractionPipeline, createImageClassificationPipeline, createTextGenerationPipeline, createImageSegmentationPipeline, type PipelineFactoryOptions, type TextClassificationOptions, type FeatureExtractionOptions, type ImageClassificationOptions, type ImageInput, type TextGenerationOptions, type TextGenerationResult, type GenerationStreamEvent, type ChatMessage, type ChatOptions, type ChatTemplateType, type LLMLoadProgress, type ImageSegmentationOptions, type ImageSegmentationResult, type PointPrompt, type BoxPrompt, type ModelLoadProgress, } from './pipelines/index.js';
export { Tokenizer, createBasicTokenizer, loadTokenizer, loadTokenizerFromHub, type TokenizerModel, type TokenizerOptions, ImagePreprocessor, AudioPreprocessor, preprocessText, createImagePreprocessor, createAudioPreprocessor, type ImagePreprocessorOptions, type AudioPreprocessorOptions, type TextPreprocessorOptions, Cache, InferenceCache, ModelDownloadCache, createCache, type CacheStrategy, type CacheOptions, type CacheStats, loadModelData, preloadModel, preloadModels, isModelCached, getCachedModel, deleteCachedModel, clearModelCache, getModelCacheStats, getPreloadStatus, cancelPreload, getPreloadedModel, type DownloadProgress, type ModelLoaderOptions, type PreloadOptions, fromHub, fromTask, downloadModel, downloadTokenizer, downloadConfig, modelExists, getModelInfo, getDefaultModel, POPULAR_MODELS, type HubOptions, type HubDownloadProgress, type ModelConfig, type ModelBundle, type PopularModelTask, } from './utils/index.js';
export { quantize, type QuantizationOptions, type QuantizationResult, prune, type PruningOptions, type PruningResult, analyzeModel, type ModelAnalysis, benchmark, type BenchmarkOptions, type BenchmarkResult, exportModel, quantizeModel, quantizeTensor, dequantizeTensor, pruneModel, pruneTensor, analyzeModelDetailed, exportModelAdvanced, dequantizeInt8, dequantizeUint8, dequantizeFloat16, float16ToFloat32, type QuantizationMethod, type AdvancedQuantizationOptions, type QuantizationProgress, type AdvancedQuantizationResult, type LayerQuantizationStats, type QuantizationStats, type AdvancedPruningOptions, type AdvancedPruningResult, type DetailedModelAnalysis, type ExportFormat, type ExportOptions, EdgeFlowDebugger, getDebugger, enableDebugging, disableDebugging, inspectTensor, formatTensorInspection, createAsciiHistogram, createTensorHeatmap, visualizeModelArchitecture, type DebuggerConfig, type TensorInspection, type TensorStats, type HistogramData, type InferenceTrace, type OperationTrace, type DebugEvent, type DebugPerformanceMetrics, PerformanceMonitor, getMonitor, startMonitoring, stopMonitoring, generateDashboardHTML, generateAsciiDashboard, type MonitorConfig, type PerformanceSample, type InferenceMetrics, type MemoryMetrics, type SystemMetrics, type AlertConfig, type AlertEvent, type WidgetData, runBenchmark, compareBenchmarks, benchmarkSuite, benchmarkMemory, formatBenchmarkResult, formatComparisonResult, type DetailedBenchmarkOptions, type DetailedBenchmarkResult, type CompareBenchmarkResult, type MemoryBenchmarkResult, } from './tools/index.js';
/**
* Check if edgeFlow is supported in the current environment
*/
export declare function isSupported(): Promise<boolean>;
/**
* Get the best available runtime type
*/
export declare function getBestRuntimeType(): Promise<RuntimeType | null>;
/**
* Preload models for faster subsequent loading
*/
export declare function preload(models: string[]): Promise<void>;
/**
* edgeFlow.js version
*/
export declare const VERSION = "0.1.0";
/**
* Get framework info
*/
export declare function getInfo(): Promise<{
version: string;
runtimes: Record<string, boolean>;
features: string[];
}>;
import { RuntimeType } from './core/types.js';
//# sourceMappingURL=index.d.ts.map
================================================
FILE: dist/index.js
================================================
/**
* edgeFlow.js
*
* Lightweight, high-performance browser ML inference framework
* with native concurrency support.
*
* @example
* ```typescript
* import { pipeline } from 'edgeflow';
*
* // Create a sentiment analysis pipeline
* const sentiment = await pipeline('sentiment-analysis');
*
* // Run inference
* const result = await sentiment.run('I love this product!');
* console.log(result); // { label: 'positive', score: 0.98 }
*
* // Batch processing
* const results = await sentiment.run([
* 'This is amazing!',
* 'This is terrible.'
* ]);
*
* // Concurrent execution with different models
* const classifier = await pipeline('text-classification');
* const extractor = await pipeline('feature-extraction');
*
* const [classification, features] = await Promise.all([
* classifier.run('Sample text'),
* extractor.run('Sample text')
* ]);
* ```
*
* @packageDocumentation
*/
// Error class
export { EdgeFlowError, ErrorCodes } from './core/types.js';
// Tensor operations
export { EdgeFlowTensor, tensor, zeros, ones, full, random, randn, arange, linspace, eye, add, sub, mul, div, matmul, softmax, relu, sigmoid, tanh, sum, mean, argmax, concat, } from './core/tensor.js';
// Scheduler
export { InferenceScheduler, getScheduler, setScheduler, configureScheduler, } from './core/scheduler.js';
// Memory management
export { MemoryManager, MemoryScope, ModelCache, withMemoryScope, withMemoryScopeSync, getMemoryManager, getMemoryStats, release, gc, } from './core/memory.js';
// Plugin system
export { registerPlugin, getPluginPipeline, getPluginMiddleware, listPlugins, unregisterPlugin, } from './core/plugin.js';
// Device profiling
export { getDeviceProfile, recommendQuantization, recommendModelVariant, resetDeviceProfile, } from './core/device-profiler.js';
// Pipeline composition
export { compose, parallel, } from './core/composer.js';
// Runtime management
export { RuntimeManager, LoadedModelImpl, loadModel, loadModelFromBuffer, runInference, runBatchInference, getRuntimeManager, registerRuntime, getBestRuntime, getAvailableRuntimes, } from './core/runtime.js';
// ============================================================================
// Backend Exports
// ============================================================================
export { WebGPURuntime, createWebGPURuntime, WebNNRuntime, createWebNNRuntime, WASMRuntime, createWASMRuntime, registerAllBackends,
// transformers.js adapter
TransformersAdapterRuntime, useTransformersBackend, getTransformersAdapter, } from './backends/index.js';
// ============================================================================
// Pipeline Exports
// ============================================================================
export {
// Factory function
pipeline, createPipelines,
// Base classes
BasePipeline, registerPipeline, getPipelineFactory,
// Labels
SENTIMENT_LABELS, EMOTION_LABELS, IMAGENET_LABELS,
// Pipelines
TextClassificationPipeline, SentimentAnalysisPipeline, FeatureExtractionPipeline, ImageClassificationPipeline, TextGenerationPipeline, ImageSegmentationPipeline,
// Factory functions
createTextClassificationPipeline, createSentimentAnalysisPipeline, createFeatureExtractionPipeline, createImageClassificationPipeline, createTextGenerationPipeline, createImageSegmentationPipeline, } from './pipelines/index.js';
// ============================================================================
// Utility Exports
// ============================================================================
export {
// Tokenizer
Tokenizer, createBasicTokenizer, loadTokenizer, loadTokenizerFromHub,
// Preprocessor
ImagePreprocessor, AudioPreprocessor, preprocessText, createImagePreprocessor, createAudioPreprocessor,
// Cache
Cache, InferenceCache, ModelDownloadCache, createCache,
// Model Loader (Preloading, Sharding, Resume, Caching)
loadModelData, preloadModel, preloadModels, isModelCached, getCachedModel, deleteCachedModel, clearModelCache, getModelCacheStats, getPreloadStatus, cancelPreload, getPreloadedModel,
// HuggingFace Hub Integration
fromHub, fromTask, downloadModel, downloadTokenizer, downloadConfig, modelExists, getModelInfo, getDefaultModel, POPULAR_MODELS, } from './utils/index.js';
// ============================================================================
// Tools Exports
// ============================================================================
export {
// Quantization (basic)
quantize,
// Pruning (basic)
prune,
// Analysis (basic)
analyzeModel,
// Benchmarking (basic)
benchmark,
// Export
exportModel,
// Advanced Quantization
quantizeModel, quantizeTensor, dequantizeTensor, pruneModel, pruneTensor, analyzeModelDetailed, exportModelAdvanced, dequantizeInt8, dequantizeUint8, dequantizeFloat16, float16ToFloat32,
// Debugging Tools
EdgeFlowDebugger, getDebugger, enableDebugging, disableDebugging, inspectTensor, formatTensorInspection, createAsciiHistogram, createTensorHeatmap, visualizeModelArchitecture,
// Performance Monitor
PerformanceMonitor, getMonitor, startMonitoring, stopMonitoring, generateDashboardHTML, generateAsciiDashboard,
// Benchmark utilities
runBenchmark, compareBenchmarks, benchmarkSuite, benchmarkMemory, formatBenchmarkResult, formatComparisonResult, } from './tools/index.js';
// ============================================================================
// Convenience Functions
// ============================================================================
/**
* Check if edgeFlow is supported in the current environment
*/
export async function isSupported() {
const runtimes = await getAvailableRuntimes();
return Array.from(runtimes.values()).some(v => v);
}
/**
* Get the best available runtime type
*/
export async function getBestRuntimeType() {
const runtimes = await getAvailableRuntimes();
if (runtimes.get('webgpu'))
return 'webgpu';
if (runtimes.get('webnn'))
return 'webnn';
if (runtimes.get('wasm'))
return 'wasm';
return null;
}
/**
* Preload models for faster subsequent loading
*/
export async function preload(models) {
const cache = new ModelDownloadCache();
await Promise.all(models.map(async (url) => {
if (!(await cache.get(url))) {
const response = await fetch(url);
if (response.ok) {
await cache.put(url, response);
}
}
}));
}
// ============================================================================
// Version Info
// ============================================================================
/**
* edgeFlow.js version
*/
export const VERSION = '0.1.0';
/**
* Get framework info
*/
export async function getInfo() {
const runtimes = await getAvailableRuntimes();
return {
version: VERSION,
runtimes: {
webgpu: runtimes.get('webgpu') ?? false,
webnn: runtimes.get('webnn') ?? false,
wasm: runtimes.get('wasm') ?? false,
auto: true,
},
features: [
'concurrent-execution',
'batch-processing',
'memory-management',
'model-caching',
'quantization',
],
};
}
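// Usage sketch for the convenience helpers above (assumes a browser context;
// the model URL below is a placeholder, not a real endpoint):
//
//   if (await isSupported()) {
//     const runtime = await getBestRuntimeType(); // 'webgpu' | 'webnn' | 'wasm' | null
//     const info = await getInfo();
//     console.log(info.version, runtime, info.features);
//   }
//   // Warm the download cache before first use:
//   await preload(['https://example.com/models/my-model.onnx']);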
import { getAvailableRuntimes } from './core/runtime.js';
import { ModelDownloadCache } from './utils/cache.js';
//# sourceMappingURL=index.js.map
================================================
FILE: dist/pipelines/automatic-speech-recognition.d.ts
================================================
/**
* edgeFlow.js - Automatic Speech Recognition Pipeline
*
* Transcribe audio to text using Whisper ONNX models (encoder + decoder).
*/
import { BasePipeline, PipelineResult } from './base.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { type AudioInput } from '../utils/preprocessor.js';
import { Tokenizer } from '../utils/tokenizer.js';
export interface ASROptions extends PipelineOptions {
language?: string;
task?: 'transcribe' | 'translate';
returnTimestamps?: boolean | 'word' | 'chunk';
maxDuration?: number;
chunkDuration?: number;
chunkOverlap?: number;
}
export interface WordTimestamp {
word: string;
start: number;
end: number;
confidence?: number;
}
export interface ChunkTimestamp {
text: string;
start: number;
end: number;
}
export interface ASRResult extends PipelineResult {
text: string;
language?: string;
words?: WordTimestamp[];
chunks?: ChunkTimestamp[];
}
export declare class AutomaticSpeechRecognitionPipeline extends BasePipeline {
private audioPreprocessor;
private tokenizer;
private encoderModel;
private decoderModel;
private encoderUrl;
private decoderUrl;
private tokenizerUrl;
constructor(config?: PipelineConfig);
initialize(): Promise<void>;
setTokenizer(tokenizer: Tokenizer): void;
run(input: AudioInput | AudioInput[], options?: PipelineOptions): Promise<ASRResult | ASRResult[]>;
private transcribeSingle;
private buildInitialTokens;
private getLanguageToken;
/**
* Autoregressive decoder loop similar to text-generation.
* Feeds encoder hidden states + growing token sequence to decoder.
*/
private autoregressiveDecode;
private extractTimestamps;
processLongAudio(audio: AudioInput, options?: ASROptions): Promise<ASRResult>;
protected preprocess(input: AudioInput | AudioInput[]): Promise<EdgeFlowTensor[]>;
protected postprocess(outputs: EdgeFlowTensor[], options?: PipelineOptions): Promise<ASRResult>;
private decodeOutput;
}
export declare function createASRPipeline(config?: PipelineConfig): AutomaticSpeechRecognitionPipeline;
//# sourceMappingURL=automatic-speech-recognition.d.ts.map
================================================
FILE: dist/pipelines/automatic-speech-recognition.js
================================================
/**
* edgeFlow.js - Automatic Speech Recognition Pipeline
*
* Transcribe audio to text using Whisper ONNX models (encoder + decoder).
*/
import { BasePipeline, registerPipeline } from './base.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { AudioPreprocessor } from '../utils/preprocessor.js';
import { Tokenizer } from '../utils/tokenizer.js';
import { loadModelData } from '../utils/model-loader.js';
import { loadModelFromBuffer, runInference, runInferenceNamed } from '../core/runtime.js';
// ============================================================================
// Default Model (Whisper-tiny, quantized encoder + decoder)
// ============================================================================
const DEFAULT_MODELS = {
encoder: 'https://huggingface.co/Xenova/whisper-tiny/resolve/main/onnx/encoder_model_quantized.onnx',
decoder: 'https://huggingface.co/Xenova/whisper-tiny/resolve/main/onnx/decoder_model_merged_quantized.onnx',
tokenizer: 'https://huggingface.co/Xenova/whisper-tiny/resolve/main/tokenizer.json',
};
// Whisper special tokens
const SOT_TOKEN = 50258; // <|startoftranscript|>
const TRANSLATE_TOKEN = 50358; // <|translate|>
const TRANSCRIBE_TOKEN = 50359; // <|transcribe|>
const EOT_TOKEN = 50257; // <|endoftext|>
const NO_TIMESTAMPS_TOKEN = 50363; // <|notimestamps|>
const EN_TOKEN = 50259; // <|en|>
const MAX_DECODER_TOKENS = 448;
// ============================================================================
// ASR Pipeline
// ============================================================================
export class AutomaticSpeechRecognitionPipeline extends BasePipeline {
audioPreprocessor;
tokenizer = null;
encoderModel = null;
decoderModel = null;
encoderUrl;
decoderUrl;
tokenizerUrl;
constructor(config) {
super(config ?? {
task: 'automatic-speech-recognition',
model: 'default',
});
this.encoderUrl = DEFAULT_MODELS.encoder;
this.decoderUrl = DEFAULT_MODELS.decoder;
this.tokenizerUrl = DEFAULT_MODELS.tokenizer;
this.audioPreprocessor = new AudioPreprocessor({
sampleRate: 16000,
nMels: 80,
nFft: 400,
hopLength: 160,
maxDuration: 30,
});
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.encoderModel) {
const data = await loadModelData(this.encoderUrl, { cache: this.config.cache ?? true });
this.encoderModel = await loadModelFromBuffer(data);
}
if (!this.decoderModel) {
const data = await loadModelData(this.decoderUrl, { cache: this.config.cache ?? true });
this.decoderModel = await loadModelFromBuffer(data);
}
}
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
}
async run(input, options) {
await this.initialize();
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
const opts = options ?? {};
const results = [];
for (const audio of inputs) {
const result = await this.transcribeSingle(audio, opts);
results.push(result);
}
return isBatch ? results : results[0];
}
async transcribeSingle(audio, options) {
const startTime = performance.now();
// 1. Preprocess audio → mel spectrogram
const melTensor = await this.audioPreprocessor.process(audio);
const melInput = new EdgeFlowTensor(melTensor.toFloat32Array(), [1, ...melTensor.shape], 'float32');
// 2. Run encoder
const encoderOutputs = await runInference(this.encoderModel, [melInput]);
const encoderHidden = encoderOutputs[0];
// 3. Autoregressive decoder loop
const task = options.task ?? 'transcribe';
const initialTokens = this.buildInitialTokens(task, options.language);
const generatedTokens = await this.autoregressiveDecode(encoderHidden, initialTokens);
// 4. Decode tokens to text
const text = this.tokenizer.decode(generatedTokens, true);
const result = {
text: text.trim(),
processingTime: performance.now() - startTime,
};
if (options.returnTimestamps) {
result.chunks = this.extractTimestamps(generatedTokens, text);
}
return result;
}
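// For the default case (English transcription, no timestamps) this yields
// [50258, 50259, 50359, 50363], i.e. <|startoftranscript|><|en|><|transcribe|><|notimestamps|>.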
buildInitialTokens(task, language) {
const tokens = [SOT_TOKEN];
tokens.push(language ? this.getLanguageToken(language) : EN_TOKEN);
tokens.push(task === 'translate' ? TRANSLATE_TOKEN : TRANSCRIBE_TOKEN);
tokens.push(NO_TIMESTAMPS_TOKEN);
return tokens;
}
getLanguageToken(language) {
// Whisper language tokens start at 50259 for English
const langMap = {
en: 50259, zh: 50260, de: 50261, es: 50262, ru: 50263,
ko: 50264, fr: 50265, ja: 50266, pt: 50267, tr: 50268,
pl: 50269, ca: 50270, nl: 50271, ar: 50272, sv: 50273,
it: 50274, id: 50275, hi: 50276, fi: 50277, vi: 50278,
};
return langMap[language.toLowerCase()] ?? EN_TOKEN;
}
/**
* Autoregressive decoder loop similar to text-generation.
* Feeds encoder hidden states + growing token sequence to decoder.
*/
async autoregressiveDecode(encoderHidden, initialTokens) {
const tokens = [...initialTokens];
for (let step = 0; step < MAX_DECODER_TOKENS; step++) {
const decoderInputIds = new EdgeFlowTensor(BigInt64Array.from(tokens.map(t => BigInt(t))), [1, tokens.length], 'int64');
const namedInputs = new Map();
namedInputs.set('input_ids', decoderInputIds);
namedInputs.set('encoder_hidden_states', encoderHidden);
const decoderOutputs = await runInferenceNamed(this.decoderModel, namedInputs);
const logits = decoderOutputs[0].toFloat32Array();
// Get logits for the last token position
const vocabSize = logits.length / tokens.length;
const lastTokenLogits = logits.slice((tokens.length - 1) * vocabSize);
// Greedy: argmax
let bestId = 0;
let bestVal = lastTokenLogits[0] ?? -Infinity;
for (let i = 1; i < lastTokenLogits.length; i++) {
if ((lastTokenLogits[i] ?? -Infinity) > bestVal) {
bestVal = lastTokenLogits[i] ?? -Infinity;
bestId = i;
}
}
if (bestId === EOT_TOKEN)
break;
tokens.push(bestId);
}
// Strip initial tokens to return only generated tokens
return tokens.slice(initialTokens.length);
}
extractTimestamps(_tokenIds, text) {
// Simplified timestamp extraction: group words into ~5-word chunks and estimate
// start/end times from an assumed speaking rate.
const words = text.split(/\s+/).filter(w => w.length > 0);
const chunks = [];
const wordsPerSecond = 2.5;
let chunkText = '';
let chunkStart = 0;
for (let i = 0; i < words.length; i++) {
chunkText += (chunkText ? ' ' : '') + words[i];
if ((i + 1) % 5 === 0 || i === words.length - 1) {
const duration = chunkText.split(/\s+/).length / wordsPerSecond;
chunks.push({
text: chunkText,
start: chunkStart,
end: chunkStart + duration,
});
chunkStart = chunkStart + duration;
chunkText = '';
}
}
return chunks;
}
async processLongAudio(audio, options = {}) {
const chunkDuration = options.chunkDuration ?? 30;
const chunkOverlap = options.chunkOverlap ?? 5;
const rawTensor = await this.audioPreprocessor.processRaw(audio);
const audioData = rawTensor.toFloat32Array();
const sampleRate = 16000;
const chunkSamples = chunkDuration * sampleRate;
const overlapSamples = chunkOverlap * sampleRate;
const stepSamples = chunkSamples - overlapSamples;
const chunks = [];
for (let start = 0; start < audioData.length; start += stepSamples) {
const end = Math.min(start + chunkSamples, audioData.length);
const chunkAudio = audioData.slice(start, end);
const chunkResult = await this.run(new Float32Array(chunkAudio), options);
if (chunkResult.chunks) {
const timeOffset = start / sampleRate;
chunkResult.chunks = chunkResult.chunks.map(c => ({
...c,
start: c.start + timeOffset,
end: c.end + timeOffset,
}));
}
chunks.push(chunkResult);
}
const mergedText = chunks.map(c => c.text).join(' ');
const mergedChunks = chunks.flatMap(c => c.chunks ?? []);
return {
text: mergedText,
chunks: mergedChunks,
};
}
async preprocess(input) {
const inputs = Array.isArray(input) ? input : [input];
const tensors = await Promise.all(inputs.map(audio => this.audioPreprocessor.process(audio)));
if (tensors.length === 1) {
const t = tensors[0];
return [new EdgeFlowTensor(t.toFloat32Array(), [1, ...t.shape], 'float32')];
}
return tensors;
}
async postprocess(outputs, options) {
const opts = options ?? {};
const returnTimestamps = opts.returnTimestamps ?? false;
if (!outputs[0]) {
return { text: '' };
}
const outputData = outputs[0].toFloat32Array();
const shape = outputs[0].shape;
const text = this.decodeOutput(outputData, shape);
const result = { text };
if (returnTimestamps) {
result.chunks = this.extractTimestamps([], text);
}
return result;
}
decodeOutput(data, shape) {
const seqLen = shape[1] ?? data.length;
const vocabSize = shape[2] ?? 1;
const tokenIds = [];
if (vocabSize > 1) {
for (let i = 0; i < seqLen; i++) {
const offset = i * vocabSize;
let maxIdx = 0;
let maxVal = data[offset] ?? -Infinity;
for (let j = 1; j < vocabSize; j++) {
if ((data[offset + j] ?? -Infinity) > maxVal) {
maxVal = data[offset + j] ?? -Infinity;
maxIdx = j;
}
}
tokenIds.push(maxIdx);
}
}
else {
for (let i = 0; i < data.length; i++) {
tokenIds.push(Math.round(data[i] ?? 0));
}
}
if (this.tokenizer) {
return this.tokenizer.decode(tokenIds, true);
}
return tokenIds.join(' ');
}
}
// ============================================================================
// Factory
// ============================================================================
export function createASRPipeline(config) {
return new AutomaticSpeechRecognitionPipeline(config);
}
registerPipeline('automatic-speech-recognition', (config) => new AutomaticSpeechRecognitionPipeline(config));
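// Usage sketch (illustrative; `samples` stands for 16 kHz mono audio as a Float32Array,
// one of the accepted AudioInput forms, as used by processLongAudio above):
//
//   const asr = createASRPipeline();
//   const { text, chunks } = await asr.run(samples, { language: 'en', returnTimestamps: true });
//   // For recordings longer than ~30 s, transcribe in overlapping chunks:
//   const long = await asr.processLongAudio(samples, { chunkDuration: 30, chunkOverlap: 5 });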
//# sourceMappingURL=automatic-speech-recognition.js.map
================================================
FILE: dist/pipelines/base.d.ts
================================================
/**
* edgeFlow.js - Base Pipeline
*
* Base class and utilities for all pipeline implementations.
*/
import { LoadedModel, PipelineConfig, PipelineOptions, PipelineTask } from '../core/types.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { ModelCache } from '../core/memory.js';
import { ModelDownloadCache } from '../utils/cache.js';
/**
* Pipeline result base interface
*/
export interface PipelineResult {
/** Processing time in milliseconds */
processingTime?: number;
}
/**
* Text classification result
*/
export interface TextClassificationResult extends PipelineResult {
label: string;
score: number;
}
/**
* Feature extraction result
*/
export interface FeatureExtractionResult extends PipelineResult {
embeddings: number[];
}
/**
* Image classification result
*/
export interface ImageClassificationResult extends PipelineResult {
label: string;
score: number;
}
/**
* Object detection result
*/
export interface ObjectDetectionResult extends PipelineResult {
label: string;
score: number;
box: {
x: number;
y: number;
width: number;
height: number;
};
}
/**
* BasePipeline - Abstract base class for all pipelines
*/
export declare abstract class BasePipeline<TInput = unknown, TOutput = unknown> {
protected model: LoadedModel | null;
protected readonly config: PipelineConfig;
protected readonly modelCache: ModelCache;
protected readonly downloadCache: ModelDownloadCache;
protected isReady: boolean;
constructor(config: PipelineConfig);
/**
* Initialize the pipeline (load model).
*
* Skips model loading when `config.model === 'default'` — concrete
* subclasses that define their own DEFAULT_MODELS handle all model
* loading in their overridden `initialize()` methods, so the base
* should not attempt to fetch a URL called "default".
*/
initialize(): Promise<void>;
/**
* Load model with caching
*/
protected loadModelWithCache(modelPath: string): Promise<LoadedModel>;
/**
* Run inference (single input)
*/
run(input: TInput, options?: PipelineOptions): Promise<TOutput>;
/**
* Run batch inference
*/
runBatch(inputs: TInput[], options?: PipelineOptions): Promise<TOutput[]>;
/**
* Preprocess input - must be implemented by subclasses
*/
protected abstract preprocess(input: TInput): Promise<EdgeFlowTensor[]>;
/**
* Postprocess output - must be implemented by subclasses
*/
protected abstract postprocess(outputs: EdgeFlowTensor[], options?: PipelineOptions): Promise<TOutput>;
/**
* Get the task type
*/
get task(): PipelineTask;
/**
* Check if pipeline is ready
*/
get ready(): boolean;
/**
* Dispose the pipeline
*/
dispose(): void;
}
/**
* Pipeline factory function type
*/
type PipelineFactory = (config: PipelineConfig) => BasePipeline;
/**
* Register a pipeline factory
*/
export declare function registerPipeline(task: PipelineTask, factory: PipelineFactory): void;
/**
* Get a pipeline factory
*/
export declare function getPipelineFactory(task: PipelineTask): PipelineFactory | undefined;
/**
* Common sentiment labels
*/
export declare const SENTIMENT_LABELS: string[];
/**
* Common emotion labels
*/
export declare const EMOTION_LABELS: string[];
/**
* First 10 ImageNet class labels (demo subset)
*/
export declare const IMAGENET_LABELS: string[];
export {};
//# sourceMappingURL=base.d.ts.map
================================================
FILE: dist/pipelines/base.js
================================================
/**
* edgeFlow.js - Base Pipeline
*
* Base class and utilities for all pipeline implementations.
*/
import { loadModel, runInference } from '../core/runtime.js';
import { ModelCache } from '../core/memory.js';
import { ModelDownloadCache } from '../utils/cache.js';
// ============================================================================
// Base Pipeline Class
// ============================================================================
/**
* BasePipeline - Abstract base class for all pipelines
*/
export class BasePipeline {
model = null;
config;
modelCache;
downloadCache;
isReady = false;
constructor(config) {
this.config = config;
this.modelCache = new ModelCache();
this.downloadCache = new ModelDownloadCache();
}
/**
* Initialize the pipeline (load model).
*
* Skips model loading when `config.model === 'default'` — concrete
* subclasses that define their own DEFAULT_MODELS handle all model
* loading in their overridden `initialize()` methods, so the base
* should not attempt to fetch a URL called "default".
*/
async initialize() {
if (this.isReady && this.model)
return;
// Skip generic model loading for subclasses that manage their own models.
if (this.config.model === 'default') {
this.isReady = true;
return;
}
// Check model cache first
const cachedModel = this.modelCache.get(this.config.model);
if (cachedModel) {
this.model = cachedModel;
this.isReady = true;
return;
}
// Load model using the explicit URL from config
this.model = await this.loadModelWithCache(this.config.model);
this.isReady = true;
}
/**
* Load model with caching
*/
async loadModelWithCache(modelPath) {
// Try download cache first
const cachedResponse = await this.downloadCache.get(modelPath);
if (cachedResponse) {
// Use cached data
}
// Download and cache (or use mock for now)
try {
const response = await fetch(modelPath);
if (response.ok) {
// Cache the response
await this.downloadCache.put(modelPath, response.clone());
}
}
catch {
// Ignore fetch errors for demo
}
// Load into runtime
return loadModel(modelPath, {
runtime: this.config.runtime,
quantization: this.config.quantization,
cache: this.config.cache,
});
}
/**
* Run inference (single input)
*/
async run(input, options) {
await this.initialize();
const startTime = performance.now();
// Preprocess
const preprocessed = await this.preprocess(input);
// Run inference
const outputs = await runInference(this.model, preprocessed);
// Postprocess
const result = await this.postprocess(outputs, options);
if (result && typeof result === 'object' && 'processingTime' in result) {
result.processingTime = performance.now() - startTime;
}
return result;
}
/**
* Run batch inference
*/
async runBatch(inputs, options) {
await this.initialize();
// Process all inputs
const results = await Promise.all(inputs.map(input => this.run(input, options)));
return results;
}
/**
* Get the task type
*/
get task() {
return this.config.task;
}
/**
* Check if pipeline is ready
*/
get ready() {
return this.isReady;
}
/**
* Dispose the pipeline
*/
dispose() {
if (this.model) {
this.model.dispose();
this.model = null;
}
this.isReady = false;
}
}
/**
* Registered pipeline factories
*/
const pipelineFactories = new Map();
/**
* Register a pipeline factory
*/
export function registerPipeline(task, factory) {
pipelineFactories.set(task, factory);
}
/**
* Get a pipeline factory
*/
export function getPipelineFactory(task) {
return pipelineFactories.get(task);
}
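// Sketch of how a concrete pipeline plugs into this registry (class name, task name,
// and model URL below are illustrative, not part of the library; the built-in pipelines
// additionally override run()/initialize() when they manage their own default models):
//
//   class MyPipeline extends BasePipeline {
//     async preprocess(input) { /* input -> EdgeFlowTensor[] */ return []; }
//     async postprocess(outputs) { /* tensors -> result */ return { score: 0 }; }
//   }
//   registerPipeline('my-task', (config) => new MyPipeline(config));
//   const factory = getPipelineFactory('my-task');
//   const myPipeline = factory?.({ task: 'my-task', model: 'https://example.com/model.onnx' });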
// ============================================================================
// Default Label Maps
// ============================================================================
/**
* Common sentiment labels
*/
export const SENTIMENT_LABELS = ['negative', 'positive'];
/**
* Common emotion labels
*/
export const EMOTION_LABELS = [
'anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'neutral'
];
/**
* First 10 ImageNet class labels (demo subset)
*/
export const IMAGENET_LABELS = [
'tench', 'goldfish', 'great white shark', 'tiger shark', 'hammerhead',
'electric ray', 'stingray', 'cock', 'hen', 'ostrich'
];
//# sourceMappingURL=base.js.map
================================================
FILE: dist/pipelines/feature-extraction.d.ts
================================================
/**
* edgeFlow.js - Feature Extraction Pipeline
*
* Extract embeddings/features from text using sentence-transformer models.
*/
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { BasePipeline, FeatureExtractionResult } from './base.js';
export interface FeatureExtractionOptions extends PipelineOptions {
pooling?: 'mean' | 'max' | 'cls' | 'none';
normalize?: boolean;
outputDim?: number;
}
export declare class FeatureExtractionPipeline extends BasePipeline {
private tokenizer;
private onnxModel;
private embeddingDim;
private modelUrl;
private tokenizerUrl;
constructor(config: PipelineConfig, embeddingDim?: number);
initialize(): Promise<void>;
run(input: string | string[], options?: FeatureExtractionOptions): Promise<FeatureExtractionResult | FeatureExtractionResult[]>;
protected preprocess(input: string | string[]): Promise<EdgeFlowTensor[]>;
private runInference;
protected postprocess(outputs: EdgeFlowTensor[], options?: FeatureExtractionOptions): Promise<FeatureExtractionResult>;
private extractCLSEmbedding;
private meanPooling;
private maxPooling;
private normalizeVector;
}
export declare function createFeatureExtractionPipeline(config?: Partial<PipelineConfig>): FeatureExtractionPipeline;
//# sourceMappingURL=feature-extraction.d.ts.map
================================================
FILE: dist/pipelines/feature-extraction.js
================================================
/**
* edgeFlow.js - Feature Extraction Pipeline
*
* Extract embeddings/features from text using sentence-transformer models.
*/
import { EdgeFlowTensor } from '../core/tensor.js';
import { Tokenizer } from '../utils/tokenizer.js';
import { loadModelData } from '../utils/model-loader.js';
import { loadModelFromBuffer, runInferenceNamed } from '../core/runtime.js';
import { BasePipeline, registerPipeline, } from './base.js';
// ============================================================================
// Default Model (all-MiniLM-L6-v2, 384-dim sentence embeddings)
// ============================================================================
const DEFAULT_MODELS = {
model: 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model_quantized.onnx',
tokenizer: 'https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
};
const DEFAULT_EMBEDDING_DIM = 384;
export class FeatureExtractionPipeline extends BasePipeline {
tokenizer = null;
onnxModel = null;
embeddingDim;
modelUrl;
tokenizerUrl;
constructor(config, embeddingDim = DEFAULT_EMBEDDING_DIM) {
super(config);
this.embeddingDim = embeddingDim;
this.modelUrl = config.model !== 'default' ? config.model : DEFAULT_MODELS.model;
this.tokenizerUrl = DEFAULT_MODELS.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
async run(input, options) {
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
await this.initialize();
const startTime = performance.now();
const results = [];
for (const text of inputs) {
const tensorInputs = await this.preprocess(text);
const outputs = await this.runInference(tensorInputs);
const result = await this.postprocess(outputs, options);
results.push(result);
}
const processingTime = performance.now() - startTime;
for (const result of results) {
result.processingTime = processingTime / results.length;
}
return isBatch ? results : results[0];
}
async preprocess(input) {
const text = Array.isArray(input) ? input[0] : input;
const encoded = this.tokenizer.encode(text, {
maxLength: 128,
padding: 'max_length',
truncation: true,
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(id => BigInt(id))), [1, encoded.inputIds.length], 'int64');
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map(m => BigInt(m))), [1, encoded.attentionMask.length], 'int64');
const tokenTypeIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(() => BigInt(0))), [1, encoded.inputIds.length], 'int64');
return [inputIds, attentionMask, tokenTypeIds];
}
async runInference(inputs) {
const namedInputs = new Map();
namedInputs.set('input_ids', inputs[0]);
namedInputs.set('attention_mask', inputs[1]);
namedInputs.set('token_type_ids', inputs[2]);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
return outputs;
}
async postprocess(outputs, options) {
const hiddenStates = outputs[0];
if (!hiddenStates) {
return { embeddings: [] };
}
const pooling = options?.pooling ?? 'mean';
const normalize = options?.normalize ?? true;
let embeddings;
switch (pooling) {
case 'cls':
embeddings = this.extractCLSEmbedding(hiddenStates);
break;
case 'max':
embeddings = this.maxPooling(hiddenStates);
break;
case 'none':
embeddings = hiddenStates.toArray();
break;
case 'mean':
default:
embeddings = this.meanPooling(hiddenStates);
break;
}
if (normalize) {
embeddings = this.normalizeVector(embeddings);
}
if (options?.outputDim && options.outputDim < embeddings.length) {
embeddings = embeddings.slice(0, options.outputDim);
}
return { embeddings };
}
extractCLSEmbedding(hiddenStates) {
const data = hiddenStates.toFloat32Array();
const embeddingDim = hiddenStates.shape[2] ?? this.embeddingDim;
return Array.from(data.slice(0, embeddingDim));
}
meanPooling(hiddenStates) {
const data = hiddenStates.toFloat32Array();
const seqLen = hiddenStates.shape[1] ?? 1;
const embeddingDim = hiddenStates.shape[2] ?? this.embeddingDim;
const result = new Float32Array(embeddingDim);
for (let i = 0; i < seqLen; i++) {
for (let j = 0; j < embeddingDim; j++) {
result[j] = (result[j] ?? 0) + (data[i * embeddingDim + j] ?? 0) / seqLen;
}
}
return Array.from(result);
}
maxPooling(hiddenStates) {
const data = hiddenStates.toFloat32Array();
const seqLen = hiddenStates.shape[1] ?? 1;
const embeddingDim = hiddenStates.shape[2] ?? this.embeddingDim;
const result = new Array(embeddingDim).fill(-Infinity);
for (let i = 0; i < seqLen; i++) {
for (let j = 0; j < embeddingDim; j++) {
const val = data[i * embeddingDim + j] ?? 0;
if (val > (result[j] ?? -Infinity)) {
result[j] = val;
}
}
}
return result;
}
normalizeVector(vec) {
let norm = 0;
for (const v of vec) {
norm += v * v;
}
norm = Math.sqrt(norm);
if (norm === 0)
return vec;
return vec.map(v => v / norm);
}
}
// ============================================================================
// Factory Function
// ============================================================================
export function createFeatureExtractionPipeline(config = {}) {
return new FeatureExtractionPipeline({
task: 'feature-extraction',
model: config.model ?? 'default',
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization,
});
}
registerPipeline('feature-extraction', (config) => new FeatureExtractionPipeline(config));
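// Usage sketch (illustrative; the dot product is shown only because `normalize: true`
// makes cosine similarity reduce to it):
//
//   const extractor = createFeatureExtractionPipeline();
//   const a = await extractor.run('first sentence', { pooling: 'mean', normalize: true });
//   const b = await extractor.run('second sentence', { pooling: 'mean', normalize: true });
//   const similarity = a.embeddings.reduce((s, v, i) => s + v * b.embeddings[i], 0);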
//# sourceMappingURL=feature-extraction.js.map
================================================
FILE: dist/pipelines/image-classification.d.ts
================================================
/**
* edgeFlow.js - Image Classification Pipeline
*
* Classify images into categories using vision models.
*/
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { BasePipeline, ImageClassificationResult } from './base.js';
export interface ImageClassificationOptions extends PipelineOptions {
returnAllScores?: boolean;
labels?: string[];
topK?: number;
}
export type ImageInput = HTMLImageElement | HTMLCanvasElement | ImageBitmap | ImageData | string;
export declare class ImageClassificationPipeline extends BasePipeline {
private preprocessor;
private onnxModel;
private labels;
private modelUrl;
constructor(config: PipelineConfig, labels?: string[], _numClasses?: number);
initialize(): Promise<void>;
setLabels(labels: string[]): void;
run(input: ImageInput | ImageInput[], options?: ImageClassificationOptions): Promise<ImageClassificationResult | ImageClassificationResult[]>;
protected preprocess(input: ImageInput | ImageInput[]): Promise<EdgeFlowTensor[]>;
private runModelInference;
protected postprocess(outputs: EdgeFlowTensor[], options?: ImageClassificationOptions): Promise<ImageClassificationResult>;
}
export declare function createImageClassificationPipeline(config?: Partial<PipelineConfig>, labels?: string[]): ImageClassificationPipeline;
//# sourceMappingURL=image-classification.d.ts.map
================================================
FILE: dist/pipelines/image-classification.js
================================================
/**
* edgeFlow.js - Image Classification Pipeline
*
* Classify images into categories using vision models.
*/
import { softmax } from '../core/tensor.js';
import { createImagePreprocessor } from '../utils/preprocessor.js';
import { loadModelData } from '../utils/model-loader.js';
import { loadModelFromBuffer, runInference } from '../core/runtime.js';
import { BasePipeline, registerPipeline, IMAGENET_LABELS, } from './base.js';
// ============================================================================
// Default Model (MobileViT-small, quantized)
// ============================================================================
const DEFAULT_MODELS = {
model: 'https://huggingface.co/Xenova/mobilevit-small/resolve/main/onnx/model_quantized.onnx',
};
export class ImageClassificationPipeline extends BasePipeline {
preprocessor = null;
onnxModel = null;
labels;
modelUrl;
constructor(config, labels, _numClasses = 1000) {
super(config);
this.labels = labels ?? IMAGENET_LABELS;
this.modelUrl = config.model !== 'default' ? config.model : DEFAULT_MODELS.model;
}
async initialize() {
await super.initialize();
if (!this.preprocessor) {
this.preprocessor = createImagePreprocessor('imagenet');
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setLabels(labels) {
this.labels = labels;
}
async run(input, options) {
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
await this.initialize();
const startTime = performance.now();
const results = [];
for (const image of inputs) {
const tensorInputs = await this.preprocess(image);
const outputs = await this.runModelInference(tensorInputs);
const result = await this.postprocess(outputs, options);
results.push(result);
}
const processingTime = performance.now() - startTime;
for (const result of results) {
result.processingTime = processingTime / results.length;
}
return isBatch ? results : results[0];
}
async preprocess(input) {
const image = Array.isArray(input) ? input[0] : input;
const tensor = await this.preprocessor.process(image);
if (tensor.shape.length === 3) {
return [tensor.reshape([1, ...tensor.shape])];
}
return [tensor];
}
async runModelInference(inputs) {
const outputs = await runInference(this.onnxModel, inputs);
return outputs;
}
async postprocess(outputs, options) {
const logits = outputs[0];
if (!logits) {
return { label: 'unknown', score: 0 };
}
const probs = softmax(logits, -1);
const probsArray = probs.toFloat32Array();
let maxIdx = 0;
let maxScore = probsArray[0] ?? 0;
for (let i = 1; i < probsArray.length; i++) {
if ((probsArray[i] ?? 0) > maxScore) {
maxScore = probsArray[i] ?? 0;
maxIdx = i;
}
}
const label = options?.labels?.[maxIdx] ?? this.labels[maxIdx] ?? `class_${maxIdx}`;
return { label, score: maxScore };
}
}
// ============================================================================
// Factory Function
// ============================================================================
export function createImageClassificationPipeline(config = {}, labels) {
return new ImageClassificationPipeline({
task: 'image-classification',
model: config.model ?? 'default',
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization,
}, labels);
}
registerPipeline('image-classification', (config) => new ImageClassificationPipeline(config));
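// Usage sketch (illustrative; `img` is any already-decoded HTMLImageElement):
//
//   const classifier = createImageClassificationPipeline();
//   const { label, score } = await classifier.run(img);
//   // A custom label set can be supplied per call via options.labels or via the second
//   // factory argument; otherwise the demo IMAGENET_LABELS above are used, with a
//   // `class_<index>` fallback for indices beyond the label list.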
//# sourceMappingURL=image-classification.js.map
================================================
FILE: dist/pipelines/image-segmentation.d.ts
================================================
/**
* edgeFlow.js - Image Segmentation Pipeline
*
* Interactive image segmentation using SAM (Segment Anything Model).
* Supports point prompts and bounding box prompts.
*/
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { BasePipeline, PipelineResult } from './base.js';
/**
* Point prompt for segmentation
*/
export interface PointPrompt {
/** X coordinate (0-1 normalized) */
x: number;
/** Y coordinate (0-1 normalized) */
y: number;
/** 1 for foreground (include), 0 for background (exclude) */
label: 0 | 1;
}
/**
* Box prompt for segmentation
*/
export interface BoxPrompt {
/** Top-left X (0-1 normalized) */
x1: number;
/** Top-left Y (0-1 normalized) */
y1: number;
/** Bottom-right X (0-1 normalized) */
x2: number;
/** Bottom-right Y (0-1 normalized) */
y2: number;
}
/**
* Model loading progress callback
*/
export interface ModelLoadProgress {
/** Model name (encoder or decoder) */
model: 'encoder' | 'decoder';
/** Bytes loaded */
loaded: number;
/** Total bytes */
total: number;
/** Progress percentage (0-100) */
progress: number;
}
/**
* Segmentation options
*/
export interface ImageSegmentationOptions extends PipelineOptions {
/** Point prompts */
points?: PointPrompt[];
/** Box prompts */
boxes?: BoxPrompt[];
/** Return all masks or just the best one */
returnAllMasks?: boolean;
/** Mask threshold (0-1) */
maskThreshold?: number;
}
/**
* Segmentation result
*/
export interface ImageSegmentationResult extends PipelineResult {
/** Segmentation mask (Uint8Array, 0 or 255) */
mask: Uint8Array;
/** Mask width */
width: number;
/** Mask height */
height: number;
/** Confidence score */
score: number;
/** All masks if returnAllMasks is true */
allMasks?: Array<{
mask: Uint8Array;
score: number;
}>;
}
/**
* Image input types
*/
export type ImageInput = HTMLImageElement | HTMLCanvasElement | ImageBitmap | ImageData | string;
/**
* ImageSegmentationPipeline - Interactive image segmentation
*
* Uses SAM-style models for point/box prompted segmentation.
*
* @example
* ```typescript
* const segmenter = createImageSegmentationPipeline();
*
* // Load models with progress callback
* await segmenter.loadModels((progress) => {
* console.log(`Loading ${progress.model}: ${progress.progress}%`);
* });
*
* // Set image and segment
* await segmenter.setImage(imageElement);
* const result = await segmenter.segment({
* points: [{ x: 0.5, y: 0.5, label: 1 }]
* });
* ```
*/
export declare class ImageSegmentationPipeline extends BasePipeline {
private encoderModel;
private decoderModel;
private imageEmbedding;
private imagePositionalEmbedding;
private currentImageSize;
private resizedImageSize;
private inputSize;
private modelsLoaded;
private encoderUrl;
private decoderUrl;
constructor(config: PipelineConfig);
/**
* Check if models are loaded
*/
get isModelsLoaded(): boolean;
/**
* Set custom model URLs
*/
setModelUrls(encoder: string, decoder: string): void;
/**
* Load both encoder and decoder models with progress callback
*/
loadModels(onProgress?: (progress: ModelLoadProgress) => void): Promise<void>;
/**
* Fetch model with progress tracking
*/
private fetchModelWithProgress;
/**
* Initialize pipeline (override to skip default model loading)
*/
initialize(): Promise<void>;
/**
* Load encoder model (processes the image once)
*/
loadEncoder(modelUrl: string): Promise<void>;
/**
* Load decoder model (processes prompts to generate masks)
*/
loadDecoder(modelUrl: string): Promise<void>;
/**
* Set and encode the image (call once per image)
*/
setImage(image: ImageInput): Promise<void>;
/**
* Segment the image with given prompts
*/
segment(options?: ImageSegmentationOptions): Promise<ImageSegmentationResult>;
/**
* Run segmentation (implements BasePipeline interface)
*/
run(input: ImageInput, options?: ImageSegmentationOptions): Promise<ImageSegmentationResult>;
/**
* Load image from various sources
*/
private loadImage;
/**
* Load image from URL
*/
private loadImageFromUrl;
/**
* Convert HTMLImageElement to ImageData
*/
private imageElementToImageData;
/**
* Convert canvas to ImageData
*/
private canvasToImageData;
/**
* Convert ImageBitmap to ImageData
*/
private imageBitmapToImageData;
/**
* Preprocess image for SAM
*/
private preprocessImage;
/**
* Prepare decoder inputs (prompts) for SlimSAM
*
* SlimSAM prompt_encoder_mask_decoder expects these named inputs:
* - image_embeddings: [1, 256, 64, 64]
* - point_coords: [batch, num_points, 2]
* - point_labels: [batch, num_points]
* - mask_input: [batch, 1, 256, 256]
* - has_mask_input: [batch, 1]
* - orig_im_size: [2]
* - position_ids: [batch, num_points]
*/
private prepareDecoderInputs;
/**
* Post-process masks from decoder output
*/
private postprocessMasks;
/**
* Resize mask from model output size to original image size
*/
private resizeMask;
/**
* Clear the current image embedding
*/
clearImage(): void;
/**
* Preprocess (required by BasePipeline)
*/
protected preprocess(input: ImageInput): Promise<EdgeFlowTensor[]>;
/**
* Postprocess (required by BasePipeline)
*/
protected postprocess(_outputs: EdgeFlowTensor[], _options?: PipelineOptions): Promise<ImageSegmentationResult>;
/**
* Dispose resources
*/
dispose(): void;
}
/**
* Create image segmentation pipeline
*/
export declare function createImageSegmentationPipeline(config?: Partial<PipelineConfig>): ImageSegmentationPipeline;
//# sourceMappingURL=image-segmentation.d.ts.map
================================================
FILE: dist/pipelines/image-segmentation.js
================================================
/**
* edgeFlow.js - Image Segmentation Pipeline
*
* Interactive image segmentation using SAM (Segment Anything Model).
* Supports point prompts and bounding box prompts.
*/
import { EdgeFlowTensor } from '../core/tensor.js';
import { BasePipeline, registerPipeline } from './base.js';
import { loadModel, loadModelFromBuffer, runInference, runInferenceNamed } from '../core/runtime.js';
// ============================================================================
// Default Model URLs (SlimSAM - quantized for browser)
// ============================================================================
const DEFAULT_SAM_MODELS = {
encoder: 'https://huggingface.co/Xenova/slimsam-77-uniform/resolve/main/onnx/vision_encoder_quantized.onnx',
decoder: 'https://huggingface.co/Xenova/slimsam-77-uniform/resolve/main/onnx/prompt_encoder_mask_decoder_quantized.onnx',
};
// ============================================================================
// Image Segmentation Pipeline
// ============================================================================
/**
* ImageSegmentationPipeline - Interactive image segmentation
*
* Uses SAM-style models for point/box prompted segmentation.
*
* @example
* ```typescript
* const segmenter = createImageSegmentationPipeline();
*
* // Load models with progress callback
* await segmenter.loadModels((progress) => {
* console.log(`Loading ${progress.model}: ${progress.progress}%`);
* });
*
* // Set image and segment
* await segmenter.setImage(imageElement);
* const result = await segmenter.segment({
* points: [{ x: 0.5, y: 0.5, label: 1 }]
* });
* ```
*/
export class ImageSegmentationPipeline extends BasePipeline {
encoderModel = null;
decoderModel = null;
imageEmbedding = null;
imagePositionalEmbedding = null;
currentImageSize = null;
resizedImageSize = null;
inputSize = 1024; // SAM default input size
modelsLoaded = false;
// Custom model URLs
encoderUrl;
decoderUrl;
constructor(config) {
super(config);
this.encoderUrl = DEFAULT_SAM_MODELS.encoder;
this.decoderUrl = DEFAULT_SAM_MODELS.decoder;
}
/**
* Check if models are loaded
*/
get isModelsLoaded() {
return this.modelsLoaded;
}
/**
* Set custom model URLs
*/
setModelUrls(encoder, decoder) {
this.encoderUrl = encoder;
this.decoderUrl = decoder;
}
/**
* Load both encoder and decoder models with progress callback
*/
async loadModels(onProgress) {
if (this.modelsLoaded)
return;
// Load encoder
onProgress?.({ model: 'encoder', loaded: 0, total: 100, progress: 0 });
const encoderData = await this.fetchModelWithProgress(this.encoderUrl, (loaded, total) => {
onProgress?.({
model: 'encoder',
loaded,
total,
progress: Math.round((loaded / total) * 100),
});
});
this.encoderModel = await loadModelFromBuffer(encoderData, {
runtime: 'wasm', // Uses ONNXRuntime which auto-detects WebGPU internally
});
// Load decoder
onProgress?.({ model: 'decoder', loaded: 0, total: 100, progress: 0 });
const decoderData = await this.fetchModelWithProgress(this.decoderUrl, (loaded, total) => {
onProgress?.({
model: 'decoder',
loaded,
total,
progress: Math.round((loaded / total) * 100),
});
});
this.decoderModel = await loadModelFromBuffer(decoderData, {
runtime: 'wasm', // Uses ONNXRuntime which auto-detects WebGPU internally
});
this.modelsLoaded = true;
}
/**
* Fetch model with progress tracking
*/
async fetchModelWithProgress(url, onProgress) {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to fetch model: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get('content-length');
const total = contentLength ? parseInt(contentLength, 10) : 0;
if (!response.body) {
// Fallback if no streaming support
const buffer = await response.arrayBuffer();
onProgress(buffer.byteLength, buffer.byteLength);
return buffer;
}
const reader = response.body.getReader();
const chunks = [];
let loaded = 0;
while (true) {
const { done, value } = await reader.read();
if (done)
break;
chunks.push(value);
loaded += value.length;
onProgress(loaded, total || loaded);
}
// Combine chunks into ArrayBuffer
const buffer = new Uint8Array(loaded);
let offset = 0;
for (const chunk of chunks) {
buffer.set(chunk, offset);
offset += chunk.length;
}
return buffer.buffer;
}
/**
* Initialize pipeline (override to skip default model loading)
*/
async initialize() {
if (this.isReady)
return;
// Don't call super.initialize() - we handle model loading separately
this.isReady = true;
}
/**
* Load encoder model (processes the image once)
*/
async loadEncoder(modelUrl) {
this.encoderModel = await loadModel(modelUrl, {
runtime: 'wasm',
});
}
/**
* Load decoder model (processes prompts to generate masks)
*/
async loadDecoder(modelUrl) {
this.decoderModel = await loadModel(modelUrl, {
runtime: 'wasm',
});
}
/**
* Set and encode the image (call once per image)
*/
async setImage(image) {
if (!this.modelsLoaded) {
throw new Error('Models not loaded. Call loadModels() first.');
}
// Get image data
const imageData = await this.loadImage(image);
this.currentImageSize = {
width: imageData.width,
height: imageData.height,
};
// Preprocess image for SAM
const { tensor: inputTensor, resizedSize } = this.preprocessImage(imageData);
this.resizedImageSize = resizedSize;
// Run encoder
if (this.encoderModel) {
const outputs = await runInference(this.encoderModel, [inputTensor]);
// SlimSAM encoder outputs: [image_embeddings, image_positional_embeddings]
this.imageEmbedding = outputs[0];
this.imagePositionalEmbedding = outputs[1];
console.log('[SAM] Encoder outputs:', outputs.length);
console.log('[SAM] image_embeddings shape:', this.imageEmbedding.shape);
if (this.imagePositionalEmbedding) {
console.log('[SAM] image_positional_embeddings shape:', this.imagePositionalEmbedding.shape);
}
}
else {
throw new Error('Encoder model not loaded');
}
}
/**
* Segment the image with given prompts
*/
async segment(options = {}) {
if (!this.imageEmbedding || !this.currentImageSize || !this.resizedImageSize) {
throw new Error('No image set. Call setImage() first.');
}
if (!this.decoderModel) {
throw new Error('Decoder model not loaded');
}
const startTime = performance.now();
const { points = [], boxes = [], maskThreshold = 0.0, returnAllMasks = false } = options;
// Prepare inputs for decoder
const decoderInputs = this.prepareDecoderInputs(points, boxes);
// Add image embeddings to inputs
decoderInputs.set('image_embeddings', this.imageEmbedding);
// Add positional embeddings (required by SlimSAM)
if (this.imagePositionalEmbedding) {
decoderInputs.set('image_positional_embeddings', this.imagePositionalEmbedding);
}
else {
throw new Error('image_positional_embeddings not available from encoder');
}
// Run decoder model with named inputs
const outputs = await runInferenceNamed(this.decoderModel, decoderInputs);
// SAM decoder outputs: [masks, iou_predictions]
const masks = outputs[0];
const scores = outputs[1];
// Post-process masks
const result = this.postprocessMasks(masks, scores, maskThreshold, returnAllMasks);
result.processingTime = performance.now() - startTime;
return result;
}
/**
* Run segmentation (implements BasePipeline interface)
*/
async run(input, options) {
await this.setImage(input);
return this.segment(options);
}
/**
* Load image from various sources
*/
async loadImage(input) {
// Handle different input types
if (typeof input === 'string') {
// URL or base64
return this.loadImageFromUrl(input);
}
else if (input instanceof HTMLImageElement) {
return this.imageElementToImageData(input);
}
else if (input instanceof HTMLCanvasElement) {
return this.canvasToImageData(input);
}
else if (input instanceof ImageData) {
return input;
}
else if (typeof ImageBitmap !== 'undefined' && input instanceof ImageBitmap) {
return this.imageBitmapToImageData(input);
}
throw new Error('Unsupported image input type');
}
/**
* Load image from URL
*/
async loadImageFromUrl(url) {
return new Promise((resolve, reject) => {
const img = new Image();
img.crossOrigin = 'anonymous';
img.onload = () => {
const canvas = document.createElement('canvas');
canvas.width = img.width;
canvas.height = img.height;
const ctx = canvas.getContext('2d');
ctx.drawImage(img, 0, 0);
resolve(ctx.getImageData(0, 0, img.width, img.height));
};
img.onerror = reject;
img.src = url;
});
}
/**
* Convert HTMLImageElement to ImageData
*/
imageElementToImageData(img) {
const canvas = document.createElement('canvas');
canvas.width = img.naturalWidth || img.width;
canvas.height = img.naturalHeight || img.height;
const ctx = canvas.getContext('2d');
ctx.drawImage(img, 0, 0);
return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
/**
* Convert canvas to ImageData
*/
canvasToImageData(canvas) {
const ctx = canvas.getContext('2d');
return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
/**
* Convert ImageBitmap to ImageData
*/
imageBitmapToImageData(bitmap) {
const canvas = document.createElement('canvas');
canvas.width = bitmap.width;
canvas.height = bitmap.height;
const ctx = canvas.getContext('2d');
ctx.drawImage(bitmap, 0, 0);
return ctx.getImageData(0, 0, canvas.width, canvas.height);
}
/**
* Preprocess image for SAM
*/
preprocessImage(imageData) {
const { width, height } = imageData;
// Calculate resize dimensions (longest side = inputSize)
const scale = this.inputSize / Math.max(width, height);
const newWidth = Math.round(width * scale);
const newHeight = Math.round(height * scale);
// Create resized canvas with padding
const canvas = document.createElement('canvas');
canvas.width = this.inputSize;
canvas.height = this.inputSize;
const ctx = canvas.getContext('2d');
// Fill with padding color (SAM uses pixel mean)
ctx.fillStyle = `rgb(123.675, 116.28, 103.53)`;
ctx.fillRect(0, 0, this.inputSize, this.inputSize);
// Draw resized image (top-left aligned)
const tempCanvas = document.createElement('canvas');
tempCanvas.width = width;
tempCanvas.height = height;
const tempCtx = tempCanvas.getContext('2d');
tempCtx.putImageData(imageData, 0, 0);
ctx.drawImage(tempCanvas, 0, 0, newWidth, newHeight);
// Get pixel data
const resizedData = ctx.getImageData(0, 0, this.inputSize, this.inputSize);
// Convert to tensor (NCHW format, normalized with ImageNet mean/std)
const tensorData = new Float32Array(3 * this.inputSize * this.inputSize);
const mean = [123.675, 116.28, 103.53];
const std = [58.395, 57.12, 57.375];
for (let i = 0; i < this.inputSize * this.inputSize; i++) {
const pixelIdx = i * 4;
tensorData[i] = (resizedData.data[pixelIdx] - mean[0]) / std[0]; // R
tensorData[this.inputSize * this.inputSize + i] =
(resizedData.data[pixelIdx + 1] - mean[1]) / std[1]; // G
tensorData[2 * this.inputSize * this.inputSize + i] =
(resizedData.data[pixelIdx + 2] - mean[2]) / std[2]; // B
}
return {
tensor: new EdgeFlowTensor(tensorData, [1, 3, this.inputSize, this.inputSize], 'float32'),
resizedSize: { width: newWidth, height: newHeight },
};
}
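/*
 * Layout note (worked example, not part of the runtime code): the tensor above is
 * planar NCHW, so for pixel index i the red value lands at tensorData[i], green at
 * tensorData[S*S + i], and blue at tensorData[2*S*S + i] (S = inputSize), each
 * normalized as (value - mean[c]) / std[c]. A pure red pixel (255, 0, 0) therefore
 * maps to roughly (2.25, -2.04, -1.80).
 */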
/**
* Prepare decoder inputs (prompts) for SlimSAM
*
* The SlimSAM prompt_encoder_mask_decoder used here takes these named inputs
* (image_embeddings and image_positional_embeddings are attached later, in segment()):
* - image_embeddings: [1, 256, 64, 64]
* - image_positional_embeddings (second output of the encoder)
* - input_points: [1, 1, num_points, 2] (float32)
* - input_labels: [1, 1, num_points] (int64)
*/
prepareDecoderInputs(points, boxes) {
const { width: resizedW, height: resizedH } = this.resizedImageSize;
// Scale factors for converting normalized coords to resized image coords
const scaleX = resizedW;
const scaleY = resizedH;
const allPoints = [];
const allLabels = [];
// Add point prompts
for (const point of points) {
allPoints.push(point.x * scaleX, point.y * scaleY);
allLabels.push(point.label);
}
// Add box prompts (as two corner points)
for (const box of boxes) {
// Top-left corner (label 2)
allPoints.push(box.x1 * scaleX, box.y1 * scaleY);
allLabels.push(2);
// Bottom-right corner (label 3)
allPoints.push(box.x2 * scaleX, box.y2 * scaleY);
allLabels.push(3);
}
// Default point if no prompts (center of image)
if (allPoints.length === 0) {
allPoints.push(resizedW / 2, resizedH / 2);
allLabels.push(1);
}
const numPoints = allLabels.length;
const inputs = new Map();
// input_points: [1, 1, num_points, 2] - SlimSAM format (float32)
inputs.set('input_points', new EdgeFlowTensor(new Float32Array(allPoints), [1, 1, numPoints, 2], 'float32'));
// input_labels: [1, 1, num_points] - SlimSAM format (int64)
inputs.set('input_labels', new EdgeFlowTensor(BigInt64Array.from(allLabels.map(l => BigInt(l))), [1, 1, numPoints], 'int64'));
// Note: image_embeddings and image_positional_embeddings are added in segment()
// SlimSAM decoder only needs: image_embeddings, image_positional_embeddings, input_points, input_labels
return inputs;
}
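/*
 * Shape sketch (illustrative): one foreground point plus one box prompt becomes
 * three points in total (the point, then the box's top-left corner with label 2
 * and its bottom-right corner with label 3), so the decoder receives
 *   input_points: [1, 1, 3, 2] (float32)
 *   input_labels: [1, 1, 3]    (int64)
 */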
/**
* Post-process masks from decoder output
*/
postprocessMasks(masks, scores, threshold, returnAllMasks) {
const { width, height } = this.currentImageSize;
const scoresData = scores.toFloat32Array();
const masksData = masks.toFloat32Array();
// SAM outputs multiple masks (usually 3)
const numMasks = scoresData.length;
const maskShape = masks.shape; // [1, num_masks, H, W]
const maskH = maskShape[2] ?? height;
const maskW = maskShape[3] ?? width;
// Find best mask by score
let bestIdx = 0;
let bestScore = scoresData[0] ?? 0;
for (let i = 1; i < numMasks; i++) {
if ((scoresData[i] ?? 0) > bestScore) {
bestScore = scoresData[i] ?? 0;
bestIdx = i;
}
}
// Extract and resize the best mask to original image size
const outputMask = this.resizeMask(masksData, bestIdx, maskW, maskH, width, height, threshold);
const result = {
mask: outputMask,
width,
height,
score: bestScore,
};
if (returnAllMasks && numMasks > 1) {
result.allMasks = [];
for (let m = 0; m < numMasks; m++) {
const mask = this.resizeMask(masksData, m, maskW, maskH, width, height, threshold);
result.allMasks.push({
mask,
score: scoresData[m] ?? 0,
});
}
}
return result;
}
/**
* Resize mask from model output size to original image size
*/
resizeMask(masksData, maskIdx, srcW, srcH, dstW, dstH, threshold) {
const outputMask = new Uint8Array(dstW * dstH);
const maskOffset = maskIdx * srcW * srcH;
// Bilinear interpolation for resizing
for (let y = 0; y < dstH; y++) {
for (let x = 0; x < dstW; x++) {
// Map to source coordinates
const srcX = (x / dstW) * srcW;
const srcY = (y / dstH) * srcH;
// Bilinear interpolation
const x0 = Math.floor(srcX);
const x1 = Math.min(x0 + 1, srcW - 1);
const y0 = Math.floor(srcY);
const y1 = Math.min(y0 + 1, srcH - 1);
const xFrac = srcX - x0;
const yFrac = srcY - y0;
const v00 = masksData[maskOffset + y0 * srcW + x0] ?? 0;
const v01 = masksData[maskOffset + y0 * srcW + x1] ?? 0;
const v10 = masksData[maskOffset + y1 * srcW + x0] ?? 0;
const v11 = masksData[maskOffset + y1 * srcW + x1] ?? 0;
const value = v00 * (1 - xFrac) * (1 - yFrac) +
v01 * xFrac * (1 - yFrac) +
v10 * (1 - xFrac) * yFrac +
v11 * xFrac * yFrac;
// Threshold in logit space (SAM convention: threshold 0.0 corresponds to probability 0.5)
outputMask[y * dstW + x] = value > threshold ? 255 : 0;
}
}
return outputMask;
}
/**
* Clear the current image embedding
*/
clearImage() {
this.imageEmbedding = null;
this.imagePositionalEmbedding = null;
this.currentImageSize = null;
this.resizedImageSize = null;
}
/**
* Preprocess (required by BasePipeline)
*/
async preprocess(input) {
const imageData = await this.loadImage(input);
const { tensor } = this.preprocessImage(imageData);
return [tensor];
}
/**
* Postprocess (required by BasePipeline)
*/
async postprocess(_outputs, _options) {
// This is handled in segment() method
return {
mask: new Uint8Array(0),
width: 0,
height: 0,
score: 0,
};
}
/**
* Dispose resources
*/
dispose() {
super.dispose();
this.encoderModel?.dispose();
this.decoderModel?.dispose();
this.imageEmbedding = null;
this.imagePositionalEmbedding = null;
this.currentImageSize = null;
this.resizedImageSize = null;
this.modelsLoaded = false;
}
}
// ============================================================================
// Factory Function
// ============================================================================
/**
* Create image segmentation pipeline
*/
export function createImageSegmentationPipeline(config = {}) {
return new ImageSegmentationPipeline({
task: 'image-segmentation',
model: config.model ?? 'slimsam',
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization,
});
}
// Register pipeline
registerPipeline('image-segmentation', (config) => new ImageSegmentationPipeline(config));
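/**
 * Usage sketch (illustrative only; the image URL and prompt values are placeholders,
 * and the loadModels() arguments are omitted here since that method is defined above):
 *
 * ```typescript
 * const sam = createImageSegmentationPipeline();
 * // ...load the encoder/decoder first (e.g. via loadModels())...
 * await sam.setImage('https://example.com/photo.jpg'); // runs the encoder once per image
 * const result = await sam.segment({
 *   points: [{ x: 0.5, y: 0.5, label: 1 }], // normalized [0, 1] coordinates, label 1 = foreground
 * });
 * console.log(result.score, result.width, result.height); // result.mask is a Uint8Array (0/255)
 * sam.clearImage(); // drop the cached embedding before encoding a new image
 * ```
 */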
//# sourceMappingURL=image-segmentation.js.map
================================================
FILE: dist/pipelines/index.d.ts
================================================
/**
* edgeFlow.js - Pipeline Exports
*/
import { RuntimeType, QuantizationType } from '../core/types.js';
export { BasePipeline, registerPipeline, getPipelineFactory, SENTIMENT_LABELS, EMOTION_LABELS, IMAGENET_LABELS, type PipelineResult, type TextClassificationResult, type FeatureExtractionResult, type ImageClassificationResult, type ObjectDetectionResult, } from './base.js';
export { TextClassificationPipeline, SentimentAnalysisPipeline, createTextClassificationPipeline, createSentimentAnalysisPipeline, type TextClassificationOptions, } from './text-classification.js';
export { FeatureExtractionPipeline, createFeatureExtractionPipeline, type FeatureExtractionOptions, } from './feature-extraction.js';
export { ImageClassificationPipeline, createImageClassificationPipeline, type ImageClassificationOptions, type ImageInput, } from './image-classification.js';
export { TextGenerationPipeline, createTextGenerationPipeline, type TextGenerationOptions, type TextGenerationResult, type GenerationStreamEvent, type ChatMessage, type ChatOptions, type ChatTemplateType, type LLMLoadProgress, } from './text-generation.js';
export { ObjectDetectionPipeline, createObjectDetectionPipeline, COCO_LABELS, type ObjectDetectionOptions, type Detection, type BoundingBox, } from './object-detection.js';
export { AutomaticSpeechRecognitionPipeline, createASRPipeline, type ASROptions, type ASRResult, type WordTimestamp, type ChunkTimestamp, } from './automatic-speech-recognition.js';
export { ZeroShotClassificationPipeline, createZeroShotClassificationPipeline, type ZeroShotClassificationOptions, type ZeroShotClassificationResult, } from './zero-shot-classification.js';
export { QuestionAnsweringPipeline, createQuestionAnsweringPipeline, type QuestionAnsweringOptions, type QuestionAnsweringResult, type QAInput, } from './question-answering.js';
export { ImageSegmentationPipeline, createImageSegmentationPipeline, type ImageSegmentationOptions, type ImageSegmentationResult, type PointPrompt, type BoxPrompt, type ModelLoadProgress, } from './image-segmentation.js';
/**
* Pipeline options for the factory function
*/
export interface PipelineFactoryOptions {
/** Model ID or URL */
model?: string;
/** Runtime to use */
runtime?: RuntimeType;
/** Enable caching */
cache?: boolean;
/** Quantization type */
quantization?: QuantizationType;
/** Custom labels for classification */
labels?: string[];
}
/**
* Supported pipeline task mapping
*/
type PipelineTaskMap = {
'text-classification': TextClassificationPipeline;
'sentiment-analysis': SentimentAnalysisPipeline;
'feature-extraction': FeatureExtractionPipeline;
'image-classification': ImageClassificationPipeline;
'text-generation': TextGenerationPipeline;
'object-detection': ObjectDetectionPipeline;
'automatic-speech-recognition': AutomaticSpeechRecognitionPipeline;
'zero-shot-classification': ZeroShotClassificationPipeline;
'question-answering': QuestionAnsweringPipeline;
'image-segmentation': ImageSegmentationPipeline;
};
import { TextClassificationPipeline, SentimentAnalysisPipeline } from './text-classification.js';
import { FeatureExtractionPipeline } from './feature-extraction.js';
import { ImageClassificationPipeline } from './image-classification.js';
import { TextGenerationPipeline } from './text-generation.js';
import { ObjectDetectionPipeline } from './object-detection.js';
import { AutomaticSpeechRecognitionPipeline } from './automatic-speech-recognition.js';
import { ZeroShotClassificationPipeline } from './zero-shot-classification.js';
import { QuestionAnsweringPipeline } from './question-answering.js';
import { ImageSegmentationPipeline } from './image-segmentation.js';
/**
* Create a pipeline for a specific task
*
* @example
* ```typescript
* // Create a sentiment analysis pipeline
* const sentiment = await pipeline('sentiment-analysis');
* const result = await sentiment.run('I love this product!');
*
* // Create an image classifier with custom model
* const classifier = await pipeline('image-classification', {
* model: 'https://example.com/model.bin',
* });
* ```
*/
export declare function pipeline<T extends keyof PipelineTaskMap>(task: T, options?: PipelineFactoryOptions): Promise<PipelineTaskMap[T]>;
/**
* Create multiple pipelines at once
*/
export declare function createPipelines<T extends ReadonlyArray<keyof PipelineTaskMap>>(tasks: T, options?: PipelineFactoryOptions): Promise<{
[K in T[number]]: PipelineTaskMap[K];
}>;
//# sourceMappingURL=index.d.ts.map
================================================
FILE: dist/pipelines/index.js
================================================
/**
* edgeFlow.js - Pipeline Exports
*/
import { getPluginPipeline } from '../core/plugin.js';
import { registerAllBackends } from '../backends/index.js';
// Base
export { BasePipeline, registerPipeline, getPipelineFactory, SENTIMENT_LABELS, EMOTION_LABELS, IMAGENET_LABELS, } from './base.js';
// Text Classification
export { TextClassificationPipeline, SentimentAnalysisPipeline, createTextClassificationPipeline, createSentimentAnalysisPipeline, } from './text-classification.js';
// Feature Extraction
export { FeatureExtractionPipeline, createFeatureExtractionPipeline, } from './feature-extraction.js';
// Image Classification
export { ImageClassificationPipeline, createImageClassificationPipeline, } from './image-classification.js';
// Text Generation
export { TextGenerationPipeline, createTextGenerationPipeline, } from './text-generation.js';
// Object Detection
export { ObjectDetectionPipeline, createObjectDetectionPipeline, COCO_LABELS, } from './object-detection.js';
// Automatic Speech Recognition
export { AutomaticSpeechRecognitionPipeline, createASRPipeline, } from './automatic-speech-recognition.js';
// Zero-shot Classification
export { ZeroShotClassificationPipeline, createZeroShotClassificationPipeline, } from './zero-shot-classification.js';
// Question Answering
export { QuestionAnsweringPipeline, createQuestionAnsweringPipeline, } from './question-answering.js';
// Image Segmentation
export { ImageSegmentationPipeline, createImageSegmentationPipeline, } from './image-segmentation.js';
// Import pipeline classes
import { TextClassificationPipeline, SentimentAnalysisPipeline } from './text-classification.js';
import { FeatureExtractionPipeline } from './feature-extraction.js';
import { ImageClassificationPipeline } from './image-classification.js';
import { TextGenerationPipeline } from './text-generation.js';
import { ObjectDetectionPipeline } from './object-detection.js';
import { AutomaticSpeechRecognitionPipeline } from './automatic-speech-recognition.js';
import { ZeroShotClassificationPipeline } from './zero-shot-classification.js';
import { QuestionAnsweringPipeline } from './question-answering.js';
import { ImageSegmentationPipeline } from './image-segmentation.js';
/**
* Create a pipeline for a specific task
*
* @example
* ```typescript
* // Create a sentiment analysis pipeline
* const sentiment = await pipeline('sentiment-analysis');
* const result = await sentiment.run('I love this product!');
*
* // Create an image classifier with custom model
* const classifier = await pipeline('image-classification', {
* model: 'https://example.com/model.bin',
* });
* ```
*/
export async function pipeline(task, options) {
// Guarantee backends are registered before any model loads.
// registerAllBackends() is synchronous and idempotent (safe to call repeatedly).
registerAllBackends();
const config = {
task: task,
model: options?.model ?? 'default',
runtime: options?.runtime,
cache: options?.cache ?? true,
quantization: options?.quantization,
};
let pipelineInstance;
switch (task) {
case 'text-classification':
pipelineInstance = new TextClassificationPipeline(config, options?.labels);
break;
case 'sentiment-analysis':
pipelineInstance = new SentimentAnalysisPipeline(config);
break;
case 'feature-extraction':
pipelineInstance = new FeatureExtractionPipeline(config);
break;
case 'image-classification':
pipelineInstance = new ImageClassificationPipeline(config, options?.labels);
break;
case 'text-generation':
pipelineInstance = new TextGenerationPipeline(config);
break;
case 'object-detection':
pipelineInstance = new ObjectDetectionPipeline(config, options?.labels);
break;
case 'automatic-speech-recognition':
pipelineInstance = new AutomaticSpeechRecognitionPipeline(config);
break;
case 'zero-shot-classification':
pipelineInstance = new ZeroShotClassificationPipeline(config);
break;
case 'question-answering':
pipelineInstance = new QuestionAnsweringPipeline(config);
break;
case 'image-segmentation':
pipelineInstance = new ImageSegmentationPipeline(config);
break;
default: {
// Check if a plugin provides this pipeline task
const pluginEntry = getPluginPipeline(task);
if (pluginEntry) {
pipelineInstance = pluginEntry.factory(config);
break;
}
throw new Error(`Unknown pipeline task: "${task}". ` +
`Register a plugin with registerPlugin() to add custom pipeline tasks.`);
}
}
// Initialize the pipeline
await pipelineInstance.initialize();
return pipelineInstance;
}
/**
* Create multiple pipelines at once
*/
export async function createPipelines(tasks, options) {
const pipelines = await Promise.all(tasks.map(task => pipeline(task, options)));
const result = {};
for (let i = 0; i < tasks.length; i++) {
const task = tasks[i];
result[task] = pipelines[i];
}
return result;
}
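/**
 * Usage sketch for createPipelines (illustrative; task names come from the
 * PipelineTaskMap declared for this module):
 *
 * ```typescript
 * const pipes = await createPipelines(['sentiment-analysis', 'question-answering']);
 * const mood = await pipes['sentiment-analysis'].run('I love this product!');
 * const qa = await pipes['question-answering'].run({
 *   question: 'What is edgeFlow.js?',
 *   context: 'edgeFlow.js runs ML pipelines directly in the browser.',
 * });
 * ```
 */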
//# sourceMappingURL=index.js.map
================================================
FILE: dist/pipelines/object-detection.d.ts
================================================
/**
* edgeFlow.js - Object Detection Pipeline
*
* Detect objects in images with bounding boxes and class labels.
*/
import { BasePipeline, ObjectDetectionResult } from './base.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { type ImageInput } from '../utils/preprocessor.js';
export interface ObjectDetectionOptions extends PipelineOptions {
threshold?: number;
topK?: number;
nms?: boolean;
iouThreshold?: number;
}
export interface BoundingBox {
x: number;
y: number;
width: number;
height: number;
}
export interface Detection extends ObjectDetectionResult {
classId: number;
boxNormalized: BoundingBox;
}
export declare const COCO_LABELS: string[];
export declare class ObjectDetectionPipeline extends BasePipeline {
private preprocessor;
private onnxModel;
private labels;
private modelUrl;
constructor(config?: PipelineConfig, labels?: string[]);
initialize(): Promise<void>;
setLabels(labels: string[]): void;
run(input: ImageInput | ImageInput[], options?: ObjectDetectionOptions): Promise<Detection[]>;
protected preprocess(input: ImageInput | ImageInput[]): Promise<EdgeFlowTensor[]>;
private runModelInference;
protected postprocess(outputs: EdgeFlowTensor[], options?: PipelineOptions): Promise<Detection[]>;
private parseDetections;
private nonMaxSuppression;
private computeIoU;
}
export declare function createObjectDetectionPipeline(config?: PipelineConfig, labels?: string[]): ObjectDetectionPipeline;
//# sourceMappingURL=object-detection.d.ts.map
================================================
FILE: dist/pipelines/object-detection.js
================================================
/**
* edgeFlow.js - Object Detection Pipeline
*
* Detect objects in images with bounding boxes and class labels.
*/
import { BasePipeline, registerPipeline } from './base.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { ImagePreprocessor } from '../utils/preprocessor.js';
import { loadModelData } from '../utils/model-loader.js';
import { loadModelFromBuffer, runInference } from '../core/runtime.js';
// ============================================================================
// Default Model (YOLOS-tiny, quantized)
// ============================================================================
const DEFAULT_MODELS = {
model: 'https://huggingface.co/Xenova/yolos-tiny/resolve/main/onnx/model_quantized.onnx',
};
// ============================================================================
// COCO Labels
// ============================================================================
export const COCO_LABELS = [
'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
'toothbrush'
];
// ============================================================================
// Object Detection Pipeline
// ============================================================================
export class ObjectDetectionPipeline extends BasePipeline {
preprocessor;
onnxModel = null;
labels;
modelUrl;
constructor(config, labels) {
super(config ?? {
task: 'object-detection',
model: 'default',
});
this.labels = labels ?? COCO_LABELS;
this.modelUrl = (config?.model && config.model !== 'default') ? config.model : DEFAULT_MODELS.model;
this.preprocessor = new ImagePreprocessor({
width: 640,
height: 640,
mean: [0.485, 0.456, 0.406],
std: [0.229, 0.224, 0.225],
channelFormat: 'CHW',
});
}
async initialize() {
await super.initialize();
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setLabels(labels) {
this.labels = labels;
}
async run(input, options) {
await this.initialize();
const tensorInputs = await this.preprocess(input);
const outputs = await this.runModelInference(tensorInputs);
return this.postprocess(outputs, options);
}
async preprocess(input) {
const inputs = Array.isArray(input) ? input : [input];
if (inputs.length === 1) {
const tensor = await this.preprocessor.process(inputs[0]);
return [new EdgeFlowTensor(tensor.toFloat32Array(), [1, ...tensor.shape], 'float32')];
}
return [await this.preprocessor.processBatch(inputs)];
}
async runModelInference(inputs) {
const outputs = await runInference(this.onnxModel, inputs);
return outputs;
}
async postprocess(outputs, options) {
const opts = options ?? {};
const threshold = opts.threshold ?? 0.5;
const topK = opts.topK ?? 100;
const nms = opts.nms ?? true;
const iouThreshold = opts.iouThreshold ?? 0.5;
if (!outputs[0]) {
return [];
}
const outputData = outputs[0].toFloat32Array();
const shape = [...outputs[0].shape];
const detections = this.parseDetections(outputData, shape, threshold);
let filtered = nms ? this.nonMaxSuppression(detections, iouThreshold) : detections;
filtered.sort((a, b) => b.score - a.score);
filtered = filtered.slice(0, topK);
return filtered;
}
parseDetections(data, shape, threshold) {
const detections = [];
const numBoxes = shape[1] ?? 0;
const boxSize = shape[2] ?? 0;
if (boxSize >= 5) {
const numClasses = boxSize - 5;
for (let i = 0; i < numBoxes; i++) {
const offset = i * boxSize;
const objectness = data[offset + 4] ?? 0;
if (objectness < threshold)
continue;
let maxClassScore = 0;
let maxClassIdx = 0;
for (let c = 0; c < numClasses; c++) {
const score = data[offset + 5 + c] ?? 0;
if (score > maxClassScore) {
maxClassScore = score;
maxClassIdx = c;
}
}
const confidence = objectness * maxClassScore;
if (confidence < threshold)
continue;
const x = data[offset] ?? 0;
const y = data[offset + 1] ?? 0;
const w = data[offset + 2] ?? 0;
const h = data[offset + 3] ?? 0;
detections.push({
label: this.labels[maxClassIdx] ?? `class_${maxClassIdx}`,
score: confidence,
classId: maxClassIdx,
box: {
x: Math.max(0, x - w / 2),
y: Math.max(0, y - h / 2),
width: w,
height: h,
},
boxNormalized: {
x: Math.max(0, x - w / 2),
y: Math.max(0, y - h / 2),
width: w,
height: h,
},
});
}
}
else if (boxSize === 4) {
for (let i = 0; i < numBoxes; i++) {
const offset = i * boxSize;
const x1 = data[offset] ?? 0;
const y1 = data[offset + 1] ?? 0;
const x2 = data[offset + 2] ?? 0;
const y2 = data[offset + 3] ?? 0;
detections.push({
label: this.labels[0] ?? 'object',
score: 1.0,
classId: 0,
box: {
x: x1,
y: y1,
width: x2 - x1,
height: y2 - y1,
},
boxNormalized: {
x: x1,
y: y1,
width: x2 - x1,
height: y2 - y1,
},
});
}
}
return detections;
}
nonMaxSuppression(detections, iouThreshold) {
if (detections.length === 0)
return [];
const sorted = [...detections].sort((a, b) => b.score - a.score);
const selected = [];
const active = new Array(sorted.length).fill(true);
for (let i = 0; i < sorted.length; i++) {
if (!active[i])
continue;
const current = sorted[i];
selected.push(current);
for (let j = i + 1; j < sorted.length; j++) {
if (!active[j])
continue;
const other = sorted[j];
if (current.classId !== other.classId)
continue;
const iou = this.computeIoU(current.box, other.box);
if (iou > iouThreshold) {
active[j] = false;
}
}
}
return selected;
}
computeIoU(a, b) {
const xOverlap = Math.max(0, Math.min(a.x + a.width, b.x + b.width) - Math.max(a.x, b.x));
const yOverlap = Math.max(0, Math.min(a.y + a.height, b.y + b.height) - Math.max(a.y, b.y));
const intersection = xOverlap * yOverlap;
const aArea = a.width * a.height;
const bArea = b.width * b.height;
const union = aArea + bArea - intersection;
return union > 0 ? intersection / union : 0;
}
}
// ============================================================================
// Factory
// ============================================================================
export function createObjectDetectionPipeline(config, labels) {
return new ObjectDetectionPipeline(config, labels);
}
registerPipeline('object-detection', (config) => new ObjectDetectionPipeline(config));
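/**
 * Usage sketch (illustrative; assumes an <img> element is an accepted ImageInput
 * and that the default quantized YOLOS-tiny model is downloaded on first run):
 *
 * ```typescript
 * const detector = createObjectDetectionPipeline();
 * const img = document.querySelector('img')!;
 * const detections = await detector.run(img, {
 *   threshold: 0.5,    // minimum confidence
 *   nms: true,         // class-wise non-max suppression
 *   iouThreshold: 0.5, // overlap above this suppresses the lower-scoring box
 *   topK: 20,
 * });
 * for (const d of detections) {
 *   console.log(d.label, d.score.toFixed(2), d.box);
 * }
 * ```
 */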
//# sourceMappingURL=object-detection.js.map
================================================
FILE: dist/pipelines/question-answering.d.ts
================================================
/**
* edgeFlow.js - Question Answering Pipeline
*
* Extract answers from context given a question using real ONNX QA models.
*/
import { BasePipeline, PipelineResult } from './base.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { Tokenizer } from '../utils/tokenizer.js';
export interface QAInput {
question: string;
context: string;
}
export interface QuestionAnsweringOptions extends PipelineOptions {
maxAnswerLength?: number;
maxQuestionLength?: number;
topK?: number;
threshold?: number;
handleImpossible?: boolean;
}
export interface QuestionAnsweringResult extends PipelineResult {
answer: string;
score: number;
start: number;
end: number;
}
export declare class QuestionAnsweringPipeline extends BasePipeline {
private tokenizer;
private onnxModel;
private modelUrl;
private tokenizerUrl;
constructor(config?: PipelineConfig);
initialize(): Promise<void>;
setTokenizer(tokenizer: Tokenizer): void;
run(input: QAInput | QAInput[], options?: QuestionAnsweringOptions): Promise<QuestionAnsweringResult | QuestionAnsweringResult[]>;
private answerQuestion;
private tokenOffsetToCharOffset;
protected preprocess(input: QAInput | QAInput[]): Promise<EdgeFlowTensor[]>;
protected postprocess(outputs: EdgeFlowTensor[], _options?: PipelineOptions): Promise<QuestionAnsweringResult>;
}
export declare function createQuestionAnsweringPipeline(config?: PipelineConfig): QuestionAnsweringPipeline;
//# sourceMappingURL=question-answering.d.ts.map
================================================
FILE: dist/pipelines/question-answering.js
================================================
/**
* edgeFlow.js - Question Answering Pipeline
*
* Extract answers from context given a question using real ONNX QA models.
*/
import { BasePipeline, registerPipeline } from './base.js';
import { EdgeFlowTensor, softmax } from '../core/tensor.js';
import { Tokenizer } from '../utils/tokenizer.js';
import { loadModelData } from '../utils/model-loader.js';
import { loadModelFromBuffer, runInferenceNamed } from '../core/runtime.js';
// ============================================================================
// Default Model (DistilBERT fine-tuned on SQuAD)
// ============================================================================
const DEFAULT_MODELS = {
model: 'https://huggingface.co/Xenova/distilbert-base-cased-distilled-squad/resolve/main/onnx/model_quantized.onnx',
tokenizer: 'https://huggingface.co/Xenova/distilbert-base-cased-distilled-squad/resolve/main/tokenizer.json',
};
// ============================================================================
// Question Answering Pipeline
// ============================================================================
export class QuestionAnsweringPipeline extends BasePipeline {
tokenizer = null;
onnxModel = null;
modelUrl;
tokenizerUrl;
constructor(config) {
super(config ?? {
task: 'question-answering',
model: 'default',
});
this.modelUrl = (config?.model && config.model !== 'default') ? config.model : DEFAULT_MODELS.model;
this.tokenizerUrl = DEFAULT_MODELS.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
}
async run(input, options) {
await this.initialize();
const inputs = Array.isArray(input) ? input : [input];
const results = await Promise.all(inputs.map(i => this.answerQuestion(i, options ?? {})));
return Array.isArray(input) ? results : results[0];
}
async answerQuestion(input, options) {
const startTime = performance.now();
const { question, context } = input;
const maxAnswerLength = options.maxAnswerLength ?? 30;
const encoded = this.tokenizer.encode(question, {
textPair: context,
addSpecialTokens: true,
maxLength: 512,
truncation: true,
returnAttentionMask: true,
returnTokenTypeIds: true,
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(id => BigInt(id))), [1, encoded.inputIds.length], 'int64');
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map(m => BigInt(m))), [1, encoded.attentionMask.length], 'int64');
const namedInputs = new Map();
namedInputs.set('input_ids', inputIds);
namedInputs.set('attention_mask', attentionMask);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
if (outputs.length < 2) {
return { answer: '', score: 0, start: 0, end: 0, processingTime: performance.now() - startTime };
}
const startLogits = outputs[0].toFloat32Array();
const endLogits = outputs[1].toFloat32Array();
const seqLen = startLogits.length;
const startProbs = softmax(new EdgeFlowTensor(new Float32Array(startLogits), [seqLen], 'float32')).toFloat32Array();
const endProbs = softmax(new EdgeFlowTensor(new Float32Array(endLogits), [seqLen], 'float32')).toFloat32Array();
// Find best start/end token positions
let bestStartIdx = 0;
let bestEndIdx = 0;
let bestScore = 0;
for (let s = 0; s < seqLen; s++) {
for (let e = s; e < Math.min(s + maxAnswerLength, seqLen); e++) {
const score = (startProbs[s] ?? 0) * (endProbs[e] ?? 0);
if (score > bestScore) {
bestScore = score;
bestStartIdx = s;
bestEndIdx = e;
}
}
}
// Decode the answer span back to text
const answerTokenIds = encoded.inputIds.slice(bestStartIdx, bestEndIdx + 1);
const answer = this.tokenizer.decode(answerTokenIds, true);
// Map token positions back to character offsets in context
const charStart = this.tokenOffsetToCharOffset(context, question, encoded.inputIds, bestStartIdx);
const charEnd = this.tokenOffsetToCharOffset(context, question, encoded.inputIds, bestEndIdx) + 1;
return {
answer: answer || '',
score: bestScore,
start: charStart,
end: charEnd,
processingTime: performance.now() - startTime,
};
}
tokenOffsetToCharOffset(context, _question, inputIds, tokenIdx) {
// Approximate mapping: decode tokens up to this index and measure length
// For a production implementation you'd use the tokenizer's offset mapping.
const decoded = this.tokenizer.decode(inputIds.slice(0, tokenIdx + 1), true);
const contextStart = context.indexOf(decoded.trim().split(' ').pop() ?? '');
return contextStart >= 0 ? contextStart : 0;
}
async preprocess(input) {
const qaInput = Array.isArray(input) ? input[0] : input;
const encoded = this.tokenizer.encode(qaInput.question, {
textPair: qaInput.context,
addSpecialTokens: true,
maxLength: 512,
truncation: true,
returnAttentionMask: true,
returnTokenTypeIds: true,
});
return [
new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(id => BigInt(id))), [1, encoded.inputIds.length], 'int64'),
new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map(m => BigInt(m))), [1, encoded.attentionMask.length], 'int64'),
];
}
async postprocess(outputs, _options) {
if (outputs.length < 2) {
return { answer: '', score: 0, start: 0, end: 0 };
}
const startLogits = outputs[0].toFloat32Array();
const endLogits = outputs[1].toFloat32Array();
const seqLen = startLogits.length;
const startProbs = softmax(new EdgeFlowTensor(startLogits, [seqLen], 'float32')).toFloat32Array();
const endProbs = softmax(new EdgeFlowTensor(endLogits, [seqLen], 'float32')).toFloat32Array();
let bestStart = 0;
let bestEnd = 0;
let bestScore = 0;
for (let start = 0; start < seqLen; start++) {
for (let end = start; end < Math.min(start + 30, seqLen); end++) {
const score = (startProbs[start] ?? 0) * (endProbs[end] ?? 0);
if (score > bestScore) {
bestScore = score;
bestStart = start;
bestEnd = end;
}
}
}
return {
answer: '',
score: bestScore,
start: bestStart,
end: bestEnd,
};
}
}
// ============================================================================
// Factory
// ============================================================================
export function createQuestionAnsweringPipeline(config) {
return new QuestionAnsweringPipeline(config);
}
registerPipeline('question-answering', (config) => new QuestionAnsweringPipeline(config));
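/**
 * Usage sketch (illustrative; downloads the default DistilBERT-SQuAD model on first run):
 *
 * ```typescript
 * const qa = createQuestionAnsweringPipeline();
 * const result = await qa.run({
 *   question: 'Where does the model run?',
 *   context: 'edgeFlow.js executes ONNX models directly in the browser.',
 * });
 * console.log(result.answer, result.score); // start/end are approximate character offsets
 * ```
 */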
//# sourceMappingURL=question-answering.js.map
================================================
FILE: dist/pipelines/text-classification.d.ts
================================================
/**
* edgeFlow.js - Text Classification Pipeline
*
* High-level API for text classification tasks including
* sentiment analysis, topic classification, etc.
*/
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { BasePipeline, TextClassificationResult } from './base.js';
export interface TextClassificationOptions extends PipelineOptions {
returnAllScores?: boolean;
labels?: string[];
topK?: number;
}
export declare class TextClassificationPipeline extends BasePipeline {
private tokenizer;
private onnxModel;
private labels;
private modelUrl;
private tokenizerUrl;
constructor(config: PipelineConfig, labels?: string[]);
initialize(): Promise<void>;
setLabels(labels: string[]): void;
run(input: string | string[], options?: TextClassificationOptions): Promise<TextClassificationResult | TextClassificationResult[]>;
protected preprocess(input: string | string[]): Promise<EdgeFlowTensor[]>;
private runInference;
protected postprocess(outputs: EdgeFlowTensor[], options?: TextClassificationOptions): Promise<TextClassificationResult>;
}
export declare class SentimentAnalysisPipeline extends TextClassificationPipeline {
constructor(config: PipelineConfig);
analyze(text: string | string[], options?: TextClassificationOptions): Promise<TextClassificationResult | TextClassificationResult[]>;
}
export declare function createTextClassificationPipeline(config?: Partial<PipelineConfig>): TextClassificationPipeline;
export declare function createSentimentAnalysisPipeline(config?: Partial<PipelineConfig>): SentimentAnalysisPipeline;
//# sourceMappingURL=text-classification.d.ts.map
================================================
FILE: dist/pipelines/text-classification.js
================================================
/**
* edgeFlow.js - Text Classification Pipeline
*
* High-level API for text classification tasks including
* sentiment analysis, topic classification, etc.
*/
import { EdgeFlowTensor, softmax } from '../core/tensor.js';
import { Tokenizer } from '../utils/tokenizer.js';
import { loadModelData } from '../utils/model-loader.js';
import { loadModelFromBuffer, runInferenceNamed } from '../core/runtime.js';
import { BasePipeline, registerPipeline, SENTIMENT_LABELS, } from './base.js';
// ============================================================================
// Default Model (DistilBERT fine-tuned on SST-2)
// ============================================================================
const DEFAULT_MODELS = {
model: 'https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/onnx/model_quantized.onnx',
tokenizer: 'https://huggingface.co/Xenova/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/tokenizer.json',
};
const DEFAULT_SST2_LABELS = ['NEGATIVE', 'POSITIVE'];
export class TextClassificationPipeline extends BasePipeline {
tokenizer = null;
onnxModel = null;
labels;
modelUrl;
tokenizerUrl;
constructor(config, labels) {
super(config);
this.labels = labels ?? DEFAULT_SST2_LABELS;
this.modelUrl = config.model !== 'default' ? config.model : DEFAULT_MODELS.model;
this.tokenizerUrl = DEFAULT_MODELS.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setLabels(labels) {
this.labels = labels;
}
async run(input, options) {
const isBatch = Array.isArray(input);
const inputs = isBatch ? input : [input];
await this.initialize();
const startTime = performance.now();
const results = [];
for (const text of inputs) {
const tensorInputs = await this.preprocess(text);
const outputs = await this.runInference(tensorInputs);
const result = await this.postprocess(outputs, options);
results.push(result);
}
const processingTime = performance.now() - startTime;
for (const result of results) {
result.processingTime = processingTime / results.length;
}
return isBatch ? results : results[0];
}
async preprocess(input) {
const text = Array.isArray(input) ? input[0] : input;
const encoded = this.tokenizer.encode(text, {
maxLength: 128,
padding: 'max_length',
truncation: true,
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(id => BigInt(id))), [1, encoded.inputIds.length], 'int64');
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map(m => BigInt(m))), [1, encoded.attentionMask.length], 'int64');
return [inputIds, attentionMask];
}
async runInference(inputs) {
const namedInputs = new Map();
namedInputs.set('input_ids', inputs[0]);
namedInputs.set('attention_mask', inputs[1]);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
return outputs;
}
async postprocess(outputs, options) {
const logits = outputs[0];
if (!logits) {
return { label: 'unknown', score: 0 };
}
const probs = softmax(logits, -1);
const probsArray = probs.toFloat32Array();
let maxIdx = 0;
let maxScore = probsArray[0] ?? 0;
for (let i = 1; i < probsArray.length; i++) {
if ((probsArray[i] ?? 0) > maxScore) {
maxScore = probsArray[i] ?? 0;
maxIdx = i;
}
}
const label = options?.labels?.[maxIdx] ?? this.labels[maxIdx] ?? `class_${maxIdx}`;
return {
label,
score: maxScore,
};
}
}
// ============================================================================
// Sentiment Analysis Pipeline
// ============================================================================
export class SentimentAnalysisPipeline extends TextClassificationPipeline {
constructor(config) {
super(config, SENTIMENT_LABELS);
}
async analyze(text, options) {
return this.run(text, options);
}
}
// ============================================================================
// Factory Functions
// ============================================================================
export function createTextClassificationPipeline(config = {}) {
return new TextClassificationPipeline({
task: 'text-classification',
model: config.model ?? 'default',
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization,
});
}
export function createSentimentAnalysisPipeline(config = {}) {
return new SentimentAnalysisPipeline({
task: 'sentiment-analysis',
model: config.model ?? 'default',
runtime: config.runtime,
cache: config.cache ?? true,
quantization: config.quantization,
});
}
registerPipeline('text-classification', (config) => new TextClassificationPipeline(config));
registerPipeline('sentiment-analysis', (config) => new SentimentAnalysisPipeline(config));
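/**
 * Usage sketch (illustrative; uses the default quantized SST-2 DistilBERT model):
 *
 * ```typescript
 * const sentiment = createSentimentAnalysisPipeline();
 * const result = await sentiment.analyze('The new release is fantastic!');
 * console.log(result.label, result.score); // e.g. a positive label with its probability
 * ```
 */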
//# sourceMappingURL=text-classification.js.map
================================================
FILE: dist/pipelines/text-generation.d.ts
================================================
/**
* edgeFlow.js - Text Generation Pipeline
*
* Autoregressive text generation with streaming support.
* Supports GPT-2, LLaMA, Mistral, and other causal LM models.
* Includes chat/conversation support with message history.
*/
import { BasePipeline, PipelineResult } from './base.js';
import { Tokenizer } from '../utils/tokenizer.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { PipelineConfig, PipelineOptions } from '../core/types.js';
/**
* LLM model loading progress callback
*/
export interface LLMLoadProgress {
/** Stage: 'tokenizer' or 'model' */
stage: 'tokenizer' | 'model';
/** Bytes loaded */
loaded: number;
/** Total bytes */
total: number;
/** Progress percentage (0-100) */
progress: number;
}
/**
* Chat message
*/
export interface ChatMessage {
/** Role: 'system', 'user', or 'assistant' */
role: 'system' | 'user' | 'assistant';
/** Message content */
content: string;
}
/**
* Chat template type
*/
export type ChatTemplateType = 'chatml' | 'llama2' | 'llama3' | 'mistral' | 'phi3' | 'alpaca' | 'vicuna' | 'custom';
/**
* Text generation options
*/
export interface TextGenerationOptions {
/** Maximum number of new tokens to generate */
maxNewTokens?: number;
/** Maximum total length (prompt + generated) */
maxLength?: number;
/** Minimum number of new tokens to generate */
minNewTokens?: number;
/** Sampling temperature (higher = more random) */
temperature?: number;
/** Top-k sampling (0 = disabled) */
topK?: number;
/** Top-p (nucleus) sampling (1.0 = disabled) */
topP?: number;
/** Repetition penalty (1.0 = disabled) */
repetitionPenalty?: number;
/** Stop sequences */
stopSequences?: string[];
/** Whether to do sampling (false = greedy) */
doSample?: boolean;
/** Number of sequences to return */
numReturnSequences?: number;
/** Return full text (including prompt) */
returnFullText?: boolean;
/** Callback for each generated token */
onToken?: (token: string, tokenId: number) => void;
}
/**
* Chat generation options
*/
export interface ChatOptions extends TextGenerationOptions {
/** System prompt */
systemPrompt?: string;
/** Chat template type */
templateType?: ChatTemplateType;
/** Custom template (if templateType is 'custom') */
customTemplate?: {
systemPrefix?: string;
systemSuffix?: string;
userPrefix?: string;
userSuffix?: string;
assistantPrefix?: string;
assistantSuffix?: string;
separator?: string;
};
}
/**
* Text generation result
*/
export interface TextGenerationResult extends PipelineResult {
/** Generated text */
generatedText: string;
/** Full text (prompt + generated) if returnFullText is true */
fullText?: string;
/** Generated token IDs */
tokenIds: number[];
/** Number of tokens generated */
numTokens: number;
}
/**
* Streaming generation event
*/
export interface GenerationStreamEvent {
/** Current token */
token: string;
/** Token ID */
tokenId: number;
/** Generated text so far */
generatedText: string;
/** Whether generation is complete */
done: boolean;
}
/**
* TextGenerationPipeline - Autoregressive text generation
*
* @example
* ```typescript
 * const generator = await pipeline('text-generation', { model: 'Xenova/gpt2' });
*
* // Simple generation
* const result = await generator.run('Once upon a time');
* console.log(result.generatedText);
*
* // Streaming generation
* for await (const event of generator.stream('Hello, ')) {
* process.stdout.write(event.token);
* }
* ```
*/
export declare class TextGenerationPipeline extends BasePipeline {
private tokenizer;
private eosTokenId;
private llmModel;
private modelsLoaded;
private modelUrl;
private tokenizerUrl;
constructor(config?: PipelineConfig);
/**
* Check if model is loaded
*/
get isModelLoaded(): boolean;
/**
* Set custom model URLs
*/
setModelUrls(model: string, tokenizer: string): void;
/**
* Load model and tokenizer with progress callback
*/
loadModel(onProgress?: (progress: LLMLoadProgress) => void): Promise<void>;
/**
* Fetch model with progress tracking
*/
private fetchModelWithProgress;
/**
* Initialize pipeline (override to skip default model loading)
*/
initialize(): Promise<void>;
/**
* Set tokenizer
*/
setTokenizer(tokenizer: Tokenizer): void;
/**
* Preprocess - not used for text generation (handled in generateSingle)
*/
protected preprocess(input: string | string[]): Promise<EdgeFlowTensor[]>;
/**
* Postprocess - not used for text generation (handled in generateSingle)
*/
protected postprocess(_outputs: EdgeFlowTensor[], _options?: PipelineOptions): Promise<TextGenerationResult>;
/**
* Generate text (non-streaming)
*/
run(prompt: string | string[], options?: PipelineOptions & TextGenerationOptions): Promise<TextGenerationResult | TextGenerationResult[]>;
/**
* Generate text with streaming (async generator)
*/
stream(prompt: string, options?: TextGenerationOptions): AsyncGenerator<GenerationStreamEvent>;
/**
* Generate a single sequence (non-streaming)
*/
private generateSingle;
/**
* Generate next token using the model
*/
private generateNextToken;
/**
* Greedy decoding (argmax)
*/
private greedy;
/**
* Sample from probability distribution with top-k/top-p filtering
*/
private sample;
private conversationHistory;
private chatTemplateType;
/**
* Set the chat template type
*/
setChatTemplate(templateType: ChatTemplateType): void;
/**
* Apply chat template to messages
*/
applyChatTemplate(messages: ChatMessage[], options?: ChatOptions): string;
/**
* ChatML template (used by many models including Qwen, Yi)
*/
private applyChatMLTemplate;
/**
* Llama 2 template
*/
private applyLlama2Template;
/**
* Llama 3 template
*/
private applyLlama3Template;
/**
* Mistral template
*/
private applyMistralTemplate;
/**
* Phi-3 template
*/
private applyPhi3Template;
/**
* Alpaca template
*/
private applyAlpacaTemplate;
/**
* Vicuna template
*/
private applyVicunaTemplate;
/**
* Custom template
*/
private applyCustomTemplate;
/**
* Chat with the model
*
* @example
* ```typescript
 * const generator = await pipeline('text-generation');
*
* // Single turn
* const response = await generator.chat('Hello, how are you?');
*
* // Multi-turn with history
* const response1 = await generator.chat('What is AI?');
* const response2 = await generator.chat('Can you give an example?');
*
* // With system prompt
* const response = await generator.chat('Hello', {
* systemPrompt: 'You are a helpful assistant.',
* });
* ```
*/
chat(userMessage: string, options?: ChatOptions): Promise<string>;
/**
* Stream chat response
*/
chatStream(userMessage: string, options?: ChatOptions): AsyncGenerator<GenerationStreamEvent>;
/**
* Get conversation history
*/
getConversationHistory(): ChatMessage[];
/**
* Set conversation history
*/
setConversationHistory(messages: ChatMessage[]): void;
/**
* Clear conversation history
*/
clearConversation(): void;
/**
* Remove last exchange (user message + assistant response)
*/
undoLastExchange(): void;
}
/**
* Create text generation pipeline
*/
export declare function createTextGenerationPipeline(config?: PipelineConfig): TextGenerationPipeline;
//# sourceMappingURL=text-generation.d.ts.map
================================================
FILE: dist/pipelines/text-generation.js
================================================
/**
* edgeFlow.js - Text Generation Pipeline
*
* Autoregressive text generation with streaming support.
* Supports GPT-2, LLaMA, Mistral, and other causal LM models.
* Includes chat/conversation support with message history.
*/
import { BasePipeline } from './base.js';
import { Tokenizer } from '../utils/tokenizer.js';
import { EdgeFlowTensor, softmax } from '../core/tensor.js';
import { runInferenceNamed, loadModelFromBuffer } from '../core/runtime.js';
// ============================================================================
// Default Model URLs (TinyLlama - quantized for browser)
// ============================================================================
const DEFAULT_LLM_MODELS = {
model: 'https://huggingface.co/Xenova/TinyLlama-1.1B-Chat-v1.0/resolve/main/onnx/model_q4f16.onnx',
tokenizer: 'https://huggingface.co/Xenova/TinyLlama-1.1B-Chat-v1.0/resolve/main/tokenizer.json',
};
// ============================================================================
// Text Generation Pipeline
// ============================================================================
/**
* TextGenerationPipeline - Autoregressive text generation
*
* @example
* ```typescript
 * const generator = await pipeline('text-generation', { model: 'Xenova/gpt2' });
*
* // Simple generation
* const result = await generator.run('Once upon a time');
* console.log(result.generatedText);
*
* // Streaming generation
* for await (const event of generator.stream('Hello, ')) {
* process.stdout.write(event.token);
* }
* ```
*/
export class TextGenerationPipeline extends BasePipeline {
tokenizer = null;
eosTokenId = 50256; // GPT-2 default
llmModel = null;
modelsLoaded = false;
// Custom model URLs
modelUrl;
tokenizerUrl;
constructor(config) {
super(config ?? {
task: 'text-generation',
model: 'default',
});
this.modelUrl = DEFAULT_LLM_MODELS.model;
this.tokenizerUrl = DEFAULT_LLM_MODELS.tokenizer;
}
/**
* Check if model is loaded
*/
get isModelLoaded() {
return this.modelsLoaded;
}
/**
* Set custom model URLs
*/
setModelUrls(model, tokenizer) {
this.modelUrl = model;
this.tokenizerUrl = tokenizer;
}
/**
* Load model and tokenizer with progress callback
*/
async loadModel(onProgress) {
if (this.modelsLoaded)
return;
// Load tokenizer first (small, fast)
onProgress?.({ stage: 'tokenizer', loaded: 0, total: 100, progress: 0 });
try {
const tokenizerResponse = await fetch(this.tokenizerUrl);
if (!tokenizerResponse.ok) {
throw new Error(`Failed to fetch tokenizer: ${tokenizerResponse.status}`);
}
const tokenizerJson = await tokenizerResponse.json();
this.tokenizer = await Tokenizer.fromJSON(tokenizerJson);
const specialIds = this.tokenizer.getSpecialTokenIds();
this.eosTokenId = specialIds.eosTokenId ?? specialIds.sepTokenId ?? 2; // TinyLlama uses 2 as EOS
onProgress?.({ stage: 'tokenizer', loaded: 100, total: 100, progress: 100 });
}
catch (error) {
throw new Error(`Failed to load tokenizer: ${error}`);
}
// Load model with progress tracking
onProgress?.({ stage: 'model', loaded: 0, total: 100, progress: 0 });
const modelData = await this.fetchModelWithProgress(this.modelUrl, (loaded, total) => {
onProgress?.({
stage: 'model',
loaded,
total,
progress: Math.round((loaded / total) * 100),
});
});
this.llmModel = await loadModelFromBuffer(modelData, {
runtime: 'wasm', // Uses ONNXRuntime which auto-detects WebGPU internally
});
this.model = this.llmModel;
this.modelsLoaded = true;
}
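/**
 * Progress-callback sketch (illustrative; createTextGenerationPipeline is the
 * factory exported by this module, and the prompt/options below are placeholders):
 *
 * ```typescript
 * const generator = createTextGenerationPipeline();
 * await generator.loadModel(({ stage, progress }) => {
 *   console.log(`${stage}: ${progress}%`); // 'tokenizer' first, then 'model'
 * });
 * const out = await generator.run('Once upon a time', { maxNewTokens: 30, temperature: 0.8 });
 * console.log(out.generatedText, out.numTokens);
 * ```
 */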
/**
* Fetch model with progress tracking
*/
async fetchModelWithProgress(url, onProgress) {
const response = await fetch(url);
if (!response.ok) {
throw new Error(`Failed to fetch model: ${response.status} ${response.statusText}`);
}
const contentLength = response.headers.get('content-length');
const total = contentLength ? parseInt(contentLength, 10) : 0;
if (!response.body) {
// Fallback if no streaming support
const buffer = await response.arrayBuffer();
onProgress(buffer.byteLength, buffer.byteLength);
return buffer;
}
const reader = response.body.getReader();
const chunks = [];
let loaded = 0;
while (true) {
const { done, value } = await reader.read();
if (done)
break;
chunks.push(value);
loaded += value.length;
onProgress(loaded, total || loaded);
}
// Combine chunks into ArrayBuffer
const buffer = new Uint8Array(loaded);
let offset = 0;
for (const chunk of chunks) {
buffer.set(chunk, offset);
offset += chunk.length;
}
return buffer.buffer;
}
/**
* Initialize pipeline (override to skip default model loading)
*/
async initialize() {
if (this.isReady)
return;
// Don't call super.initialize() - we handle model loading separately
this.isReady = true;
}
/**
* Set tokenizer
*/
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
const specialIds = tokenizer.getSpecialTokenIds();
this.eosTokenId = specialIds.eosTokenId ?? specialIds.sepTokenId ?? 50256;
}
/**
* Preprocess - not used for text generation (handled in generateSingle)
*/
async preprocess(input) {
// For text generation, preprocessing is handled in generateNextToken
const text = Array.isArray(input) ? input[0] ?? '' : input;
if (!this.tokenizer) {
// Return dummy tensor if no tokenizer
return [new EdgeFlowTensor(new Float32Array([0]), [1], 'float32')];
}
const encoded = this.tokenizer.encode(text, {
addSpecialTokens: false,
padding: 'do_not_pad',
});
return [new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(id => BigInt(id))), [1, encoded.inputIds.length], 'int64')];
}
/**
* Postprocess - not used for text generation (handled in generateSingle)
*/
async postprocess(_outputs, _options) {
// For text generation, postprocessing is handled in generateSingle
return {
generatedText: '',
tokenIds: [],
numTokens: 0,
processingTime: 0,
};
}
/**
* Generate text (non-streaming)
*/
async run(prompt, options) {
await this.initialize();
const prompts = Array.isArray(prompt) ? prompt : [prompt];
const results = await Promise.all(prompts.map(p => this.generateSingle(p, options ?? {})));
return Array.isArray(prompt) ? results : results[0];
}
/**
* Generate text with streaming (async generator)
*/
async *stream(prompt, options = {}) {
const startTime = performance.now();
if (!this.tokenizer) {
throw new Error('Tokenizer not set. Call setTokenizer() first.');
}
const { maxNewTokens = 50, maxLength = 512, temperature = 1.0, topK = 0, topP = 1.0, repetitionPenalty = 1.0, stopSequences = [], doSample = true, } = options;
// Encode prompt
const encoded = this.tokenizer.encode(prompt, {
addSpecialTokens: false,
padding: 'do_not_pad',
truncation: false,
});
let inputIds = [...encoded.inputIds];
const generatedIds = [];
let generatedText = '';
// Generation loop
for (let i = 0; i < maxNewTokens; i++) {
// Check max length
if (inputIds.length >= maxLength)
break;
// Run model forward pass
const nextTokenId = await this.generateNextToken(inputIds, temperature, topK, topP, repetitionPenalty, doSample);
// Check for EOS
if (nextTokenId === this.eosTokenId) {
yield {
token: '',
tokenId: nextTokenId,
generatedText,
done: true,
};
break;
}
// Decode token
const token = this.tokenizer.decode([nextTokenId], true);
generatedIds.push(nextTokenId);
inputIds.push(nextTokenId);
generatedText += token;
// Call token callback
if (options.onToken) {
options.onToken(token, nextTokenId);
}
// Check stop sequences
let shouldStop = false;
for (const stopSeq of stopSequences) {
if (generatedText.endsWith(stopSeq)) {
generatedText = generatedText.slice(0, -stopSeq.length);
shouldStop = true;
break;
}
}
yield {
token,
tokenId: nextTokenId,
generatedText,
done: shouldStop,
};
if (shouldStop)
break;
}
// Log total generation time
const endTime = performance.now();
console.log(`Generation completed in ${(endTime - startTime).toFixed(2)}ms`);
}
/**
* Generate a single sequence (non-streaming)
*/
async generateSingle(prompt, options) {
const startTime = performance.now();
if (!this.tokenizer) {
throw new Error('Tokenizer not set. Call setTokenizer() first.');
}
const { maxNewTokens = 50, maxLength = 512, temperature = 1.0, topK = 0, topP = 1.0, repetitionPenalty = 1.0, stopSequences = [], doSample = true, returnFullText = false, } = options;
// Encode prompt
const encoded = this.tokenizer.encode(prompt, {
addSpecialTokens: false,
padding: 'do_not_pad',
truncation: false,
});
let inputIds = [...encoded.inputIds];
const generatedIds = [];
// Generation loop
for (let i = 0; i < maxNewTokens; i++) {
// Check max length
if (inputIds.length >= maxLength)
break;
// Run model forward pass
const nextTokenId = await this.generateNextToken(inputIds, temperature, topK, topP, repetitionPenalty, doSample);
// Check for EOS
if (nextTokenId === this.eosTokenId)
break;
// Add to sequence
generatedIds.push(nextTokenId);
inputIds.push(nextTokenId);
// Call token callback
if (options.onToken) {
const token = this.tokenizer.decode([nextTokenId], true);
options.onToken(token, nextTokenId);
}
// Check stop sequences
const currentText = this.tokenizer.decode(generatedIds, true);
let shouldStop = false;
for (const stopSeq of stopSequences) {
if (currentText.endsWith(stopSeq)) {
shouldStop = true;
break;
}
}
if (shouldStop)
break;
}
// Decode generated text
const generatedText = this.tokenizer.decode(generatedIds, true);
const endTime = performance.now();
return {
generatedText,
fullText: returnFullText ? prompt + generatedText : undefined,
tokenIds: generatedIds,
numTokens: generatedIds.length,
processingTime: endTime - startTime,
};
}
/**
* Generate next token using the model
*/
async generateNextToken(inputIds, temperature, topK, topP, repetitionPenalty, doSample) {
if (!this.model) {
throw new Error('Model not loaded');
}
const seqLen = inputIds.length;
// Prepare named inputs
const inputs = new Map();
// input_ids: [1, seq_len]
inputs.set('input_ids', new EdgeFlowTensor(BigInt64Array.from(inputIds.map(id => BigInt(id))), [1, seqLen], 'int64'));
// attention_mask: [1, seq_len]
inputs.set('attention_mask', new EdgeFlowTensor(BigInt64Array.from(inputIds.map(() => BigInt(1))), [1, seqLen], 'int64'));
// position_ids: [1, seq_len] - sequential positions from 0 to seq_len-1
inputs.set('position_ids', new EdgeFlowTensor(BigInt64Array.from(Array.from({ length: seqLen }, (_, i) => BigInt(i))), [1, seqLen], 'int64'));
// TinyLlama has 22 layers with GQA (4 KV heads, head_dim=64)
// For first inference without cache, provide empty past_key_values
const numLayers = 22;
const numKVHeads = 4;
const headDim = 64;
for (let i = 0; i < numLayers; i++) {
// past_key_values.{i}.key: [batch, num_kv_heads, 0, head_dim]
inputs.set(`past_key_values.${i}.key`, new EdgeFlowTensor(new Float32Array(0), [1, numKVHeads, 0, headDim], 'float32'));
// past_key_values.{i}.value: [batch, num_kv_heads, 0, head_dim]
inputs.set(`past_key_values.${i}.value`, new EdgeFlowTensor(new Float32Array(0), [1, numKVHeads, 0, headDim], 'float32'));
}
// Run inference with named inputs
const outputs = await runInferenceNamed(this.model, inputs);
if (!outputs || outputs.length === 0) {
throw new Error('Model returned no outputs');
}
// Get logits for last token
const logits = outputs[0];
const logitsData = logits.toFloat32Array();
const vocabSize = logits.shape[logits.shape.length - 1] ?? 50257;
// Get logits for the last position
const lastPositionLogits = new Float32Array(vocabSize);
const offset = (inputIds.length - 1) * vocabSize;
for (let i = 0; i < vocabSize; i++) {
lastPositionLogits[i] = logitsData[offset + i] ?? 0;
}
// Apply repetition penalty
if (repetitionPenalty !== 1.0) {
for (const prevId of inputIds) {
if (prevId < vocabSize) {
const score = lastPositionLogits[prevId] ?? 0;
lastPositionLogits[prevId] = score > 0
? score / repetitionPenalty
: score * repetitionPenalty;
}
}
}
// Apply temperature
if (temperature !== 1.0) {
for (let i = 0; i < vocabSize; i++) {
lastPositionLogits[i] = (lastPositionLogits[i] ?? 0) / temperature;
}
}
// Convert to probabilities
const logitsTensor = new EdgeFlowTensor(lastPositionLogits, [vocabSize], 'float32');
const probs = softmax(logitsTensor).toFloat32Array();
// Sample or greedy
if (doSample) {
return this.sample(probs, topK, topP);
}
else {
return this.greedy(probs);
}
}
/**
* Greedy decoding (argmax)
*/
greedy(probs) {
let maxIdx = 0;
let maxProb = probs[0] ?? 0;
for (let i = 1; i < probs.length; i++) {
if ((probs[i] ?? 0) > maxProb) {
maxProb = probs[i] ?? 0;
maxIdx = i;
}
}
return maxIdx;
}
/**
* Sample from probability distribution with top-k/top-p filtering
*/
sample(probs, topK, topP) {
// Create sorted indices
const indices = Array.from({ length: probs.length }, (_, i) => i);
indices.sort((a, b) => (probs[b] ?? 0) - (probs[a] ?? 0));
// Apply top-k filtering
let candidateIndices = indices;
if (topK > 0 && topK < probs.length) {
candidateIndices = indices.slice(0, topK);
}
// Apply top-p (nucleus) filtering
if (topP < 1.0) {
let cumulativeProb = 0;
const filtered = [];
for (const idx of candidateIndices) {
filtered.push(idx);
cumulativeProb += probs[idx] ?? 0;
if (cumulativeProb >= topP)
break;
}
candidateIndices = filtered;
}
// Renormalize probabilities
let totalProb = 0;
for (const idx of candidateIndices) {
totalProb += probs[idx] ?? 0;
}
// Sample
const r = Math.random() * totalProb;
let cumulative = 0;
for (const idx of candidateIndices) {
cumulative += probs[idx] ?? 0;
if (cumulative >= r) {
return idx;
}
}
// Fallback
return candidateIndices[0] ?? 0;
}
// ==========================================================================
// Chat / Conversation Support
// ==========================================================================
conversationHistory = [];
chatTemplateType = 'chatml';
/**
* Set the chat template type
*/
setChatTemplate(templateType) {
this.chatTemplateType = templateType;
}
/**
* Apply chat template to messages
*/
applyChatTemplate(messages, options) {
const templateType = options?.templateType ?? this.chatTemplateType;
switch (templateType) {
case 'chatml':
return this.applyChatMLTemplate(messages);
case 'llama2':
return this.applyLlama2Template(messages);
case 'llama3':
return this.applyLlama3Template(messages);
case 'mistral':
return this.applyMistralTemplate(messages);
case 'phi3':
return this.applyPhi3Template(messages);
case 'alpaca':
return this.applyAlpacaTemplate(messages);
case 'vicuna':
return this.applyVicunaTemplate(messages);
case 'custom':
return this.applyCustomTemplate(messages, options?.customTemplate ?? {});
default:
return this.applyChatMLTemplate(messages);
}
}
/**
* ChatML template (used by many models including Qwen, Yi)
*/
applyChatMLTemplate(messages) {
let prompt = '';
for (const msg of messages) {
prompt += `<|im_start|>${msg.role}\n${msg.content}<|im_end|>\n`;
}
prompt += '<|im_start|>assistant\n';
return prompt;
}
/**
* Llama 2 template
*/
applyLlama2Template(messages) {
let prompt = '';
let systemMsg = '';
for (const msg of messages) {
if (msg.role === 'system') {
systemMsg = msg.content;
}
else if (msg.role === 'user') {
if (systemMsg) {
prompt += `[INST] <<SYS>>\n${systemMsg}\n<</SYS>>\n\n${msg.content} [/INST]`;
systemMsg = '';
}
else {
prompt += `[INST] ${msg.content} [/INST]`;
}
}
else if (msg.role === 'assistant') {
prompt += ` ${msg.content} `;
}
}
return prompt;
}
/**
* Llama 3 template
*/
applyLlama3Template(messages) {
let prompt = '<|begin_of_text|>';
for (const msg of messages) {
prompt += `<|start_header_id|>${msg.role}<|end_header_id|>\n\n${msg.content}<|eot_id|>`;
}
prompt += '<|start_header_id|>assistant<|end_header_id|>\n\n';
return prompt;
}
/**
* Mistral template
*/
applyMistralTemplate(messages) {
let prompt = '';
for (const msg of messages) {
if (msg.role === 'user') {
prompt += `[INST] ${msg.content} [/INST]`;
}
else if (msg.role === 'assistant') {
prompt += ` ${msg.content} `;
}
else if (msg.role === 'system') {
prompt += `[INST] ${msg.content}\n`;
}
}
return prompt;
}
/**
* Phi-3 template
*/
applyPhi3Template(messages) {
let prompt = '';
for (const msg of messages) {
prompt += `<|${msg.role}|>\n${msg.content}<|end|>\n`;
}
prompt += '<|assistant|>\n';
return prompt;
}
/**
* Alpaca template
*/
applyAlpacaTemplate(messages) {
let prompt = '';
let instruction = '';
let input = '';
for (const msg of messages) {
if (msg.role === 'system') {
instruction = msg.content;
}
else if (msg.role === 'user') {
input = msg.content;
}
}
if (instruction) {
prompt = `### Instruction:\n${instruction}\n\n`;
}
if (input) {
prompt += `### Input:\n${input}\n\n`;
}
prompt += '### Response:\n';
return prompt;
}
/**
* Vicuna template
*/
applyVicunaTemplate(messages) {
let prompt = '';
for (const msg of messages) {
if (msg.role === 'system') {
prompt += `${msg.content}\n\n`;
}
else if (msg.role === 'user') {
prompt += `USER: ${msg.content}\n`;
}
else if (msg.role === 'assistant') {
prompt += `ASSISTANT: ${msg.content}\n`;
}
}
prompt += 'ASSISTANT:';
return prompt;
}
/**
* Custom template
*/
applyCustomTemplate(messages, template) {
const { systemPrefix = '', systemSuffix = '\n', userPrefix = 'User: ', userSuffix = '\n', assistantPrefix = 'Assistant: ', assistantSuffix = '\n', separator = '', } = template;
let prompt = '';
for (let i = 0; i < messages.length; i++) {
const msg = messages[i];
if (i > 0)
prompt += separator;
switch (msg.role) {
case 'system':
prompt += `${systemPrefix}${msg.content}${systemSuffix}`;
break;
case 'user':
prompt += `${userPrefix}${msg.content}${userSuffix}`;
break;
case 'assistant':
prompt += `${assistantPrefix}${msg.content}${assistantSuffix}`;
break;
}
}
prompt += assistantPrefix;
return prompt;
}
/**
* Chat with the model
*
* @example
* ```typescript
* const generator = await pipeline('text-generation', 'model');
*
* // Single turn
* const response = await generator.chat('Hello, how are you?');
*
* // Multi-turn with history
* const response1 = await generator.chat('What is AI?');
* const response2 = await generator.chat('Can you give an example?');
*
* // With system prompt
* const response = await generator.chat('Hello', {
* systemPrompt: 'You are a helpful assistant.',
* });
* ```
*/
async chat(userMessage, options) {
// Add system message if provided and not already present
if (options?.systemPrompt &&
(this.conversationHistory.length === 0 || this.conversationHistory[0]?.role !== 'system')) {
this.conversationHistory.unshift({
role: 'system',
content: options.systemPrompt,
});
}
// Add user message
this.conversationHistory.push({
role: 'user',
content: userMessage,
});
// Apply chat template
const prompt = this.applyChatTemplate(this.conversationHistory, options);
// Generate response
const result = await this.run(prompt, {
...options,
stopSequences: [
...(options?.stopSequences ?? []),
'<|im_end|>',
'<|end|>',
'<|eot_id|>',
'</s>',
'\n\nUser:',
'\n\nHuman:',
],
});
// Add assistant response to history
const response = Array.isArray(result) ? result[0] : result;
this.conversationHistory.push({
role: 'assistant',
content: response.generatedText.trim(),
});
return response;
}
/**
* Stream chat response
*/
async *chatStream(userMessage, options) {
// Add system message if provided
if (options?.systemPrompt &&
(this.conversationHistory.length === 0 || this.conversationHistory[0]?.role !== 'system')) {
this.conversationHistory.unshift({
role: 'system',
content: options.systemPrompt,
});
}
// Add user message
this.conversationHistory.push({
role: 'user',
content: userMessage,
});
// Apply chat template
const prompt = this.applyChatTemplate(this.conversationHistory, options);
// Stream response
let fullResponse = '';
for await (const event of this.stream(prompt, {
...options,
stopSequences: [
...(options?.stopSequences ?? []),
'<|im_end|>',
'<|end|>',
'<|eot_id|>',
'</s>',
],
})) {
fullResponse = event.generatedText;
yield event;
}
// Add assistant response to history
this.conversationHistory.push({
role: 'assistant',
content: fullResponse.trim(),
});
}
/**
* Get conversation history
*/
getConversationHistory() {
return [...this.conversationHistory];
}
/**
* Set conversation history
*/
setConversationHistory(messages) {
this.conversationHistory = [...messages];
}
/**
* Clear conversation history
*/
clearConversation() {
this.conversationHistory = [];
}
/**
* Remove last exchange (user message + assistant response)
*/
undoLastExchange() {
// Remove assistant message
if (this.conversationHistory.length > 0 &&
this.conversationHistory[this.conversationHistory.length - 1]?.role === 'assistant') {
this.conversationHistory.pop();
}
// Remove user message
if (this.conversationHistory.length > 0 &&
this.conversationHistory[this.conversationHistory.length - 1]?.role === 'user') {
this.conversationHistory.pop();
}
}
}
// ============================================================================
// Factory Functions
// ============================================================================
/**
* Create text generation pipeline
*/
export function createTextGenerationPipeline(config) {
return new TextGenerationPipeline(config);
}
//# sourceMappingURL=text-generation.js.map
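/*
 * Usage sketch (illustrative only): shows how the streaming and chat APIs of
 * TextGenerationPipeline fit together. Assumptions: the config shape mirrors the
 * other pipelines in this repo, the tokenizer URL is a placeholder, and the model
 * itself is assumed to have been loaded via the loader defined earlier in this
 * file (not shown in this excerpt).
 */
import { Tokenizer } from '../utils/tokenizer.js';

async function textGenerationSketch() {
  const generator = createTextGenerationPipeline({ task: 'text-generation', model: 'default' });

  // Attach a tokenizer so encoding, decoding, and EOS detection work (see setTokenizer above).
  const tokenizer = await Tokenizer.fromUrl('https://example.com/tokenizer.json'); // placeholder URL
  generator.setTokenizer(tokenizer);

  // Streaming: each yielded event carries the new token and the accumulated text.
  for await (const event of generator.stream('Once upon a time', {
    maxNewTokens: 32,
    temperature: 0.8,
    topK: 40,
    stopSequences: ['\n\n'],
  })) {
    console.log(event.token, event.done);
  }

  // Multi-turn chat: conversation history is kept on the pipeline instance.
  generator.setChatTemplate('chatml');
  const reply = await generator.chat('What is edge inference?', {
    systemPrompt: 'You are a helpful assistant.',
    maxNewTokens: 64,
  });
  console.log(reply.generatedText, generator.getConversationHistory().length);
}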
================================================
FILE: dist/pipelines/zero-shot-classification.d.ts
================================================
/**
* edgeFlow.js - Zero-shot Classification Pipeline
*
* Classify text into any set of labels without fine-tuning,
* using a real NLI (Natural Language Inference) model.
*/
import { BasePipeline, PipelineResult } from './base.js';
import { EdgeFlowTensor } from '../core/tensor.js';
import { PipelineConfig, PipelineOptions } from '../core/types.js';
import { Tokenizer } from '../utils/tokenizer.js';
export interface ZeroShotClassificationOptions extends PipelineOptions {
multiLabel?: boolean;
hypothesisTemplate?: string;
}
export interface ZeroShotClassificationResult extends PipelineResult {
sequence: string;
labels: string[];
scores: number[];
}
export interface ZeroShotInput {
text: string | string[];
candidateLabels: string[];
}
export declare class ZeroShotClassificationPipeline extends BasePipeline {
private tokenizer;
private onnxModel;
private hypothesisTemplate;
private modelUrl;
private tokenizerUrl;
constructor(config?: PipelineConfig);
initialize(): Promise<void>;
setTokenizer(tokenizer: Tokenizer): void;
classify(text: string | string[], candidateLabels: string[], options?: ZeroShotClassificationOptions): Promise<ZeroShotClassificationResult | ZeroShotClassificationResult[]>;
run(input: ZeroShotInput, options?: PipelineOptions): Promise<ZeroShotClassificationResult | ZeroShotClassificationResult[]>;
private classifySingle;
/**
* Score a single hypothesis using the real NLI ONNX model.
* Returns the entailment logit.
*/
private scoreHypothesis;
protected preprocess(input: ZeroShotInput): Promise<EdgeFlowTensor[]>;
protected postprocess(_outputs: EdgeFlowTensor[], _options?: PipelineOptions): Promise<ZeroShotClassificationResult>;
}
export declare function createZeroShotClassificationPipeline(config?: PipelineConfig): ZeroShotClassificationPipeline;
//# sourceMappingURL=zero-shot-classification.d.ts.map
================================================
FILE: dist/pipelines/zero-shot-classification.js
================================================
/**
* edgeFlow.js - Zero-shot Classification Pipeline
*
* Classify text into any set of labels without fine-tuning,
* using a real NLI (Natural Language Inference) model.
*/
import { BasePipeline, registerPipeline } from './base.js';
import { EdgeFlowTensor, softmax } from '../core/tensor.js';
import { Tokenizer } from '../utils/tokenizer.js';
import { loadModelData } from '../utils/model-loader.js';
import { loadModelFromBuffer, runInferenceNamed } from '../core/runtime.js';
// ============================================================================
// Default Model (DeBERTa-v3-small fine-tuned on NLI)
// ============================================================================
const DEFAULT_MODELS = {
model: 'https://huggingface.co/Xenova/nli-deberta-v3-small/resolve/main/onnx/model_quantized.onnx',
tokenizer: 'https://huggingface.co/Xenova/nli-deberta-v3-small/resolve/main/tokenizer.json',
};
// NLI output indices: [contradiction, neutral, entailment]
const ENTAILMENT_IDX = 2;
// ============================================================================
// Zero-shot Classification Pipeline
// ============================================================================
export class ZeroShotClassificationPipeline extends BasePipeline {
tokenizer = null;
onnxModel = null;
hypothesisTemplate = 'This text is about {label}.';
modelUrl;
tokenizerUrl;
constructor(config) {
super(config ?? {
task: 'zero-shot-classification',
model: 'default',
});
this.modelUrl = (config?.model && config.model !== 'default') ? config.model : DEFAULT_MODELS.model;
this.tokenizerUrl = DEFAULT_MODELS.tokenizer;
}
async initialize() {
await super.initialize();
if (!this.tokenizer) {
this.tokenizer = await Tokenizer.fromUrl(this.tokenizerUrl);
}
if (!this.onnxModel) {
const modelData = await loadModelData(this.modelUrl, { cache: this.config.cache ?? true });
this.onnxModel = await loadModelFromBuffer(modelData);
}
}
setTokenizer(tokenizer) {
this.tokenizer = tokenizer;
}
async classify(text, candidateLabels, options) {
return this.run({ text, candidateLabels }, options);
}
async run(input, options) {
await this.initialize();
const { text, candidateLabels } = input;
const opts = options ?? {};
const texts = Array.isArray(text) ? text : [text];
const template = opts.hypothesisTemplate ?? this.hypothesisTemplate;
const multiLabel = opts.multiLabel ?? false;
const results = await Promise.all(texts.map(t => this.classifySingle(t, candidateLabels, template, multiLabel)));
return Array.isArray(text) ? results : results[0];
}
async classifySingle(text, candidateLabels, template, multiLabel) {
const startTime = performance.now();
const hypotheses = candidateLabels.map(label => template.replace('{label}', label));
const scores = [];
for (const hypothesis of hypotheses) {
const score = await this.scoreHypothesis(text, hypothesis);
scores.push(score);
}
let normalizedScores;
if (multiLabel) {
normalizedScores = scores.map(s => 1 / (1 + Math.exp(-s)));
}
else {
const tensor = new EdgeFlowTensor(new Float32Array(scores), [scores.length], 'float32');
normalizedScores = Array.from(softmax(tensor).toFloat32Array());
}
const indexed = candidateLabels.map((label, i) => ({
label,
score: normalizedScores[i] ?? 0,
}));
indexed.sort((a, b) => b.score - a.score);
return {
sequence: text,
labels: indexed.map(i => i.label),
scores: indexed.map(i => i.score),
processingTime: performance.now() - startTime,
};
}
/**
* Score a single hypothesis using the real NLI ONNX model.
* Returns the entailment logit.
*/
async scoreHypothesis(premise, hypothesis) {
const encoded = this.tokenizer.encode(premise, {
textPair: hypothesis,
addSpecialTokens: true,
maxLength: 512,
truncation: true,
returnAttentionMask: true,
});
const inputIds = new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(id => BigInt(id))), [1, encoded.inputIds.length], 'int64');
const attentionMask = new EdgeFlowTensor(BigInt64Array.from(encoded.attentionMask.map(m => BigInt(m))), [1, encoded.attentionMask.length], 'int64');
const namedInputs = new Map();
namedInputs.set('input_ids', inputIds);
namedInputs.set('attention_mask', attentionMask);
const outputs = await runInferenceNamed(this.onnxModel, namedInputs);
const logits = outputs[0].toFloat32Array();
// Return entailment logit (index 2 in [contradiction, neutral, entailment])
return logits[ENTAILMENT_IDX] ?? 0;
}
async preprocess(input) {
const { text, candidateLabels } = input;
const firstText = Array.isArray(text) ? text[0] ?? '' : text;
const firstLabel = candidateLabels[0] ?? '';
const encoded = this.tokenizer.encode(firstText, {
textPair: this.hypothesisTemplate.replace('{label}', firstLabel),
addSpecialTokens: true,
maxLength: 512,
});
return [new EdgeFlowTensor(BigInt64Array.from(encoded.inputIds.map(id => BigInt(id))), [1, encoded.inputIds.length], 'int64')];
}
async postprocess(_outputs, _options) {
return {
sequence: '',
labels: [],
scores: [],
};
}
}
// ============================================================================
// Factory
// ============================================================================
export function createZeroShotClassificationPipeline(config) {
return new ZeroShotClassificationPipeline(config);
}
registerPipeline('zero-shot-classification', (config) => new ZeroShotClassificationPipeline(config));
//# sourceMappingURL=zero-shot-classification.js.map
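/*
 * Usage sketch (illustrative only): shows the two scoring modes of the pipeline
 * above. With multiLabel: false the entailment logits are softmax-normalized
 * across labels; with multiLabel: true each label gets an independent sigmoid
 * score. The input texts and candidate labels are made up for the example.
 */
async function zeroShotSketch() {
  const classifier = createZeroShotClassificationPipeline();

  // Single-label: scores sum to 1 across the candidate labels.
  const single = await classifier.classify(
    'The new GPU backend cut inference latency in half.',
    ['performance', 'sports', 'cooking'],
  );
  console.log(single.labels[0], single.scores[0]);

  // Multi-label: each label is scored independently (sigmoid of its entailment logit).
  const multi = await classifier.classify(
    'The release improves speed and fixes a memory leak.',
    ['performance', 'bug fix', 'documentation'],
    { multiLabel: true },
  );
  console.log(multi.labels, multi.scores);
}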
================================================
FILE: dist/tools/benchmark.d.ts
================================================
/**
* edgeFlow.js - Benchmark Utilities
*
* Performance testing and comparison tools.
*/
export interface BenchmarkOptions {
/** Number of warmup runs (default: 3) */
warmupRuns?: number;
/** Number of measured runs (default: 10) */
runs?: number;
/** Whether to log progress (default: false) */
verbose?: boolean;
/** Timeout per run in ms (default: 30000) */
timeout?: number;
/** Name for this benchmark */
name?: string;
}
export interface BenchmarkResult {
name: string;
/** Average time in ms */
avgTime: number;
/** Median time in ms */
medianTime: number;
/** Minimum time in ms */
minTime: number;
/** Maximum time in ms */
maxTime: number;
/** Standard deviation in ms */
stdDev: number;
/** 95th percentile in ms */
p95: number;
/** 99th percentile in ms */
p99: number;
/** Throughput (ops/sec) */
throughput: number;
/** All individual run times */
times: number[];
/** Number of runs */
totalRuns: number;
/** Number of failed runs */
failedRuns: number;
}
export interface CompareBenchmarkResult {
baseline: BenchmarkResult;
comparison: BenchmarkResult;
speedup: number;
percentFaster: number;
winner: 'baseline' | 'comparison' | 'tie';
}
/**
* Run a benchmark on an async function
*/
export declare function benchmark(fn: () => Promise<unknown> | unknown, options?: BenchmarkOptions): Promise<BenchmarkResult>;
/**
* Compare two benchmarks
*/
export declare function compareBenchmarks(baseline: () => Promise<unknown> | unknown, comparison: () => Promise<unknown> | unknown, options?: BenchmarkOptions): Promise<CompareBenchmarkResult>;
/**
* Run multiple benchmarks in a suite
*/
export declare function benchmarkSuite(suite: Record<string, () => Promise<unknown> | unknown>, options?: BenchmarkOptions): Promise<Record<string, BenchmarkResult>>;
/**
* Format benchmark result as a table string
*/
export declare function formatBenchmarkResult(result: BenchmarkResult): string;
/**
* Format comparison result
*/
export declare function formatComparisonResult(result: CompareBenchmarkResult): string;
export interface MemoryBenchmarkResult {
name: string;
peakMemory: number;
avgMemory: number;
memoryDelta: number;
}
/**
* Benchmark memory usage
*/
export declare function benchmarkMemory(fn: () => Promise<unknown> | unknown, options?: {
name?: string;
runs?: number;
}): Promise<MemoryBenchmarkResult>;
declare const _default: {
benchmark: typeof benchmark;
compareBenchmarks: typeof compareBenchmarks;
benchmarkSuite: typeof benchmarkSuite;
benchmarkMemory: typeof benchmarkMemory;
formatBenchmarkResult: typeof formatBenchmarkResult;
formatComparisonResult: typeof formatComparisonResult;
};
export default _default;
//# sourceMappingURL=benchmark.d.ts.map
================================================
FILE: dist/tools/benchmark.js
================================================
/**
* edgeFlow.js - Benchmark Utilities
*
* Performance testing and comparison tools.
*/
// ============================================================================
// Benchmark Functions
// ============================================================================
/**
* Run a benchmark on an async function
*/
export async function benchmark(fn, options = {}) {
const { warmupRuns = 3, runs = 10, verbose = false, timeout = 30000, name = 'benchmark', } = options;
const times = [];
let failedRuns = 0;
// Warmup
if (verbose)
console.log(`[${name}] Running ${warmupRuns} warmup iterations...`);
for (let i = 0; i < warmupRuns; i++) {
try {
await Promise.race([
Promise.resolve(fn()),
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), timeout)),
]);
}
catch {
// Warmup failures are ignored
}
}
// Measured runs
if (verbose)
console.log(`[${name}] Running ${runs} measured iterations...`);
for (let i = 0; i < runs; i++) {
try {
const start = performance.now();
await Promise.race([
Promise.resolve(fn()),
new Promise((_, reject) => setTimeout(() => reject(new Error('Timeout')), timeout)),
]);
const end = performance.now();
times.push(end - start);
if (verbose)
console.log(` Run ${i + 1}: ${(end - start).toFixed(2)}ms`);
}
catch (error) {
failedRuns++;
if (verbose)
console.log(` Run ${i + 1}: FAILED - ${error}`);
}
}
if (times.length === 0) {
throw new Error(`All ${runs} runs failed`);
}
// Calculate statistics
const sorted = [...times].sort((a, b) => a - b);
const sum = times.reduce((a, b) => a + b, 0);
const avg = sum / times.length;
const variance = times.reduce((sum, t) => sum + Math.pow(t - avg, 2), 0) / times.length;
const stdDev = Math.sqrt(variance);
const result = {
name,
avgTime: avg,
medianTime: sorted[Math.floor(sorted.length / 2)] ?? 0,
minTime: sorted[0] ?? 0,
maxTime: sorted[sorted.length - 1] ?? 0,
stdDev,
p95: sorted[Math.floor(sorted.length * 0.95)] ?? sorted[sorted.length - 1] ?? 0,
p99: sorted[Math.floor(sorted.length * 0.99)] ?? sorted[sorted.length - 1] ?? 0,
throughput: 1000 / avg,
times,
totalRuns: runs,
failedRuns,
};
if (verbose) {
console.log(`\n[${name}] Results:`);
console.log(` Avg: ${result.avgTime.toFixed(2)}ms`);
console.log(` Median: ${result.medianTime.toFixed(2)}ms`);
console.log(` Min: ${result.minTime.toFixed(2)}ms`);
console.log(` Max: ${result.maxTime.toFixed(2)}ms`);
console.log(` Std Dev: ${result.stdDev.toFixed(2)}ms`);
console.log(` P95: ${result.p95.toFixed(2)}ms`);
console.log(` Throughput: ${result.throughput.toFixed(2)} ops/sec`);
}
return result;
}
/**
* Compare two benchmarks
*/
export async function compareBenchmarks(baseline, comparison, options = {}) {
const baselineResult = await benchmark(baseline, {
...options,
name: options.name ? `${options.name} (baseline)` : 'baseline'
});
const comparisonResult = await benchmark(comparison, {
...options,
name: options.name ? `${options.name} (comparison)` : 'comparison'
});
const speedup = baselineResult.avgTime / comparisonResult.avgTime;
const percentFaster = ((baselineResult.avgTime - comparisonResult.avgTime) / baselineResult.avgTime) * 100;
let winner;
if (Math.abs(percentFaster) < 5) {
winner = 'tie';
}
else if (percentFaster > 0) {
winner = 'comparison';
}
else {
winner = 'baseline';
}
return {
baseline: baselineResult,
comparison: comparisonResult,
speedup,
percentFaster,
winner,
};
}
/**
* Run multiple benchmarks in a suite
*/
export async function benchmarkSuite(suite, options = {}) {
const results = {};
for (const [name, fn] of Object.entries(suite)) {
console.log(`\n=== ${name} ===`);
results[name] = await benchmark(fn, { ...options, name, verbose: true });
}
return results;
}
/**
* Format benchmark result as a table string
*/
export function formatBenchmarkResult(result) {
return `
┌─────────────────────────────────────────┐
│ ${result.name.padEnd(39)} │
├─────────────────────────────────────────┤
│ Avg Time: ${result.avgTime.toFixed(2).padStart(10)}ms │
│ Median: ${result.medianTime.toFixed(2).padStart(10)}ms │
│ Min Time: ${result.minTime.toFixed(2).padStart(10)}ms │
│ Max Time: ${result.maxTime.toFixed(2).padStart(10)}ms │
│ Std Dev: ${result.stdDev.toFixed(2).padStart(10)}ms │
│ P95: ${result.p95.toFixed(2).padStart(10)}ms │
│ P99: ${result.p99.toFixed(2).padStart(10)}ms │
│ Throughput: ${result.throughput.toFixed(2).padStart(10)} ops/sec │
│ Runs: ${result.totalRuns.toString().padStart(10)} (${result.failedRuns} failed) │
└─────────────────────────────────────────┘
`.trim();
}
/**
* Format comparison result
*/
export function formatComparisonResult(result) {
const arrow = result.percentFaster > 0 ? '↑' : result.percentFaster < 0 ? '↓' : '=';
const winnerText = result.winner === 'comparison'
? 'Comparison is faster!'
: result.winner === 'baseline'
? 'Baseline is faster!'
: 'Results are similar';
return `
┌─────────────────────────────────────────────────────┐
│ BENCHMARK COMPARISON │
├─────────────────────────────────────────────────────┤
│ Baseline: ${result.baseline.avgTime.toFixed(2).padStart(10)}ms │
│ Comparison: ${result.comparison.avgTime.toFixed(2).padStart(10)}ms │
├─────────────────────────────────────────────────────┤
│ Speedup: ${result.speedup.toFixed(2).padStart(10)}x │
│ Difference: ${arrow} ${Math.abs(result.percentFaster).toFixed(1).padStart(8)}% │
├─────────────────────────────────────────────────────┤
│ Winner: ${winnerText.padEnd(42)} │
└─────────────────────────────────────────────────────┘
`.trim();
}
/**
* Benchmark memory usage
*/
export async function benchmarkMemory(fn, options = {}) {
const { name = 'memory-benchmark', runs = 5 } = options;
// Note: Memory APIs are limited in browsers
// This is a simplified version that works when performance.memory is available
const getMemory = () => {
if (typeof performance !== 'undefined' && 'memory' in performance) {
return performance.memory.usedJSHeapSize;
}
return 0;
};
const memoryReadings = [];
const initialMemory = getMemory();
for (let i = 0; i < runs; i++) {
await fn();
memoryReadings.push(getMemory());
}
const peakMemory = Math.max(...memoryReadings);
const avgMemory = memoryReadings.reduce((a, b) => a + b, 0) / memoryReadings.length;
const memoryDelta = avgMemory - initialMemory;
return {
name,
peakMemory,
avgMemory,
memoryDelta,
};
}
// ============================================================================
// Export
// ============================================================================
export default {
benchmark,
compareBenchmarks,
benchmarkSuite,
benchmarkMemory,
formatBenchmarkResult,
formatComparisonResult,
};
//# sourceMappingURL=benchmark.js.map
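/*
 * Usage sketch (illustrative only): applies benchmark() and compareBenchmarks()
 * from this module to two toy workloads. The workloads are placeholders standing
 * in for real inference calls (e.g. two backends for the same model).
 */
async function benchmarkSketch() {
  // Toy workloads: summing 0..999999 with a loop vs. an array reduce.
  const sumLoop = () => { let s = 0; for (let i = 0; i < 1e6; i++) s += i; return s; };
  const sumReduce = () => Array.from({ length: 1e6 }, (_, i) => i).reduce((a, b) => a + b, 0);

  // Single benchmark with warmup, then a formatted summary table.
  const result = await benchmark(sumLoop, { name: 'sum-loop', runs: 20, warmupRuns: 5 });
  console.log(formatBenchmarkResult(result));

  // Head-to-head comparison; winner is 'tie' when the difference is under 5%.
  const cmp = await compareBenchmarks(sumLoop, sumReduce, { name: 'sum', runs: 20 });
  console.log(formatComparisonResult(cmp));
  console.log(`speedup: ${cmp.speedup.toFixed(2)}x, winner: ${cmp.winner}`);
}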
================================================
FILE: dist/tools/debugger.d.ts
================================================
/**
* edgeFlow.js - Visual Debugging Tools
*
* In-browser debugging and visualization utilities for ML models.
*/
import { EdgeFlowTensor } from '../core/index.js';
/**
* Debugger configuration
*/
export interface DebuggerConfig {
/** Enable logging */
logging?: boolean;
/** Log level */
logLevel?: 'debug' | 'info' | 'warn' | 'error';
/** Enable tensor inspection */
inspectTensors?: boolean;
/** Maximum values to display per tensor */
maxDisplayValues?: number;
/** Enable performance tracking */
trackPerformance?: boolean;
/** Custom logger function */
logger?: (level: string, message: string, data?: unknown) => void;
}
/**
* Tensor inspection result
*/
export interface TensorInspection {
name: string;
shape: number[];
dtype: string;
size: number;
memoryBytes: number;
stats: TensorStats;
sample: number[];
histogram?: HistogramData;
}
/**
* Tensor statistics
*/
export interface TensorStats {
min: number;
max: number;
mean: number;
std: number;
zeros: number;
nans: number;
infinities: number;
sparsity: number;
}
/**
* Histogram data
*/
export interface HistogramData {
bins: number[];
counts: number[];
binEdges: number[];
}
/**
* Inference trace
*/
export interface InferenceTrace {
id: string;
modelId: string;
timestamp: number;
inputs: TensorInspection[];
outputs: TensorInspection[];
duration: number;
memoryUsed: number;
operations: OperationTrace[];
}
/**
* Operation trace
*/
export interface OperationTrace {
name: string;
type: string;
duration: number;
inputShapes: number[][];
outputShapes: number[][];
attributes?: Record<string, unknown>;
}
/**
* Debug event
*/
export interface DebugEvent {
type: 'tensor' | 'inference' | 'error' | 'warning' | 'info' | 'performance';
timestamp: number;
data: unknown;
message: string;
}
/**
* Performance metrics
*/
export interface PerformanceMetrics {
inferenceCount: number;
totalInferenceTime: number;
averageInferenceTime: number;
minInferenceTime: number;
maxInferenceTime: number;
peakMemoryUsage: number;
currentMemoryUsage: number;
tensorAllocations: number;
tensorDeallocations: number;
}
/**
* Inspect a tensor
*/
export declare function inspectTensor(tensor: EdgeFlowTensor, name?: string, options?: {
histogram?: boolean;
maxSample?: number;
}): TensorInspection;
/**
* Format tensor inspection for display
*/
export declare function formatTensorInspection(inspection: TensorInspection): string;
/**
* Visual debugger for edgeFlow.js
*/
export declare class EdgeFlowDebugger {
private config;
private events;
private traces;
private performanceMetrics;
private listeners;
private isEnabled;
constructor(config?: DebuggerConfig);
/**
* Default logger
*/
private defaultLogger;
/**
* Log a message
*/
log(level: string, message: string, data?: unknown): void;
/**
* Add debug event
*/
private addEvent;
/**
* Enable debugger
*/
enable(): void;
/**
* Disable debugger
*/
disable(): void;
/**
* Subscribe to events
*/
on(type: string, callback: (event: DebugEvent) => void): () => void;
/**
* Inspect and log a tensor
*/
inspectTensor(tensor: EdgeFlowTensor, name?: string): TensorInspection;
/**
* Start tracing an inference
*/
startTrace(modelId: string): string;
/**
* Add input to trace
*/
traceInput(traceId: string, tensor: EdgeFlowTensor, name: string): void;
/**
* Add output to trace
*/
traceOutput(traceId: string, tensor: EdgeFlowTensor, name: string): void;
/**
* Add operation to trace
*/
traceOperation(traceId: string, operation: OperationTrace): void;
/**
* End trace
*/
endTrace(traceId: string): InferenceTrace | undefined;
/**
* Record tensor allocation
*/
recordAllocation(tensor: EdgeFlowTensor): void;
/**
* Record tensor deallocation
*/
recordDeallocation(tensor: EdgeFlowTensor): void;
/**
* Get performance metrics
*/
getPerformanceMetrics(): PerformanceMetrics;
/**
* Get all events
*/
getEvents(): DebugEvent[];
/**
* Get all traces
*/
getTraces(): InferenceTrace[];
/**
* Get trace by ID
*/
getTrace(traceId: string): InferenceTrace | undefined;
/**
* Clear all data
*/
clear(): void;
/**
* Export debug data
*/
export(): {
events: DebugEvent[];
traces: InferenceTrace[];
metrics: PerformanceMetrics;
timestamp: number;
};
/**
* Generate summary report
*/
generateReport(): string;
}
/**
* Get or create the global debugger instance
*/
export declare function getDebugger(config?: DebuggerConfig): EdgeFlowDebugger;
/**
* Enable debugging
*/
export declare function enableDebugging(config?: DebuggerConfig): EdgeFlowDebugger;
/**
* Disable debugging
*/
export declare function disableDebugging(): void;
/**
* Create ASCII histogram
*/
export declare function createAsciiHistogram(histogram: HistogramData, width?: number, height?: number): string;
/**
* Create tensor heatmap (for 2D tensors)
*/
export declare function createTensorHeatmap(tensor: EdgeFlowTensor, width?: number): string;
/**
* Create model architecture visualization
*/
export declare function visualizeModelArchitecture(layers: Array<{
name: string;
type: string;
inputShape: number[];
outputShape: number[];
}>): string;
declare const _default: {
EdgeFlowDebugger: typeof EdgeFlowDebugger;
getDebugger: typeof getDebugger;
enableDebugging: typeof enableDebugging;
disableDebugging: typeof disableDebugging;
inspectTensor: typeof inspectTensor;
formatTensorInspection: typeof formatTensorInspection;
createAsciiHistogram: typeof createAsciiHistogram;
createTensorHeatmap: typeof createTensorHeatmap;
visualizeModelArchitecture: typeof visualizeModelArchitecture;
};
export default _default;
//# sourceMappingURL=debugger.d.ts.map
================================================
FILE: dist/tools/debugger.js
================================================
/**
* edgeFlow.js - Visual Debugging Tools
*
* In-browser debugging and visualization utilities for ML models.
*/
// ============================================================================
// Tensor Inspection
// ============================================================================
/**
* Calculate tensor statistics
*/
function calculateTensorStats(data) {
const arr = data instanceof Float32Array ? data : new Float32Array(data);
let min = Infinity;
let max = -Infinity;
let sum = 0;
let zeros = 0;
let nans = 0;
let infinities = 0;
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (isNaN(val)) {
nans++;
continue;
}
if (!isFinite(val)) {
infinities++;
continue;
}
min = Math.min(min, val);
max = Math.max(max, val);
sum += val;
if (val === 0)
zeros++;
}
const validCount = arr.length - nans - infinities;
const mean = validCount > 0 ? sum / validCount : 0;
// Calculate std
let varianceSum = 0;
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
varianceSum += Math.pow(val - mean, 2);
}
}
const std = validCount > 0 ? Math.sqrt(varianceSum / validCount) : 0;
return {
min: min === Infinity ? 0 : min,
max: max === -Infinity ? 0 : max,
mean,
std,
zeros,
nans,
infinities,
sparsity: zeros / arr.length,
};
}
/**
* Create histogram from data
*/
function createHistogram(data, bins = 50) {
const arr = data instanceof Float32Array ? data : new Float32Array(data);
// Find min/max (excluding NaN/Inf)
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
min = Math.min(min, val);
max = Math.max(max, val);
}
}
if (min === Infinity || max === -Infinity || min === max) {
return { bins: [min || 0], counts: [arr.length], binEdges: [min || 0, max || 0] };
}
const binWidth = (max - min) / bins;
const counts = new Array(bins).fill(0);
const binEdges = new Array(bins + 1);
for (let i = 0; i <= bins; i++) {
binEdges[i] = min + i * binWidth;
}
for (let i = 0; i < arr.length; i++) {
const val = arr[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
const binIndex = Math.min(Math.floor((val - min) / binWidth), bins - 1);
counts[binIndex]++;
}
}
return {
bins: binEdges.slice(0, -1).map((e, i) => (e + binEdges[i + 1]) / 2),
counts,
binEdges,
};
}
/**
* Inspect a tensor
*/
export function inspectTensor(tensor, name = 'tensor', options = {}) {
const { histogram = true, maxSample = 10 } = options;
const data = tensor.toFloat32Array();
const shape = tensor.shape;
const size = tensor.size;
// Get sample of values
const sampleIndices = [];
const step = Math.max(1, Math.floor(size / maxSample));
for (let i = 0; i < size && sampleIndices.length < maxSample; i += step) {
sampleIndices.push(i);
}
const sample = sampleIndices.map(i => data[i] ?? 0);
// Calculate memory (assuming float32)
const bytesPerElement = tensor.dtype === 'float32' ? 4
: tensor.dtype === 'int32' ? 4
: tensor.dtype === 'int64' ? 8
: 4;
const memoryBytes = size * bytesPerElement;
return {
name,
shape,
dtype: tensor.dtype,
size,
memoryBytes,
stats: calculateTensorStats(data),
sample,
histogram: histogram ? createHistogram(data) : undefined,
};
}
/**
* Format tensor inspection for display
*/
export function formatTensorInspection(inspection) {
const { name, shape, dtype, size, memoryBytes, stats, sample } = inspection;
const lines = [
`┌─ Tensor: ${name} ─────────────────────────────`,
`│ Shape: [${shape.join(', ')}]`,
`│ Dtype: ${dtype}`,
`│ Size: ${size.toLocaleString()} elements`,
`│ Memory: ${formatBytes(memoryBytes)}`,
`├─ Statistics ─────────────────────────────────`,
`│ Min: ${stats.min.toFixed(6)}`,
`│ Max: ${stats.max.toFixed(6)}`,
`│ Mean: ${stats.mean.toFixed(6)}`,
`│ Std: ${stats.std.toFixed(6)}`,
`│ Sparsity: ${(stats.sparsity * 100).toFixed(2)}%`,
];
if (stats.nans > 0) {
lines.push(`│ ⚠️ NaN values: ${stats.nans}`);
}
if (stats.infinities > 0) {
lines.push(`│ ⚠️ Infinity values: ${stats.infinities}`);
}
lines.push(`├─ Sample Values ──────────────────────────────`);
lines.push(`│ [${sample.map(v => v.toFixed(4)).join(', ')}]`);
lines.push(`└──────────────────────────────────────────────`);
return lines.join('\n');
}
/**
* Format bytes to human readable
*/
function formatBytes(bytes) {
if (bytes < 1024)
return `${bytes} B`;
if (bytes < 1024 * 1024)
return `${(bytes / 1024).toFixed(2)} KB`;
if (bytes < 1024 * 1024 * 1024)
return `${(bytes / (1024 * 1024)).toFixed(2)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(2)} GB`;
}
// ============================================================================
// Visual Debugger Class
// ============================================================================
/**
* Visual debugger for edgeFlow.js
*/
export class EdgeFlowDebugger {
config;
events = [];
traces = [];
performanceMetrics;
listeners = new Map();
isEnabled = true;
constructor(config = {}) {
this.config = {
logging: config.logging ?? true,
logLevel: config.logLevel ?? 'info',
inspectTensors: config.inspectTensors ?? true,
maxDisplayValues: config.maxDisplayValues ?? 10,
trackPerformance: config.trackPerformance ?? true,
logger: config.logger ?? this.defaultLogger.bind(this),
};
this.performanceMetrics = {
inferenceCount: 0,
totalInferenceTime: 0,
averageInferenceTime: 0,
minInferenceTime: Infinity,
maxInferenceTime: 0,
peakMemoryUsage: 0,
currentMemoryUsage: 0,
tensorAllocations: 0,
tensorDeallocations: 0,
};
}
/**
* Default logger
*/
defaultLogger(level, message, data) {
const timestamp = new Date().toISOString();
const prefix = `[edgeFlow.js ${timestamp}] [${level.toUpperCase()}]`;
switch (level) {
case 'debug':
console.debug(prefix, message, data ?? '');
break;
case 'info':
console.info(prefix, message, data ?? '');
break;
case 'warn':
console.warn(prefix, message, data ?? '');
break;
case 'error':
console.error(prefix, message, data ?? '');
break;
default:
console.log(prefix, message, data ?? '');
}
}
/**
* Log a message
*/
log(level, message, data) {
if (!this.isEnabled || !this.config.logging)
return;
const levels = ['debug', 'info', 'warn', 'error'];
const configLevel = levels.indexOf(this.config.logLevel);
const msgLevel = levels.indexOf(level);
if (msgLevel >= configLevel) {
this.config.logger(level, message, data);
}
}
/**
* Add debug event
*/
addEvent(event) {
this.events.push(event);
// Notify listeners
const listeners = this.listeners.get(event.type) ?? [];
for (const listener of listeners) {
listener(event);
}
// Keep only last 1000 events
if (this.events.length > 1000) {
this.events = this.events.slice(-1000);
}
}
/**
* Enable debugger
*/
enable() {
this.isEnabled = true;
this.log('info', 'Debugger enabled');
}
/**
* Disable debugger
*/
disable() {
this.isEnabled = false;
}
/**
* Subscribe to events
*/
on(type, callback) {
const listeners = this.listeners.get(type) ?? [];
listeners.push(callback);
this.listeners.set(type, listeners);
return () => {
const idx = listeners.indexOf(callback);
if (idx !== -1)
listeners.splice(idx, 1);
};
}
/**
* Inspect and log a tensor
*/
inspectTensor(tensor, name = 'tensor') {
const inspection = inspectTensor(tensor, name, {
histogram: true,
maxSample: this.config.maxDisplayValues,
});
if (this.config.inspectTensors) {
this.log('debug', `Tensor: ${name}`, inspection);
this.addEvent({
type: 'tensor',
timestamp: Date.now(),
message: `Inspected tensor: ${name}`,
data: inspection,
});
// Check for issues
if (inspection.stats.nans > 0) {
this.log('warn', `Tensor "${name}" contains ${inspection.stats.nans} NaN values`);
}
if (inspection.stats.infinities > 0) {
this.log('warn', `Tensor "${name}" contains ${inspection.stats.infinities} Infinity values`);
}
}
return inspection;
}
/**
* Start tracing an inference
*/
startTrace(modelId) {
const id = `trace_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
const trace = {
id,
modelId,
timestamp: Date.now(),
inputs: [],
outputs: [],
duration: 0,
memoryUsed: 0,
operations: [],
};
this.traces.push(trace);
this.log('debug', `Started trace: ${id} for model: ${modelId}`);
return id;
}
/**
* Add input to trace
*/
traceInput(traceId, tensor, name) {
const trace = this.traces.find(t => t.id === traceId);
if (!trace)
return;
trace.inputs.push(inspectTensor(tensor, name));
}
/**
* Add output to trace
*/
traceOutput(traceId, tensor, name) {
const trace = this.traces.find(t => t.id === traceId);
if (!trace)
return;
trace.outputs.push(inspectTensor(tensor, name));
}
/**
* Add operation to trace
*/
traceOperation(traceId, operation) {
const trace = this.traces.find(t => t.id === traceId);
if (!trace)
return;
trace.operations.push(operation);
}
/**
* End trace
*/
endTrace(traceId) {
const trace = this.traces.find(t => t.id === traceId);
if (!trace)
return;
trace.duration = Date.now() - trace.timestamp;
// Update performance metrics
this.performanceMetrics.inferenceCount++;
this.performanceMetrics.totalInferenceTime += trace.duration;
this.performanceMetrics.averageInferenceTime =
this.performanceMetrics.totalInferenceTime / this.performanceMetrics.inferenceCount;
this.performanceMetrics.minInferenceTime =
Math.min(this.performanceMetrics.minInferenceTime, trace.duration);
this.performanceMetrics.maxInferenceTime =
Math.max(this.performanceMetrics.maxInferenceTime, trace.duration);
this.log('info', `Trace completed: ${traceId}`, {
duration: `${trace.duration}ms`,
inputs: trace.inputs.length,
outputs: trace.outputs.length,
operations: trace.operations.length,
});
this.addEvent({
type: 'inference',
timestamp: Date.now(),
message: `Inference completed in ${trace.duration}ms`,
data: trace,
});
return trace;
}
/**
* Record tensor allocation
*/
recordAllocation(tensor) {
if (!this.config.trackPerformance)
return;
this.performanceMetrics.tensorAllocations++;
const memory = tensor.size * 4; // Assume float32
this.performanceMetrics.currentMemoryUsage += memory;
this.performanceMetrics.peakMemoryUsage = Math.max(this.performanceMetrics.peakMemoryUsage, this.performanceMetrics.currentMemoryUsage);
}
/**
* Record tensor deallocation
*/
recordDeallocation(tensor) {
if (!this.config.trackPerformance)
return;
this.performanceMetrics.tensorDeallocations++;
const memory = tensor.size * 4;
this.performanceMetrics.currentMemoryUsage -= memory;
}
/**
* Get performance metrics
*/
getPerformanceMetrics() {
return { ...this.performanceMetrics };
}
/**
* Get all events
*/
getEvents() {
return [...this.events];
}
/**
* Get all traces
*/
getTraces() {
return [...this.traces];
}
/**
* Get trace by ID
*/
getTrace(traceId) {
return this.traces.find(t => t.id === traceId);
}
/**
* Clear all data
*/
clear() {
this.events = [];
this.traces = [];
this.performanceMetrics = {
inferenceCount: 0,
totalInferenceTime: 0,
averageInferenceTime: 0,
minInferenceTime: Infinity,
maxInferenceTime: 0,
peakMemoryUsage: 0,
currentMemoryUsage: 0,
tensorAllocations: 0,
tensorDeallocations: 0,
};
}
/**
* Export debug data
*/
export() {
return {
events: this.getEvents(),
traces: this.getTraces(),
metrics: this.getPerformanceMetrics(),
timestamp: Date.now(),
};
}
/**
* Generate summary report
*/
generateReport() {
const metrics = this.getPerformanceMetrics();
const traces = this.getTraces();
const lines = [
'╔══════════════════════════════════════════════════════════════════╗',
'║ edgeFlow.js Debug Report ║',
'╠══════════════════════════════════════════════════════════════════╣',
'║ Performance Metrics ║',
'╟──────────────────────────────────────────────────────────────────╢',
`║ Total Inferences: ${metrics.inferenceCount.toString().padStart(10)} ║`,
`║ Average Time: ${metrics.averageInferenceTime.toFixed(2).padStart(10)}ms ║`,
`║ Min Time: ${(metrics.minInferenceTime === Infinity ? 0 : metrics.minInferenceTime).toFixed(2).padStart(10)}ms ║`,
`║ Max Time: ${metrics.maxInferenceTime.toFixed(2).padStart(10)}ms ║`,
`║ Peak Memory: ${formatBytes(metrics.peakMemoryUsage).padStart(10)} ║`,
`║ Current Memory: ${formatBytes(metrics.currentMemoryUsage).padStart(10)} ║`,
`║ Tensor Allocations: ${metrics.tensorAllocations.toString().padStart(10)} ║`,
`║ Tensor Deallocations: ${metrics.tensorDeallocations.toString().padStart(10)} ║`,
'╟──────────────────────────────────────────────────────────────────╢',
'║ Recent Traces ║',
'╟──────────────────────────────────────────────────────────────────╢',
];
const recentTraces = traces.slice(-5);
for (const trace of recentTraces) {
lines.push(`║ ${trace.id.slice(0, 20).padEnd(20)} | ${trace.duration.toFixed(2).padStart(8)}ms | ${trace.modelId.slice(0, 20).padEnd(20)} ║`);
}
if (recentTraces.length === 0) {
lines.push('║ No traces recorded ║');
}
lines.push('╚══════════════════════════════════════════════════════════════════╝');
return lines.join('\n');
}
}
// ============================================================================
// Global Debugger Instance
// ============================================================================
let globalDebugger = null;
/**
* Get or create the global debugger instance
*/
export function getDebugger(config) {
if (!globalDebugger || config) {
globalDebugger = new EdgeFlowDebugger(config);
}
return globalDebugger;
}
/**
* Enable debugging
*/
export function enableDebugging(config) {
const debugger_ = getDebugger(config);
debugger_.enable();
return debugger_;
}
/**
* Disable debugging
*/
export function disableDebugging() {
globalDebugger?.disable();
}
// ============================================================================
// Visualization Helpers
// ============================================================================
/**
* Create ASCII histogram
*/
export function createAsciiHistogram(histogram, width = 50, height = 10) {
const { counts, binEdges } = histogram;
const maxCount = Math.max(...counts);
if (maxCount === 0)
return 'No data to display';
const lines = [];
// Scale counts to height
const scaled = counts.map(c => Math.round((c / maxCount) * height));
// Create rows
for (let row = height; row > 0; row--) {
let line = row === height ? `${maxCount.toString().padStart(6)} │` : ' │';
for (let col = 0; col < width && col < scaled.length; col++) {
line += (scaled[col] ?? 0) >= row ? '█' : ' ';
}
lines.push(line);
}
// X axis
lines.push(' └' + '─'.repeat(Math.min(width, scaled.length)));
// Labels
const minLabel = (binEdges[0] ?? 0).toFixed(2);
const maxLabel = (binEdges[binEdges.length - 1] ?? 0).toFixed(2);
lines.push(` ${minLabel}${' '.repeat(Math.max(0, Math.min(width, scaled.length) - minLabel.length - maxLabel.length))}${maxLabel}`);
return lines.join('\n');
}
/**
* Create tensor heatmap (for 2D tensors)
*/
export function createTensorHeatmap(tensor, width = 40) {
const shape = tensor.shape;
if (shape.length !== 2) {
return 'Heatmap only supports 2D tensors';
}
const [rows, cols] = shape;
if (rows === undefined || cols === undefined) {
return 'Invalid tensor shape';
}
const data = tensor.toFloat32Array();
// Find min/max
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
if (!isNaN(val) && isFinite(val)) {
min = Math.min(min, val);
max = Math.max(max, val);
}
}
const range = max - min;
const chars = [' ', '░', '▒', '▓', '█'];
const lines = [];
const scaleX = Math.max(1, Math.ceil(cols / width));
const displayCols = Math.min(cols, width);
for (let r = 0; r < rows; r++) {
let line = '';
for (let c = 0; c < displayCols; c++) {
const idx = r * cols + c * scaleX;
const val = data[idx] ?? 0;
const normalized = range > 0 ? (val - min) / range : 0;
const charIdx = Math.floor(normalized * (chars.length - 1));
line += chars[charIdx];
}
lines.push(line);
}
return lines.join('\n');
}
/**
* Create model architecture visualization
*/
export function visualizeModelArchitecture(layers) {
const lines = [];
lines.push('┌─────────────────────────────────────────────────────────────────────┐');
lines.push('│ Model Architecture │');
lines.push('├─────────────────────────────────────────────────────────────────────┤');
for (let i = 0; i < layers.length; i++) {
const layer = layers[i];
const inputStr = `[${layer.inputShape.join('×')}]`;
const outputStr = `[${layer.outputShape.join('×')}]`;
lines.push(`│ ${(i + 1).toString().padStart(2)}. ${layer.name.padEnd(20)} │ ${layer.type.padEnd(15)} │`);
lines.push(`│ ${inputStr.padEnd(15)} → ${outputStr.padEnd(15)} │`);
if (i < layers.length - 1) {
lines.push('│ ↓ │');
}
}
lines.push('└─────────────────────────────────────────────────────────────────────┘');
return lines.join('\n');
}
// ============================================================================
// Exports
// ============================================================================
export default {
EdgeFlowDebugger,
getDebugger,
enableDebugging,
disableDebugging,
inspectTensor,
formatTensorInspection,
createAsciiHistogram,
createTensorHeatmap,
visualizeModelArchitecture,
};
//# sourceMappingURL=debugger.js.map
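/*
 * Usage sketch (illustrative only): wires the debugger above around a single
 * (fake) inference. The tensor values are synthetic and the output simply reuses
 * the input; in real use the inputs/outputs would come from the runtime. The
 * EdgeFlowTensor import path follows the accompanying .d.ts.
 */
import { EdgeFlowTensor } from '../core/index.js';

async function debuggerSketch() {
  const dbg = enableDebugging({ logLevel: 'debug', inspectTensors: true });

  // Trace one inference: register inputs/outputs and close the trace to update metrics.
  const input = new EdgeFlowTensor(new Float32Array([0.1, -0.4, 2.3, 0]), [2, 2], 'float32');
  const traceId = dbg.startTrace('demo-model');
  dbg.traceInput(traceId, input, 'input');

  // ... run the model here; the input stands in for the output in this sketch ...
  const output = input;
  dbg.traceOutput(traceId, output, 'output');
  dbg.endTrace(traceId);

  // Inspect a tensor and print the aggregate report.
  console.log(formatTensorInspection(inspectTensor(output, 'output')));
  console.log(dbg.generateReport());
}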
================================================
FILE: dist/tools/index.d.ts
================================================
/**
* edgeFlow.js - Tools and Utilities
*
* Model optimization, quantization, and analysis tools.
*/
import { LoadedModel, QuantizationType } from '../core/types.js';
/**
* Quantization options
*/
export interface QuantizationOptions {
/** Quantization method */
method: QuantizationType;
/** Calibration data for calibrated quantization */
calibrationData?: Float32Array[];
/** Whether to quantize weights only */
weightsOnly?: boolean;
/** Layers to exclude from quantization */
excludeLayers?: string[];
}
/**
* Quantization result
*/
export interface QuantizationResult {
/** Quantized model data */
modelData: ArrayBuffer;
/** Original size in bytes */
originalSize: number;
/** Quantized size in bytes */
quantizedSize: number;
/** Compression ratio */
compressionRatio: number;
/** Quantization statistics */
stats: {
layersQuantized: number;
layersSkipped: number;
};
}
/**
* Quantize a model
*
* @example
* ```typescript
* const quantized = await quantize(model, {
* method: 'int8',
* calibrationData: samples,
* });
* ```
*/
export declare function quantize(model: LoadedModel | ArrayBuffer, options: QuantizationOptions): Promise<QuantizationResult>;
/**
* Pruning options
*/
export interface PruningOptions {
/** Target sparsity (0-1) */
sparsity: number;
/** Pruning method */
method?: 'magnitude' | 'random' | 'structured';
/** Layers to exclude */
excludeLayers?: string[];
}
/**
* Pruning result
*/
export interface PruningResult {
/** Pruned model data */
modelData: ArrayBuffer;
/** Achieved sparsity */
actualSparsity: number;
/** Number of parameters pruned */
parametersPruned: number;
/** Total parameters */
totalParameters: number;
}
/**
* Prune model weights
*/
export declare function prune(model: LoadedModel | ArrayBuffer, options: PruningOptions): Promise<PruningResult>;
/**
* Model analysis result
*/
export interface ModelAnalysis {
/** Total number of parameters */
totalParameters: number;
/** Model size in bytes */
sizeBytes: number;
/** Layer information */
layers: Array<{
name: string;
type: string;
parameters: number;
inputShape: number[];
outputShape: number[];
}>;
/** Estimated FLOPs */
estimatedFlops: number;
/** Memory requirements */
memoryRequirements: {
weights: number;
activations: number;
total: number;
};
}
/**
* Analyze a model
*/
export declare function analyzeModel(model: LoadedModel | ArrayBuffer): Promise<ModelAnalysis>;
/**
* Benchmark options
*/
export interface BenchmarkOptions {
/** Number of warmup runs */
warmupRuns?: number;
/** Number of benchmark runs */
runs?: number;
/** Input shape */
inputShape?: number[];
}
/**
* Benchmark result
*/
export interface BenchmarkResult {
/** Average inference time in ms */
avgTime: number;
/** Minimum inference time in ms */
minTime: number;
/** Maximum inference time in ms */
maxTime: number;
/** Standard deviation */
stdDev: number;
/** Throughput (inferences per second) */
throughput: number;
/** All run times */
times: number[];
}
/**
* Benchmark model inference
*/
export declare function benchmark(runFn: () => Promise<unknown>, options?: BenchmarkOptions): Promise<BenchmarkResult>;
export { benchmark as runBenchmark, compareBenchmarks, benchmarkSuite, benchmarkMemory, formatBenchmarkResult, formatComparisonResult, } from './benchmark.js';
export type { BenchmarkOptions as DetailedBenchmarkOptions, BenchmarkResult as DetailedBenchmarkResult, CompareBenchmarkResult, MemoryBenchmarkResult, } from './benchmark.js';
export { quantizeModel, quantizeTensor, dequantizeTensor, pruneModel, pruneTensor, analyzeModel as analyzeModelDetailed, exportModel as exportModelAdvanced, dequantizeInt8, dequantizeUint8, dequantizeFloat16, float16ToFloat32, } from './quantization.js';
export type { QuantizationType as QuantizationMethod, QuantizationOptions as AdvancedQuantizationOptions, QuantizationProgress, QuantizationResult as AdvancedQuantizationResult, LayerQuantizationStats, QuantizationStats, PruningOptions as AdvancedPruningOptions, PruningResult as AdvancedPruningResult, ModelAnalysis as DetailedModelAnalysis, ExportFormat, ExportOptions, } from './quantization.js';
export { EdgeFlowDebugger, getDebugger, enableDebugging, disableDebugging, inspectTensor, formatTensorInspection, createAsciiHistogram, createTensorHeatmap, visualizeModelArchitecture, } from './debugger.js';
export type { DebuggerConfig, TensorInspection, TensorStats, HistogramData, InferenceTrace, OperationTrace, DebugEvent, PerformanceMetrics as DebugPerformanceMetrics, } from './debugger.js';
export { PerformanceMonitor, getMonitor, startMonitoring, stopMonitoring, generateDashboardHTML, generateAsciiDashboard, } from './monitor.js';
export type { MonitorConfig, PerformanceSample, InferenceMetrics, MemoryMetrics, SystemMetrics, AlertConfig, AlertEvent, WidgetData, } from './monitor.js';
/**
* Export model to different formats
*/
export declare function exportModel(model: LoadedModel | ArrayBuffer, format: 'onnx' | 'json' | 'binary'): Promise<ArrayBuffer | string>;
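// Illustrative usage sketch: 'json' yields a string, 'binary' and 'onnx' yield the
// (possibly unchanged) ArrayBuffer.
//
//   const serialized = await exportModel(modelBuffer, 'json'); // JSON string of the weights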
//# sourceMappingURL=index.d.ts.map
================================================
FILE: dist/tools/index.js
================================================
/**
* edgeFlow.js - Tools and Utilities
*
* Model optimization, quantization, and analysis tools.
*/
/**
* Quantize a model
*
* @example
* ```typescript
* const quantized = await quantize(model, {
* method: 'int8',
* calibrationData: samples,
* });
* ```
*/
export async function quantize(model, options) {
// Get model data
const modelData = model instanceof ArrayBuffer
? model
: await getModelData(model);
const originalSize = modelData.byteLength;
// Apply quantization based on method
let quantizedData;
let layersQuantized = 0;
let layersSkipped = 0;
switch (options.method) {
case 'int8':
({ data: quantizedData, layersQuantized, layersSkipped } =
quantizeInt8(modelData, options));
break;
case 'uint8':
({ data: quantizedData, layersQuantized, layersSkipped } =
quantizeUint8(modelData, options));
break;
case 'float16':
({ data: quantizedData, layersQuantized, layersSkipped } =
quantizeFloat16(modelData, options));
break;
case 'int4':
({ data: quantizedData, layersQuantized, layersSkipped } =
quantizeInt4(modelData, options));
break;
default:
quantizedData = modelData;
}
return {
modelData: quantizedData,
originalSize,
quantizedSize: quantizedData.byteLength,
compressionRatio: originalSize / quantizedData.byteLength,
stats: {
layersQuantized,
layersSkipped,
},
};
}
/**
* Placeholder for getting model data
*/
async function getModelData(_model) {
// In production, this would extract the model weights
return new ArrayBuffer(0);
}
/**
* INT8 quantization
*/
function quantizeInt8(data, _options) {
// Simplified INT8 quantization
const input = new Float32Array(data);
const output = new Int8Array(input.length);
// Find scale
let max = 0;
for (let i = 0; i < input.length; i++) {
const abs = Math.abs(input[i] ?? 0);
if (abs > max)
max = abs;
}
const scale = max / 127 || 1; // fall back to 1 when all inputs are zero
// Quantize
for (let i = 0; i < input.length; i++) {
output[i] = Math.round((input[i] ?? 0) / scale);
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0,
};
}
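// Worked example of the symmetric mapping above (illustrative): for weights
// [0.25, -0.75, 1.0] the absolute maximum is 1.0, so scale = 1.0 / 127, and
// round(v / scale) gives [32, -95, 127]. Dequantization multiplies back by the
// scale, e.g. 32 * (1 / 127) ≈ 0.252.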
/**
* UINT8 quantization
*/
function quantizeUint8(data, _options) {
const input = new Float32Array(data);
const output = new Uint8Array(input.length);
// Find min/max
let min = Infinity, max = -Infinity;
for (let i = 0; i < input.length; i++) {
const val = input[i] ?? 0;
if (val < min)
min = val;
if (val > max)
max = val;
}
const scale = (max - min) / 255 || 1; // fall back to 1 for constant inputs
// Quantize
for (let i = 0; i < input.length; i++) {
output[i] = Math.round(((input[i] ?? 0) - min) / scale);
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0,
};
}
/**
* Float16 quantization
*/
function quantizeFloat16(data, _options) {
const input = new Float32Array(data);
const output = new Uint16Array(input.length);
// Convert float32 to float16
for (let i = 0; i < input.length; i++) {
output[i] = float32ToFloat16(input[i] ?? 0);
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0,
};
}
/**
* INT4 quantization
*/
function quantizeInt4(data, _options) {
const input = new Float32Array(data);
// Pack two INT4 values per byte
const output = new Uint8Array(Math.ceil(input.length / 2));
// Find scale
let max = 0;
for (let i = 0; i < input.length; i++) {
const abs = Math.abs(input[i] ?? 0);
if (abs > max)
max = abs;
}
const scale = max / 7 || 1; // INT4 range: -8 to 7; fall back to 1 when all inputs are zero
// Quantize and pack
for (let i = 0; i < input.length; i += 2) {
const val1 = Math.round((input[i] ?? 0) / scale) + 8;
const val2 = Math.round((input[i + 1] ?? 0) / scale) + 8;
output[i / 2] = ((val1 & 0xF) << 4) | (val2 & 0xF);
}
return {
data: output.buffer,
layersQuantized: 1,
layersSkipped: 0,
};
}
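// Unpacking sketch (illustrative; mirrors the packing above): the high nibble holds
// the first value and the low nibble the second, each offset by +8.
//
//   const byte = packed[j];
//   const v1 = ((byte >> 4) & 0xF) - 8; // first quantized value
//   const v2 = (byte & 0xF) - 8;        // second quantized value
//   // multiply each by `scale` to recover the approximate float weights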
/**
* Convert float32 to float16
*/
function float32ToFloat16(value) {
const floatView = new Float32Array(1);
const int32View = new Int32Array(floatView.buffer);
floatView[0] = value;
const x = int32View[0] ?? 0;
let bits = (x >> 16) & 0x8000; // sign
let m = (x >> 12) & 0x07ff; // mantissa
const e = (x >> 23) & 0xff; // exponent
if (e < 103) {
// Too small, return zero
return bits;
}
if (e > 142) {
// Too large, return infinity
bits |= 0x7c00;
bits |= ((e === 255) ? 0 : 1) && (x & 0x007fffff);
return bits;
}
if (e < 113) {
// Denormalized
m |= 0x0800;
bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
return bits;
}
bits |= ((e - 112) << 10) | (m >> 1);
bits += m & 1;
return bits;
}
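// Sanity check (verifiable by hand): float32ToFloat16(1.0) === 0x3C00 and
// float32ToFloat16(0) === 0x0000, the IEEE 754 half-precision encodings of 1 and 0.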
/**
* Prune model weights
*/
export async function prune(model, options) {
const modelData = model instanceof ArrayBuffer
? model
: await getModelData(model);
const weights = new Float32Array(modelData);
const total = weights.length;
// Calculate threshold for magnitude pruning
const magnitudes = weights.map(Math.abs);
const sorted = [...magnitudes].sort((a, b) => a - b);
const thresholdIdx = Math.floor(options.sparsity * sorted.length);
const threshold = sorted[thresholdIdx] ?? 0;
// Prune weights
let pruned = 0;
for (let i = 0; i < weights.length; i++) {
if (Math.abs(weights[i] ?? 0) < threshold) {
weights[i] = 0;
pruned++;
}
}
return {
modelData: weights.buffer,
actualSparsity: pruned / total,
parametersPruned: pruned,
totalParameters: total,
};
}
/**
* Analyze a model
*/
export async function analyzeModel(model) {
// Simplified analysis
const size = model instanceof ArrayBuffer
? model.byteLength
: model.metadata.sizeBytes;
const estimatedParams = Math.floor(size / 4); // Assume float32
return {
totalParameters: estimatedParams,
sizeBytes: size,
layers: [],
estimatedFlops: estimatedParams * 2, // Rough estimate
memoryRequirements: {
weights: size,
activations: size * 0.1, // Rough estimate
total: size * 1.1,
},
};
}
/**
* Benchmark model inference
*/
export async function benchmark(runFn, options = {}) {
const { warmupRuns = 3, runs = 10, } = options;
// Warmup
for (let i = 0; i < warmupRuns; i++) {
await runFn();
}
// Benchmark
const times = [];
for (let i = 0; i < runs; i++) {
const start = performance.now();
await runFn();
times.push(performance.now() - start);
}
// Calculate statistics
const sum = times.reduce((a, b) => a + b, 0);
const avgTime = sum / times.length;
const minTime = Math.min(...times);
const maxTime = Math.max(...times);
const squaredDiffs = times.map(t => Math.pow(t - avgTime, 2));
const avgSquaredDiff = squaredDiffs.reduce((a, b) => a + b, 0) / times.length;
const stdDev = Math.sqrt(avgSquaredDiff);
return {
avgTime,
minTime,
maxTime,
stdDev,
throughput: 1000 / avgTime,
times,
};
}
// ============================================================================
// Re-export benchmark utilities
// ============================================================================
export { benchmark as runBenchmark, compareBenchmarks, benchmarkSuite, benchmarkMemory, formatBenchmarkResult, formatComparisonResult, } from './benchmark.js';
// ============================================================================
// Re-export advanced quantization tools
// ============================================================================
export { quantizeModel, quantizeTensor, dequantizeTensor, pruneModel, pruneTensor, analyzeModel as analyzeModelDetailed, exportModel as exportModelAdvanced, dequantizeInt8, dequantizeUint8, dequantizeFloat16, float16ToFloat32, } from './quantization.js';
// ============================================================================
// Re-export debugging tools
// ============================================================================
export { EdgeFlowDebugger, getDebugger, enableDebugging, disableDebugging, inspectTensor, formatTensorInspection, createAsciiHistogram, createTensorHeatmap, visualizeModelArchitecture, } from './debugger.js';
// ============================================================================
// Re-export monitoring tools
// ============================================================================
export { PerformanceMonitor, getMonitor, startMonitoring, stopMonitoring, generateDashboardHTML, generateAsciiDashboard, } from './monitor.js';
// ============================================================================
// Export Utilities
// ============================================================================
/**
* Export model to different formats
*/
export async function exportModel(model, format) {
const modelData = model instanceof ArrayBuffer
? model
: await getModelData(model);
switch (format) {
case 'json':
// Export as JSON (for small models)
const array = new Float32Array(modelData);
return JSON.stringify(Array.from(array));
case 'binary':
case 'onnx':
default:
return modelData;
}
}
//# sourceMappingURL=index.js.map
================================================
FILE: dist/tools/monitor.d.ts
================================================
/**
* edgeFlow.js - Performance Monitoring Dashboard
*
* Real-time performance monitoring and metrics visualization.
*/
/**
* Monitor configuration
*/
export interface MonitorConfig {
/** Enable monitoring (default: true) */
enabled?: boolean;
/** Sampling interval in ms (default: 1000) */
sampleInterval?: number;
/** History size (number of samples to keep) */
historySize?: number;
/** Enable memory monitoring (default: true) */
monitorMemory?: boolean;
/** Enable FPS monitoring (default: true) */
monitorFPS?: boolean;
/** Custom metric collectors */
collectors?: Array<() => Record<string, number>>;
}
/**
* Performance sample
*/
export interface PerformanceSample {
timestamp: number;
inference: InferenceMetrics;
memory: MemoryMetrics;
system: SystemMetrics;
custom: Record<string, number>;
}
/**
* Inference metrics
*/
export interface InferenceMetrics {
/** Inferences in the last interval */
count: number;
/** Average inference time (ms) */
avgTime: number;
/** Min inference time (ms) */
minTime: number;
/** Max inference time (ms) */
maxTime: number;
/** Throughput (inferences per second) */
throughput: number;
/** Queue length */
queueLength: number;
/** Active inferences */
activeCount: number;
}
/**
* Memory metrics
*/
export interface MemoryMetrics {
/** Used JS heap size (bytes) */
usedHeap: number;
/** Total JS heap size (bytes) */
totalHeap: number;
/** Heap limit (bytes) */
heapLimit: number;
/** Heap usage percentage */
heapUsage: number;
/** Tensor memory (bytes) */
tensorMemory: number;
/** Cache memory (bytes) */
cacheMemory: number;
}
/**
* System metrics
*/
export interface SystemMetrics {
/** Frames per second */
fps: number;
/** CPU usage estimate (0-1) */
cpuUsage: number;
/** Time since last sample (ms) */
deltaTime: number;
/** Browser info */
userAgent: string;
/** WebGPU available */
webgpuAvailable: boolean;
/** WebNN available */
webnnAvailable: boolean;
}
/**
* Alert configuration
*/
export interface AlertConfig {
/** Metric name */
metric: string;
/** Threshold value */
threshold: number;
/** Comparison operator */
operator: '>' | '<' | '>=' | '<=' | '==' | '!=';
/** Alert message */
message: string;
/** Alert level */
level: 'info' | 'warn' | 'error';
}
/**
* Alert event
*/
export interface AlertEvent {
config: AlertConfig;
value: number;
timestamp: number;
}
/**
* Dashboard widget data
*/
export interface WidgetData {
type: 'chart' | 'gauge' | 'counter' | 'text';
title: string;
data: unknown;
}
/**
* Performance monitor for edgeFlow.js
*/
export declare class PerformanceMonitor {
private config;
private samples;
private isRunning;
private intervalId;
private alerts;
private alertListeners;
private sampleListeners;
private inferenceCount;
private inferenceTimes;
private queueLength;
private activeCount;
private frameCount;
private lastFrameTime;
private fps;
private rafId;
private tensorMemory;
private cacheMemory;
constructor(config?: MonitorConfig);
/**
* Start monitoring
*/
start(): void;
/**
* Stop monitoring
*/
stop(): void;
/**
* Monitor FPS
*/
private monitorFPS;
/**
* Collect a performance sample
*/
private collectSample;
/**
* Collect memory metrics
*/
private collectMemoryMetrics;
/**
* Collect system metrics
*/
private collectSystemMetrics;
/**
* Estimate CPU usage based on inference times
*/
private estimateCPUUsage;
/**
* Check alerts
*/
private checkAlerts;
/**
* Get metric value from sample
*/
private getMetricValue;
/**
* Record an inference
*/
recordInference(duration: number): void;
/**
* Update queue length
*/
updateQueueLength(length: number): void;
/**
* Update active count
*/
updateActiveCount(count: number): void;
/**
* Update tensor memory
*/
updateTensorMemory(bytes: number): void;
/**
* Update cache memory
*/
updateCacheMemory(bytes: number): void;
/**
* Add an alert
*/
addAlert(config: AlertConfig): void;
/**
* Remove an alert
*/
removeAlert(metric: string): void;
/**
* Subscribe to alerts
*/
onAlert(callback: (alert: AlertEvent) => void): () => void;
/**
* Subscribe to samples
*/
onSample(callback: (sample: PerformanceSample) => void): () => void;
/**
* Get current sample
*/
getCurrentSample(): PerformanceSample | undefined;
/**
* Get all samples
*/
getSamples(): PerformanceSample[];
/**
* Get samples in time range
*/
getSamplesInRange(startTime: number, endTime: number): PerformanceSample[];
/**
* Get summary statistics
*/
getSummary(): {
avgInferenceTime: number;
avgThroughput: number;
avgMemoryUsage: number;
avgFPS: number;
totalInferences: number;
uptime: number;
};
/**
* Clear all data
*/
clear(): void;
/**
* Export data
*/
export(): {
samples: PerformanceSample[];
summary: {
avgInferenceTime: number;
avgThroughput: number;
avgMemoryUsage: number;
avgFPS: number;
totalInferences: number;
uptime: number;
};
config: MonitorConfig;
timestamp: number;
};
}
/**
* Generate HTML dashboard
*/
export declare function generateDashboardHTML(monitor: PerformanceMonitor): string;
/**
* Generate ASCII dashboard
*/
export declare function generateAsciiDashboard(monitor: PerformanceMonitor): string;
/**
* Get or create global monitor
*/
export declare function getMonitor(config?: MonitorConfig): PerformanceMonitor;
/**
* Start monitoring
*/
export declare function startMonitoring(config?: MonitorConfig): PerformanceMonitor;
/**
* Stop monitoring
*/
export declare function stopMonitoring(): void;
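// Illustrative usage sketch (`runModel` is a placeholder for whatever async
// inference call you want to track):
//
//   const monitor = startMonitoring({ sampleInterval: 1000, historySize: 120 });
//   monitor.addAlert({ metric: 'memory.heapUsage', threshold: 0.9, operator: '>',
//     message: 'Heap above 90%', level: 'warn' });
//   monitor.onAlert(alert => console.warn(alert.config.message, alert.value));
//
//   const t0 = performance.now();
//   await runModel(input);
//   monitor.recordInference(performance.now() - t0);
//
//   console.log(generateAsciiDashboard(monitor));
//   stopMonitoring();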
declare const _default: {
PerformanceMonitor: typeof PerformanceMonitor;
getMonitor: typeof getMonitor;
startMonitoring: typeof startMonitoring;
stopMonitoring: typeof stopMonitoring;
generateDashboardHTML: typeof generateDashboardHTML;
generateAsciiDashboard: typeof generateAsciiDashboard;
};
export default _default;
//# sourceMappingURL=monitor.d.ts.map
================================================
FILE: dist/tools/monitor.js
================================================
/**
* edgeFlow.js - Performance Monitoring Dashboard
*
* Real-time performance monitoring and metrics visualization.
*/
// ============================================================================
// Performance Monitor
// ============================================================================
/**
* Performance monitor for edgeFlow.js
*/
export class PerformanceMonitor {
config;
samples = [];
isRunning = false;
intervalId = null;
alerts = [];
alertListeners = [];
sampleListeners = [];
// Inference tracking
inferenceCount = 0;
inferenceTimes = [];
queueLength = 0;
activeCount = 0;
// FPS tracking
frameCount = 0;
lastFrameTime = 0;
fps = 0;
rafId = null;
// Memory tracking
tensorMemory = 0;
cacheMemory = 0;
constructor(config = {}) {
this.config = {
enabled: config.enabled ?? true,
sampleInterval: config.sampleInterval ?? 1000,
historySize: config.historySize ?? 60,
monitorMemory: config.monitorMemory ?? true,
monitorFPS: config.monitorFPS ?? true,
collectors: config.collectors ?? [],
};
}
/**
* Start monitoring
*/
start() {
if (this.isRunning)
return;
this.isRunning = true;
// Start sampling
this.intervalId = setInterval(() => {
this.collectSample();
}, this.config.sampleInterval);
// Start FPS monitoring
if (this.config.monitorFPS && typeof requestAnimationFrame !== 'undefined') {
this.lastFrameTime = performance.now();
this.frameCount = 0;
this.monitorFPS();
}
}
/**
* Stop monitoring
*/
stop() {
this.isRunning = false;
if (this.intervalId) {
clearInterval(this.intervalId);
this.intervalId = null;
}
if (this.rafId) {
cancelAnimationFrame(this.rafId);
this.rafId = null;
}
}
/**
* Monitor FPS
*/
monitorFPS() {
if (!this.isRunning)
return;
this.frameCount++;
const now = performance.now();
const elapsed = now - this.lastFrameTime;
if (elapsed >= 1000) {
this.fps = Math.round((this.frameCount * 1000) / elapsed);
this.frameCount = 0;
this.lastFrameTime = now;
}
this.rafId = requestAnimationFrame(() => this.monitorFPS());
}
/**
* Collect a performance sample
*/
collectSample() {
const now = Date.now();
// Calculate inference metrics
const avgTime = this.inferenceTimes.length > 0
? this.inferenceTimes.reduce((a, b) => a + b, 0) / this.inferenceTimes.length
: 0;
const minTime = this.inferenceTimes.length > 0
? Math.min(...this.inferenceTimes)
: 0;
const maxTime = this.inferenceTimes.length > 0
? Math.max(...this.inferenceTimes)
: 0;
const throughput = this.inferenceCount / (this.config.sampleInterval / 1000);
const inference = {
count: this.inferenceCount,
avgTime,
minTime,
maxTime,
throughput,
queueLength: this.queueLength,
activeCount: this.activeCount,
};
// Collect memory metrics
const memory = this.collectMemoryMetrics();
// Collect system metrics
const system = this.collectSystemMetrics();
// Collect custom metrics
const custom = {};
for (const collector of this.config.collectors) {
try {
Object.assign(custom, collector());
}
catch {
// Ignore collector errors
}
}
const sample = {
timestamp: now,
inference,
memory,
system,
custom,
};
// Add to history
this.samples.push(sample);
if (this.samples.length > this.config.historySize) {
this.samples.shift();
}
// Check alerts
this.checkAlerts(sample);
// Notify listeners
for (const listener of this.sampleListeners) {
listener(sample);
}
// Reset counters
this.inferenceCount = 0;
this.inferenceTimes = [];
}
/**
* Collect memory metrics
*/
collectMemoryMetrics() {
let usedHeap = 0;
let totalHeap = 0;
let heapLimit = 0;
if (typeof performance !== 'undefined' && 'memory' in performance) {
const memory = performance.memory;
usedHeap = memory.usedJSHeapSize;
totalHeap = memory.totalJSHeapSize;
heapLimit = memory.jsHeapSizeLimit;
}
return {
usedHeap,
totalHeap,
heapLimit,
heapUsage: heapLimit > 0 ? usedHeap / heapLimit : 0,
tensorMemory: this.tensorMemory,
cacheMemory: this.cacheMemory,
};
}
/**
* Collect system metrics
*/
collectSystemMetrics() {
const lastSample = this.samples[this.samples.length - 1];
const deltaTime = lastSample
? Date.now() - lastSample.timestamp
: this.config.sampleInterval;
// Check WebGPU availability
let webgpuAvailable = false;
if (typeof navigator !== 'undefined' && 'gpu' in navigator) {
webgpuAvailable = true;
}
// Check WebNN availability
let webnnAvailable = false;
if (typeof navigator !== 'undefined' && 'ml' in navigator) {
webnnAvailable = true;
}
return {
fps: this.fps,
cpuUsage: this.estimateCPUUsage(),
deltaTime,
userAgent: typeof navigator !== 'undefined' ? navigator.userAgent : 'unknown',
webgpuAvailable,
webnnAvailable,
};
}
/**
* Estimate CPU usage based on inference times
*/
estimateCPUUsage() {
if (this.inferenceTimes.length === 0)
return 0;
const totalTime = this.inferenceTimes.reduce((a, b) => a + b, 0);
return Math.min(1, totalTime / this.config.sampleInterval);
}
/**
* Check alerts
*/
checkAlerts(sample) {
for (const alert of this.alerts) {
const value = this.getMetricValue(sample, alert.metric);
if (value === undefined)
continue;
let triggered = false;
switch (alert.operator) {
case '>':
triggered = value > alert.threshold;
break;
case '<':
triggered = value < alert.threshold;
break;
case '>=':
triggered = value >= alert.threshold;
break;
case '<=':
triggered = value <= alert.threshold;
break;
case '==':
triggered = value === alert.threshold;
break;
case '!=':
triggered = value !== alert.threshold;
break;
}
if (triggered) {
const event = {
config: alert,
value,
timestamp: sample.timestamp,
};
for (const listener of this.alertListeners) {
listener(event);
}
}
}
}
/**
* Get metric value from sample
*/
getMetricValue(sample, metric) {
const parts = metric.split('.');
let value = sample;
for (const part of parts) {
if (value && typeof value === 'object' && part in value) {
value = value[part];
}
else {
return undefined;
}
}
return typeof value === 'number' ? value : undefined;
}
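// Example of the dotted-path lookup above (illustrative): 'inference.avgTime'
// resolves to sample.inference.avgTime, while an unknown path such as
// 'inference.missing' returns undefined and the alert is skipped.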
/**
* Record an inference
*/
recordInference(duration) {
this.inferenceCount++;
this.inferenceTimes.push(duration);
}
/**
* Update queue length
*/
updateQueueLength(length) {
this.queueLength = length;
}
/**
* Update active count
*/
updateActiveCount(count) {
this.activeCount = count;
}
/**
* Update tensor memory
*/
updateTensorMemory(bytes) {
this.tensorMemory = bytes;
}
/**
* Update cache memory
*/
updateCacheMemory(bytes) {
this.cacheMemory = bytes;
}
/**
* Add an alert
*/
addAlert(config) {
this.alerts.push(config);
}
/**
* Remove an alert
*/
removeAlert(metric) {
this.alerts = this.alerts.filter(a => a.metric !== metric);
}
/**
* Subscribe to alerts
*/
onAlert(callback) {
this.alertListeners.push(callback);
return () => {
const idx = this.alertListeners.indexOf(callback);
if (idx !== -1)
this.alertListeners.splice(idx, 1);
};
}
/**
* Subscribe to samples
*/
onSample(callback) {
this.sampleListeners.push(callback);
return () => {
const idx = this.sampleListeners.indexOf(callback);
if (idx !== -1)
this.sampleListeners.splice(idx, 1);
};
}
/**
* Get current sample
*/
getCurrentSample() {
return this.samples[this.samples.length - 1];
}
/**
* Get all samples
*/
getSamples() {
return [...this.samples];
}
/**
* Get samples in time range
*/
getSamplesInRange(startTime, endTime) {
return this.samples.filter(s => s.timestamp >= startTime && s.timestamp <= endTime);
}
/**
* Get summary statistics
*/
getSummary() {
if (this.samples.length === 0) {
return {
avgInferenceTime: 0,
avgThroughput: 0,
avgMemoryUsage: 0,
avgFPS: 0,
totalInferences: 0,
uptime: 0,
};
}
const avgInferenceTime = this.samples.reduce((sum, s) => sum + s.inference.avgTime, 0) / this.samples.length;
const avgThroughput = this.samples.reduce((sum, s) => sum + s.inference.throughput, 0) / this.samples.length;
const avgMemoryUsage = this.samples.reduce((sum, s) => sum + s.memory.heapUsage, 0) / this.samples.length;
const avgFPS = this.samples.reduce((sum, s) => sum + s.system.fps, 0) / this.samples.length;
const totalInferences = this.samples.reduce((sum, s) => sum + s.inference.count, 0);
const firstSample = this.samples[0];
const lastSample = this.samples[this.samples.length - 1];
const uptime = lastSample.timestamp - firstSample.timestamp;
return {
avgInferenceTime,
avgThroughput,
avgMemoryUsage,
avgFPS,
totalInferences,
uptime,
};
}
/**
* Clear all data
*/
clear() {
this.samples = [];
this.inferenceCount = 0;
this.inferenceTimes = [];
this.queueLength = 0;
this.activeCount = 0;
this.tensorMemory = 0;
this.cacheMemory = 0;
}
/**
* Export data
*/
export() {
return {
samples: this.getSamples(),
summary: this.getSummary(),
config: this.config,
timestamp: Date.now(),
};
}
}
// ============================================================================
// Dashboard Generator
// ============================================================================
/**
* Generate HTML dashboard
*/
export function generateDashboardHTML(monitor) {
const summary = monitor.getSummary();
const samples = monitor.getSamples();
const lastSample = samples[samples.length - 1];
const formatBytes = (bytes) => {
if (bytes < 1024)
return `${bytes} B`;
if (bytes < 1024 * 1024)
return `${(bytes / 1024).toFixed(1)} KB`;
if (bytes < 1024 * 1024 * 1024)
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
};
const formatDuration = (ms) => {
if (ms < 1000)
return `${ms.toFixed(0)}ms`;
if (ms < 60000)
return `${(ms / 1000).toFixed(1)}s`;
return `${(ms / 60000).toFixed(1)}m`;
};
return `
edgeFlow.js Performance Dashboard
${summary.totalInferences.toLocaleString()}
${summary.avgInferenceTime.toFixed(1)}ms
${summary.avgThroughput.toFixed(1)}ops/s
${Math.round(summary.avgFPS)}
${formatBytes(lastSample?.memory.usedHeap ?? 0)}
${formatBytes(lastSample?.memory.tensorMemory ?? 0)}
${formatBytes(lastSample?.memory.cacheMemory ?? 0)}
${lastSample?.inference.queueLength ?? 0}
${generateChartPath(samples)}
Time
Inferences
Avg Time
Throughput
Memory
FPS
${samples.slice(-10).reverse().map(s => `
${new Date(s.timestamp).toLocaleTimeString()}
${s.inference.count}
${s.inference.avgTime.toFixed(2)}ms
${s.inference.throughput.toFixed(1)}/s
${formatBytes(s.memory.usedHeap)}
${s.system.fps}
`).join('')}
Generated at ${new Date().toLocaleString()} | edgeFlow.js Performance Monitor
`.trim();
}
/**
* Generate SVG chart path
*/
function generateChartPath(samples) {
if (samples.length < 2)
return '';
const width = 600;
const height = 180;
const padding = 10;
const times = samples.map(s => s.inference.avgTime);
const maxTime = Math.max(...times, 1);
const points = samples.map((s, i) => {
const x = padding + (i / (samples.length - 1)) * (width - 2 * padding);
const y = height - padding - (s.inference.avgTime / maxTime) * (height - 2 * padding);
return `${x},${y}`;
});
const linePath = `M ${points.join(' L ')}`;
const areaPath = `M ${padding},${height - padding} L ${points.join(' L ')} L ${width - padding},${height - padding} Z`;
// Grid lines
const gridLines = [];
for (let i = 0; i <= 4; i++) {
const y = padding + (i / 4) * (height - 2 * padding);
gridLines.push(` `);
}
return `
${gridLines.join('\n')}
`;
}
/**
* Generate ASCII dashboard
*/
export function generateAsciiDashboard(monitor) {
const summary = monitor.getSummary();
const samples = monitor.getSamples();
const lastSample = samples[samples.length - 1];
const formatBytes = (bytes) => {
if (bytes < 1024)
return `${bytes} B`;
if (bytes < 1024 * 1024)
return `${(bytes / 1024).toFixed(1)} KB`;
if (bytes < 1024 * 1024 * 1024)
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
};
const bar = (value, max, width = 20) => {
const filled = Math.round((value / max) * width);
return '█'.repeat(filled) + '░'.repeat(width - filled);
};
const lines = [
'╔══════════════════════════════════════════════════════════════════════════╗',
'║ edgeFlow.js Performance Monitor Dashboard ║',
'╠══════════════════════════════════════════════════════════════════════════╣',
'║ ║',
`║ Total Inferences: ${summary.totalInferences.toString().padStart(10)} ║`,
`║ Avg Inference: ${summary.avgInferenceTime.toFixed(2).padStart(10)}ms ║`,
`║ Throughput: ${summary.avgThroughput.toFixed(2).padStart(10)} ops/s ║`,
`║ Avg FPS: ${Math.round(summary.avgFPS).toString().padStart(10)} ║`,
'║ ║',
'╟──────────────────────────────────────────────────────────────────────────╢',
'║ Memory Usage ║',
`║ Heap: ${bar(summary.avgMemoryUsage, 1)} ${(summary.avgMemoryUsage * 100).toFixed(0).padStart(3)}% ║`,
`║ Used: ${formatBytes(lastSample?.memory.usedHeap ?? 0).padStart(10)} ║`,
`║ Tensor: ${formatBytes(lastSample?.memory.tensorMemory ?? 0).padStart(10)} ║`,
`║ Cache: ${formatBytes(lastSample?.memory.cacheMemory ?? 0).padStart(10)} ║`,
'║ ║',
'╟──────────────────────────────────────────────────────────────────────────╢',
'║ Inference Time History (last 30 samples) ║',
'║ ║',
];
// Add mini chart
const recentSamples = samples.slice(-30);
if (recentSamples.length > 0) {
const times = recentSamples.map(s => s.inference.avgTime);
const maxTime = Math.max(...times, 1);
const chartHeight = 5;
for (let row = chartHeight; row > 0; row--) {
let line = '║ ';
for (const time of times) {
const height = Math.ceil((time / maxTime) * chartHeight);
line += height >= row ? '▓' : ' ';
}
lines.push(line.padEnd(76) + '║');
}
lines.push('║ ' + '─'.repeat(30) + ' ║');
}
lines.push('║ ║');
lines.push(`║ Last updated: ${new Date().toLocaleString().padEnd(40)} ║`);
lines.push('╚══════════════════════════════════════════════════════════════════════════╝');
return lines.join('\n');
}
// ============================================================================
// Global Instance
// ============================================================================
let globalMonitor = null;
/**
* Get or create global monitor
*/
export function getMonitor(config) {
if (!globalMonitor || config) {
globalMonitor = new PerformanceMonitor(config);
}
return globalMonitor;
}
/**
* Start monitoring
*/
export function startMonitoring(config) {
const monitor = getMonitor(config);
monitor.start();
return monitor;
}
/**
* Stop monitoring
*/
export function stopMonitoring() {
globalMonitor?.stop();
}
// ============================================================================
// Exports
// ============================================================================
export default {
PerformanceMonitor,
getMonitor,
startMonitoring,
stopMonitoring,
generateDashboardHTML,
generateAsciiDashboard,
};
//# sourceMappingURL=monitor.js.map
================================================
FILE: dist/tools/quantization.d.ts
================================================
/**
* edgeFlow.js - Model Compression & Quantization Tools
*
* In-browser model quantization and compression utilities.
* Supports dynamic quantization (no calibration data needed).
*/
import { EdgeFlowTensor } from '../core/index.js';
/**
* Quantization type
*/
export type QuantizationType = 'int8' | 'uint8' | 'int4' | 'float16' | 'dynamic';
/**
* Quantization options
*/
export interface QuantizationOptions {
/** Quantization type */
type: QuantizationType;
/** Layers/ops to skip quantization (by name pattern) */
skipPatterns?: (string | RegExp)[];
/** Per-channel quantization (more accurate, larger model) */
perChannel?: boolean;
/** Symmetric quantization (simpler, slightly less accurate) */
symmetric?: boolean;
/** Progress callback */
onProgress?: (progress: QuantizationProgress) => void;
/** Minimum tensor size to quantize (in elements) */
minTensorSize?: number;
/** Keep original weights for comparison */
keepOriginal?: boolean;
}
/**
* Quantization progress
*/
export interface QuantizationProgress {
stage: 'analyzing' | 'quantizing' | 'packing' | 'complete';
current: number;
total: number;
percent: number;
layerName?: string;
}
/**
* Quantization result
*/
export interface QuantizationResult {
/** Quantized model data */
data: ArrayBuffer;
/** Original model size in bytes */
originalSize: number;
/** Quantized model size in bytes */
quantizedSize: number;
/** Compression ratio */
compressionRatio: number;
/** Number of tensors quantized */
tensorsQuantized: number;
/** Number of tensors skipped */
tensorsSkipped: number;
/** Quantization statistics per layer */
layerStats: LayerQuantizationStats[];
/** Overall statistics */
stats: QuantizationStats;
}
/**
* Layer quantization statistics
*/
export interface LayerQuantizationStats {
name: string;
originalDtype: string;
quantizedDtype: string;
originalSize: number;
quantizedSize: number;
scale: number | number[];
zeroPoint: number | number[];
minValue: number;
maxValue: number;
skipped: boolean;
skipReason?: string;
}
/**
* Overall quantization statistics
*/
export interface QuantizationStats {
totalParameters: number;
quantizedParameters: number;
averageScale: number;
minScale: number;
maxScale: number;
errorEstimate: number;
}
/**
* Dequantize int8 data back to float32
*/
export declare function dequantizeInt8(data: Int8Array, scale: number | Float32Array, zeroPoint: number | Int32Array, perChannel?: boolean, channelSize?: number): Float32Array;
/**
* Dequantize uint8 data back to float32
*/
export declare function dequantizeUint8(data: Uint8Array, scale: number | Float32Array, zeroPoint: number | Int32Array, perChannel?: boolean, channelSize?: number): Float32Array;
/**
* Convert float16 bits back to float32
*/
export declare function float16ToFloat32(value: number): number;
/**
* Dequantize float16 data back to float32
*/
export declare function dequantizeFloat16(data: Uint16Array): Float32Array;
/**
* Quantize a model
*/
export declare function quantizeModel(modelData: ArrayBuffer, options: QuantizationOptions): Promise<QuantizationResult>;
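// Illustrative usage sketch (assumes `modelBuffer` holds the float32 weights;
// skip patterns and the progress callback are optional):
//
//   const result = await quantizeModel(modelBuffer, {
//     type: 'int8',
//     symmetric: true,
//     skipPatterns: [/layer_norm/],
//     onProgress: p => console.log(`${p.stage} ${p.percent.toFixed(0)}%`),
//   });
//   console.log(`size ${result.originalSize} -> ${result.quantizedSize}`,
//     `(${result.compressionRatio.toFixed(2)}x)`);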
/**
* Quantize a single EdgeFlowTensor
*/
export declare function quantizeTensor(tensor: EdgeFlowTensor, type: QuantizationType, options?: {
symmetric?: boolean;
perChannel?: boolean;
}): {
tensor: EdgeFlowTensor;
scale: number | number[];
zeroPoint: number | number[];
};
/**
* Dequantize a tensor back to float32
*/
export declare function dequantizeTensor(tensor: EdgeFlowTensor, scale: number | number[], zeroPoint: number | number[], type: QuantizationType): EdgeFlowTensor;
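// Round-trip sketch (illustrative; `weights` is any float32 EdgeFlowTensor):
//
//   const { tensor: q, scale, zeroPoint } = quantizeTensor(weights, 'int8', { symmetric: true });
//   const restored = dequantizeTensor(q, scale, zeroPoint, 'int8');
//   // `restored` approximates `weights` to within roughly one quantization step (the scale)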
/**
* Pruning options
*/
export interface PruningOptions {
/** Pruning ratio (0-1, default: 0.5 = 50% sparsity) */
ratio?: number;
/** Pruning method */
method?: 'magnitude' | 'random' | 'structured';
/** For structured pruning: dimension to prune along */
dim?: number;
/** Minimum absolute value to keep */
threshold?: number;
/** Progress callback */
onProgress?: (progress: {
current: number;
total: number;
percent: number;
}) => void;
}
/**
* Pruning result
*/
export interface PruningResult {
/** Pruned model data */
data: ArrayBuffer;
/** Original size */
originalSize: number;
/** Pruned size (sparse representation) */
prunedSize: number;
/** Sparsity ratio achieved */
sparsity: number;
/** Number of parameters pruned */
parametersPruned: number;
/** Total parameters */
totalParameters: number;
}
/**
* Prune a tensor using magnitude-based pruning
*/
export declare function pruneTensor(tensor: EdgeFlowTensor, options?: PruningOptions): {
tensor: EdgeFlowTensor;
mask: EdgeFlowTensor;
sparsity: number;
};
/**
* Prune a model
*/
export declare function pruneModel(modelData: ArrayBuffer, options?: PruningOptions): Promise<PruningResult>;
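// Illustrative usage sketch (note this API uses `ratio`, unlike the `sparsity`
// field taken by the higher-level prune() in dist/tools/index):
//
//   const pruned = await pruneModel(modelBuffer, { ratio: 0.6, method: 'magnitude' });
//   console.log(`sparsity achieved: ${(pruned.sparsity * 100).toFixed(1)}%`);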
/**
* Model analysis result
*/
export interface ModelAnalysis {
/** Total model size in bytes */
totalSize: number;
/** Number of tensors */
tensorCount: number;
/** Total number of parameters */
totalParameters: number;
/** Parameter breakdown by dtype */
dtypeBreakdown: Record<string, { count: number; size: number }>;
/** Largest tensors */
largestTensors: Array<{
name: string;
size: number;
shape: number[];
}>;
/** Estimated memory usage at runtime */
estimatedMemory: number;
/** Recommended quantization type */
recommendedQuantization: QuantizationType;
/** Estimated size after quantization */
estimatedQuantizedSizes: Record<QuantizationType, number>;
}
/**
* Analyze a model
*/
export declare function analyzeModel(modelData: ArrayBuffer): Promise<ModelAnalysis>;
/**
* Export format
*/
export type ExportFormat = 'onnx' | 'tflite' | 'edgeflow';
/**
* Export options
*/
export interface ExportOptions {
format: ExportFormat;
optimize?: boolean;
quantize?: QuantizationType;
}
/**
* Export a model to different formats
* Note: This is a placeholder - real implementation would require proper format conversion
*/
export declare function exportModel(modelData: ArrayBuffer, options: ExportOptions): Promise<ArrayBuffer>;
declare const _default: {
quantizeModel: typeof quantizeModel;
quantizeTensor: typeof quantizeTensor;
dequantizeTensor: typeof dequantizeTensor;
pruneModel: typeof pruneModel;
pruneTensor: typeof pruneTensor;
analyzeModel: typeof analyzeModel;
exportModel: typeof exportModel;
dequantizeInt8: typeof dequantizeInt8;
dequantizeUint8: typeof dequantizeUint8;
dequantizeFloat16: typeof dequantizeFloat16;
float16ToFloat32: typeof float16ToFloat32;
};
export default _default;
//# sourceMappingURL=quantization.d.ts.map
================================================
FILE: dist/tools/quantization.js
================================================
/**
* edgeFlow.js - Model Compression & Quantization Tools
*
* In-browser model quantization and compression utilities.
* Supports dynamic quantization (no calibration data needed).
*/
import { EdgeFlowTensor } from '../core/index.js';
// ============================================================================
// Quantization Core
// ============================================================================
/**
* Calculate quantization parameters for a tensor
*/
function calculateQuantParams(data, bits, symmetric, perChannel, channelAxis = 0, shape = []) {
const qmin = symmetric ? -(1 << (bits - 1)) : 0;
const qmax = symmetric ? (1 << (bits - 1)) - 1 : (1 << bits) - 1;
if (perChannel && shape.length > 1) {
// Per-channel quantization
const numChannels = shape[channelAxis] ?? 1;
const scales = new Float32Array(numChannels);
const zeroPoints = new Int32Array(numChannels);
const channelSize = data.length / numChannels;
let globalMin = Infinity;
let globalMax = -Infinity;
for (let c = 0; c < numChannels; c++) {
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
const val = data[idx] ?? 0;
min = Math.min(min, val);
max = Math.max(max, val);
}
globalMin = Math.min(globalMin, min);
globalMax = Math.max(globalMax, max);
if (symmetric) {
const absMax = Math.max(Math.abs(min), Math.abs(max));
scales[c] = absMax / qmax;
zeroPoints[c] = 0;
}
else {
scales[c] = (max - min) / (qmax - qmin);
zeroPoints[c] = Math.round(qmin - min / (scales[c] || 1));
}
// Avoid division by zero
if (scales[c] === 0)
scales[c] = 1;
}
return { scale: scales, zeroPoint: zeroPoints, min: globalMin, max: globalMax };
}
else {
// Per-tensor quantization
let min = Infinity;
let max = -Infinity;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
min = Math.min(min, val);
max = Math.max(max, val);
}
let scale;
let zeroPoint;
if (symmetric) {
const absMax = Math.max(Math.abs(min), Math.abs(max));
scale = absMax / qmax;
zeroPoint = 0;
}
else {
scale = (max - min) / (qmax - qmin);
zeroPoint = Math.round(qmin - min / (scale || 1));
}
// Avoid division by zero
if (scale === 0)
scale = 1;
return { scale, zeroPoint, min, max };
}
}
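// Worked example of the per-tensor branch above (illustrative): with 8-bit symmetric
// quantization qmax = 127, so weights spanning [-0.8, 0.4] give
// scale = 0.8 / 127 ≈ 0.0063 and zeroPoint = 0. The asymmetric case instead uses
// scale = (0.4 - (-0.8)) / 255 ≈ 0.0047 and zeroPoint = round(0 - (-0.8) / 0.0047) ≈ 170.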
/**
* Quantize float32 data to int8
*/
function quantizeToInt8(data, scale, zeroPoint, perChannel, channelSize = data.length) {
const result = new Int8Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
const val = data[idx] ?? 0;
result[idx] = Math.max(-128, Math.min(127, Math.round(val / s + zp)));
}
}
}
else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
result[i] = Math.max(-128, Math.min(127, Math.round(val / s + zp)));
}
}
return result;
}
/**
* Quantize float32 data to uint8
*/
function quantizeToUint8(data, scale, zeroPoint, perChannel, channelSize = data.length) {
const result = new Uint8Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
const val = data[idx] ?? 0;
result[idx] = Math.max(0, Math.min(255, Math.round(val / s + zp)));
}
}
}
else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
const val = data[i] ?? 0;
result[i] = Math.max(0, Math.min(255, Math.round(val / s + zp)));
}
}
return result;
}
/**
* Quantize float32 data to int4 (packed as uint8, 2 values per byte)
*/
function quantizeToInt4(data, scale, zeroPoint) {
const packedLength = Math.ceil(data.length / 2);
const result = new Uint8Array(packedLength);
for (let i = 0; i < data.length; i += 2) {
const val1 = data[i] ?? 0;
const val2 = data[i + 1] ?? 0;
// Quantize to range [-8, 7] then shift to [0, 15]
const q1 = Math.max(0, Math.min(15, Math.round(val1 / scale + zeroPoint + 8)));
const q2 = Math.max(0, Math.min(15, Math.round(val2 / scale + zeroPoint + 8)));
// Pack two 4-bit values into one byte
result[i >> 1] = (q1 << 4) | q2;
}
return result;
}
/**
* Convert float32 to float16 (stored in Uint16Array)
*/
function quantizeToFloat16(data) {
const result = new Uint16Array(data.length);
for (let i = 0; i < data.length; i++) {
result[i] = float32ToFloat16(data[i] ?? 0);
}
return result;
}
/**
* Convert a single float32 value to float16 bits
*/
function float32ToFloat16(value) {
const float32View = new Float32Array(1);
const int32View = new Int32Array(float32View.buffer);
float32View[0] = value;
const f = int32View[0];
const sign = (f >> 16) & 0x8000;
const exponent = ((f >> 23) & 0xff) - 127 + 15;
const mantissa = f & 0x7fffff;
if (exponent <= 0) {
// Denormalized or zero
if (exponent < -10) {
return sign;
}
const m = (mantissa | 0x800000) >> (1 - exponent);
return sign | (m >> 13);
}
else if (exponent >= 31) {
// Overflow to infinity
return sign | 0x7c00;
}
return sign | (exponent << 10) | (mantissa >> 13);
}
/**
* Dequantize int8 data back to float32
*/
export function dequantizeInt8(data, scale, zeroPoint, perChannel = false, channelSize = data.length) {
const result = new Float32Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
result[idx] = ((data[idx] ?? 0) - zp) * s;
}
}
}
else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
result[i] = ((data[i] ?? 0) - zp) * s;
}
}
return result;
}
/**
* Dequantize uint8 data back to float32
*/
export function dequantizeUint8(data, scale, zeroPoint, perChannel = false, channelSize = data.length) {
const result = new Float32Array(data.length);
if (perChannel && scale instanceof Float32Array) {
const numChannels = scale.length;
for (let c = 0; c < numChannels; c++) {
const s = scale[c] ?? 1;
const zp = zeroPoint[c] ?? 0;
for (let i = 0; i < channelSize; i++) {
const idx = c * channelSize + i;
result[idx] = ((data[idx] ?? 0) - zp) * s;
}
}
}
else {
const s = scale;
const zp = zeroPoint;
for (let i = 0; i < data.length; i++) {
result[i] = ((data[i] ?? 0) - zp) * s;
}
}
return result;
}
/**
* Convert float16 bits back to float32
*/
export function float16ToFloat32(value) {
const sign = (value & 0x8000) >> 15;
const exponent = (value & 0x7c00) >> 10;
const mantissa = value & 0x03ff;
if (exponent === 0) {
if (mantissa === 0) {
return sign === 0 ? 0 : -0;
}
// Denormalized
return (sign === 0 ? 1 : -1) * Math.pow(2, -14) * (mantissa / 1024);
}
else if (exponent === 31) {
if (mantissa === 0) {
return sign === 0 ? Infinity : -Infinity;
}
return NaN;
}
return (sign === 0 ? 1 : -1) * Math.pow(2, exponent - 15) * (1 + mantissa / 1024);
}
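// Quick check (verifiable by hand): float16ToFloat32(0x3C00) === 1,
// float16ToFloat32(0xC000) === -2, and 0x7C00 decodes to Infinity.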
/**
* Dequantize float16 data back to float32
*/
export function dequantizeFloat16(data) {
const result = new Float32Array(data.length);
for (let i = 0; i < data.length; i++) {
result[i] = float16ToFloat32(data[i] ?? 0);
}
return result;
}
/**
* Parse ONNX model to extract weights
* Note: This is a simplified parser for demonstration
*/
function parseModelWeights(modelData) {
// Check if it's an ONNX model by magic number
// const view = new DataView(modelData); // Reserved for future ONNX header parsing
const weights = [];
// Simple heuristic: look for float32 arrays in the buffer
// In a real implementation, we'd use proper ONNX parsing
const float32Array = new Float32Array(modelData);
// Create a single weight tensor from the model data
// This is a placeholder - real implementation would parse ONNX properly
weights.push({
name: 'model_weights',
data: float32Array,
shape: [float32Array.length],
dtype: 'float32',
});
return weights;
}
/**
* Serialize quantized model to ArrayBuffer
*/
function serializeQuantizedModel(model) {
// Create a simple binary format:
// Header: version (4 bytes) + type (4 bytes) + originalSize (8 bytes) + numWeights (4 bytes)
// For each weight: nameLen (4) + name + shapeLen (4) + shape + dtypeLen (4) + dtype +
// origDtypeLen (4) + origDtype + hasScale (1) + scale + hasZP (1) + zp + dataLen (8) + data
const encoder = new TextEncoder();
// Calculate total size
let totalSize = 20; // Header
for (const weight of model.weights) {
const nameBytes = encoder.encode(weight.name);
const dtypeBytes = encoder.encode(weight.dtype);
const origDtypeBytes = encoder.encode(weight.originalDtype);
totalSize += 4 + nameBytes.length; // name
totalSize += 4 + weight.shape.length * 4; // shape
totalSize += 4 + dtypeBytes.length; // dtype
totalSize += 4 + origDtypeBytes.length; // originalDtype
totalSize += 1; // hasScale
if (weight.scale !== undefined) {
totalSize += Array.isArray(weight.scale) ? 4 + weight.scale.length * 4 : 4;
}
totalSize += 1; // hasZeroPoint
if (weight.zeroPoint !== undefined) {
totalSize += Array.isArray(weight.zeroPoint) ? 4 + weight.zeroPoint.length * 4 : 4;
}
totalSize += 8 + weight.data.byteLength; // data
}
const buffer = new ArrayBuffer(totalSize);
const view = new DataView(buffer);
const uint8 = new Uint8Array(buffer);
let offset = 0;
// Write header
view.setUint32(offset, model.version, true);
offset += 4;
view.setUint32(offset, ['int8', 'uint8', 'int4', 'float16', 'dynamic'].indexOf(model.quantizationType), true);
offset += 4;
// Write originalSize as two 32-bit integers (for 64-bit compatibility)
view.setUint32(offset, model.originalSize & 0xFFFFFFFF, true);
offset += 4;
view.setUint32(offset, (model.originalSize / 0x100000000) >>> 0, true);
offset += 4;
view.setUint32(offset, model.weights.length, true);
offset += 4;
// Write weights
for (const weight of model.weights) {
const nameBytes = encoder.encode(weight.name);
const dtypeBytes = encoder.encode(weight.dtype);
const origDtypeBytes = encoder.encode(weight.originalDtype);
// Name
view.setUint32(offset, nameBytes.length, true);
offset += 4;
uint8.set(nameBytes, offset);
offset += nameBytes.length;
// Shape
view.setUint32(offset, weight.shape.length, true);
offset += 4;
for (const dim of weight.shape) {
view.setInt32(offset, dim, true);
offset += 4;
}
// Dtype
view.setUint32(offset, dtypeBytes.length, true);
offset += 4;
uint8.set(dtypeBytes, offset);
offset += dtypeBytes.length;
// Original dtype
view.setUint32(offset, origDtypeBytes.length, true);
offset += 4;
uint8.set(origDtypeBytes, offset);
offset += origDtypeBytes.length;
// Scale
if (weight.scale !== undefined) {
view.setUint8(offset, 1);
offset += 1;
if (Array.isArray(weight.scale)) {
view.setUint32(offset, weight.scale.length, true);
offset += 4;
for (const s of weight.scale) {
view.setFloat32(offset, s, true);
offset += 4;
}
}
else {
view.setUint32(offset, 1, true);
offset += 4;
view.setFloat32(offset, weight.scale, true);
offset += 4;
}
}
else {
view.setUint8(offset, 0);
offset += 1;
}
// Zero point
if (weight.zeroPoint !== undefined) {
view.setUint8(offset, 1);
offset += 1;
if (Array.isArray(weight.zeroPoint)) {
view.setUint32(offset, weight.zeroPoint.length, true);
offset += 4;
for (const zp of weight.zeroPoint) {
view.setInt32(offset, zp, true);
offset += 4;
}
}
else {
view.setUint32(offset, 1, true);
offset += 4;
view.setInt32(offset, weight.zeroPoint, true);
offset += 4;
}
}
else {
view.setUint8(offset, 0);
offset += 1;
}
// Data
const dataLow = weight.data.byteLength & 0xFFFFFFFF;
const dataHigh = (weight.data.byteLength / 0x100000000) >>> 0;
view.setUint32(offset, dataLow, true);
offset += 4;
view.setUint32(offset, dataHigh, true);
offset += 4;
uint8.set(new Uint8Array(weight.data), offset);
offset += weight.data.byteLength;
}
return buffer;
}
/**
* Quantize a model
*/
export async function quantizeModel(modelData, options) {
const { type, skipPatterns = [], perChannel = false, symmetric = true, onProgress, minTensorSize = 100, } = options;
const originalSize = modelData.byteLength;
const layerStats = [];
let tensorsQuantized = 0;
let tensorsSkipped = 0;
// Parse model weights
onProgress?.({ stage: 'analyzing', current: 0, total: 1, percent: 0 });
const weights = parseModelWeights(modelData);
const quantizedWeights = [];
let totalParams = 0;
let quantizedParams = 0;
const scales = [];
// Quantize each weight tensor
for (let i = 0; i < weights.length; i++) {
const weight = weights[i];
const percent = ((i + 1) / weights.length) * 100;
onProgress?.({
stage: 'quantizing',
current: i + 1,
total: weights.length,
percent,
layerName: weight.name,
});
totalParams += weight.data.length;
// Check if should skip
const shouldSkip = weight.data.length < minTensorSize ||
skipPatterns.some(pattern => {
if (typeof pattern === 'string') {
return weight.name.includes(pattern);
}
return pattern.test(weight.name);
});
if (shouldSkip) {
tensorsSkipped++;
layerStats.push({
name: weight.name,
originalDtype: weight.dtype,
quantizedDtype: weight.dtype,
originalSize: weight.data.byteLength,
quantizedSize: weight.data.byteLength,
scale: 1,
zeroPoint: 0,
minValue: Math.min(...weight.data),
maxValue: Math.max(...weight.data),
skipped: true,
skipReason: weight.data.length < minTensorSize
? 'Tensor too small'
: 'Matched skip pattern',
});
quantizedWeights.push({
name: weight.name,
data: weight.data.buffer.slice(0),
shape: weight.shape,
dtype: weight.dtype,
originalDtype: weight.dtype,
});
continue;
}
// Calculate quantization parameters
const bits = type === 'int4' ? 4 : 8;
const params = calculateQuantParams(weight.data, bits, symmetric, perChannel, 0, weight.shape);
// Quantize data
let quantizedData;
let quantizedDtype;
switch (type) {
case 'int8':
const int8Data = quantizeToInt8(weight.data, params.scale, params.zeroPoint, perChannel, perChannel ? weight.data.length / (weight.shape[0] ?? 1) : weight.data.length);
quantizedData = int8Data.buffer.slice(0);
quantizedDtype = 'int8';
break;
case 'uint8':
const uint8Data = quantizeToUint8(weight.data, params.scale, params.zeroPoint, perChannel, perChannel ? weight.data.length / (weight.shape[0] ?? 1) : weight.data.length);
quantizedData = uint8Data.buffer.slice(0);
quantizedDtype = 'uint8';
break;
case 'int4':
const int4Data = quantizeToInt4(weight.data, params.scale, params.zeroPoint);
quantizedData = int4Data.buffer.slice(0);
quantizedDtype = 'int4';
break;
case 'float16':
const fp16Data = quantizeToFloat16(weight.data);
quantizedData = fp16Data.buffer.slice(0);
quantizedDtype = 'float16';
break;
case 'dynamic':
default:
// Dynamic quantization: use int8 for weights
const dynData = quantizeToInt8(weight.data, params.scale, params.zeroPoint, perChannel, perChannel ? weight.data.length / (weight.shape[0] ?? 1) : weight.data.length);
quantizedData = dynData.buffer.slice(0);
quantizedDtype = 'int8';
break;
}
tensorsQuantized++;
quantizedParams += weight.data.length;
const scaleValue = params.scale instanceof Float32Array
? Array.from(params.scale)
: params.scale;
const zpValue = params.zeroPoint instanceof Int32Array
? Array.from(params.zeroPoint)
: params.zeroPoint;
if (typeof scaleValue === 'number') {
scales.push(scaleValue);
}
else {
scales.push(...scaleValue);
}
layerStats.push({
name: weight.name,
originalDtype: weight.dtype,
quantizedDtype,
originalSize: weight.data.byteLength,
quantizedSize: quantizedData.byteLength,
scale: scaleValue,
zeroPoint: zpValue,
minValue: params.min,
maxValue: params.max,
skipped: false,
});
quantizedWeights.push({
name: weight.name,
data: quantizedData,
shape: weight.shape,
dtype: quantizedDtype,
originalDtype: weight.dtype,
scale: scaleValue,
zeroPoint: zpValue,
});
}
// Pack into final format
onProgress?.({ stage: 'packing', current: 0, total: 1, percent: 0 });
const quantizedModel = {
version: 1,
quantizationType: type,
originalSize,
weights: quantizedWeights,
};
const quantizedData = serializeQuantizedModel(quantizedModel);
onProgress?.({ stage: 'complete', current: 1, total: 1, percent: 100 });
// Calculate statistics
const avgScale = scales.length > 0
? scales.reduce((a, b) => a + b, 0) / scales.length
: 1;
const minScale = scales.length > 0 ? Math.min(...scales) : 1;
const maxScale = scales.length > 0 ? Math.max(...scales) : 1;
// Estimate quantization error (very rough approximation)
const bitsReduction = type === 'int4' ? 8 : type === 'float16' ? 2 : 4;
const errorEstimate = avgScale / bitsReduction;
return {
data: quantizedData,
originalSize,
quantizedSize: quantizedData.byteLength,
compressionRatio: originalSize / quantizedData.byteLength,
tensorsQuantized,
tensorsSkipped,
layerStats,
stats: {
totalParameters: totalParams,
quantizedParameters: quantizedParams,
averageScale: avgScale,
minScale,
maxScale,
errorEstimate,
},
};
}
// ============================================================================
// Tensor Quantization (for individual tensors)
// ============================================================================
/**
* Quantize a single EdgeFlowTensor
*/
export function quantizeTensor(tensor, type, options = {}) {
const { symmetric = true, perChannel = false } = options;
const data = tensor.toFloat32Array();
const shape = tensor.shape;
const bits = type === 'int4' ? 4 : 8;
const params = calculateQuantParams(data, bits, symmetric, perChannel, 0, shape);
let quantizedData;
let dtype;
switch (type) {
case 'int8':
quantizedData = quantizeToInt8(data, params.scale, params.zeroPoint, perChannel);
dtype = 'int32'; // Store as int32 since we don't have int8 dtype
break;
case 'uint8':
quantizedData = quantizeToUint8(data, params.scale, params.zeroPoint, perChannel);
dtype = 'int32';
break;
case 'float16':
quantizedData = quantizeToFloat16(data);
dtype = 'float32'; // Will be stored differently
break;
default:
quantizedData = quantizeToInt8(data, params.scale, params.zeroPoint, perChannel);
dtype = 'int32';
}
const scaleValue = params.scale instanceof Float32Array
? Array.from(params.scale)
: params.scale;
const zpValue = params.zeroPoint instanceof Int32Array
? Array.from(params.zeroPoint)
: params.zeroPoint;
return {
tensor: new EdgeFlowTensor(Array.from(quantizedData), shape, dtype),
scale: scaleValue,
zeroPoint: zpValue,
};
}
/**
* Dequantize a tensor back to float32
*/
export function dequantizeTensor(tensor, scale, zeroPoint, type) {
const data = tensor.toArray();
const shape = tensor.shape;
let dequantizedData;
const scaleArr = Array.isArray(scale) ? new Float32Array(scale) : scale;
const zpArr = Array.isArray(zeroPoint) ? new Int32Array(zeroPoint) : zeroPoint;
const perChannel = Array.isArray(scale);
switch (type) {
case 'int8':
dequantizedData = dequantizeInt8(new Int8Array(data.map(Number)), scaleArr, zpArr, perChannel);
break;
case 'uint8':
dequantizedData = dequantizeUint8(new Uint8Array(data.map(Number)), scaleArr, zpArr, perChannel);
break;
case 'float16':
dequantizedData = dequantizeFloat16(new Uint16Array(data.map(Number)));
break;
default:
dequantizedData = dequantizeInt8(new Int8Array(data.map(Number)), scaleArr, zpArr, perChannel);
}
return new EdgeFlowTensor(Array.from(dequantizedData), shape, 'float32');
}
/**
* Prune a tensor using magnitude-based pruning
*/
export function pruneTensor(tensor, options = {}) {
const { ratio = 0.5, method = 'magnitude', threshold } = options;
const data = tensor.toFloat32Array();
const shape = tensor.shape;
const mask = new Float32Array(data.length);
const prunedData = new Float32Array(data.length);
let prunedCount = 0;
if (method === 'magnitude') {
// Get threshold based on ratio
const absValues = Array.from(data).map(Math.abs).sort((a, b) => a - b);
const thresholdIndex = Math.floor(absValues.length * ratio);
const computedThreshold = threshold ?? (absValues[thresholdIndex] ?? 0);
for (let i = 0; i < data.length; i++) {
if (Math.abs(data[i] ?? 0) > computedThreshold) {
mask[i] = 1;
prunedData[i] = data[i] ?? 0;
}
else {
mask[i] = 0;
prunedData[i] = 0;
prunedCount++;
}
}
}
else if (method === 'random') {
for (let i = 0; i < data.length; i++) {
if (Math.random() > ratio) {
mask[i] = 1;
prunedData[i] = data[i] ?? 0;
}
else {
mask[i] = 0;
prunedData[i] = 0;
prunedCount++;
}
}
}
return {
tensor: new EdgeFlowTensor(Array.from(prunedData), shape, 'float32'),
mask: new EdgeFlowTensor(Array.from(mask), shape, 'float32'),
sparsity: prunedCount / data.length,
};
}
/**
* Prune a model
*/
export async function pruneModel(modelData, options = {}) {
const { onProgress } = options;
onProgress?.({ current: 0, total: 1, percent: 0 });
// This is a simplified implementation
// Real implementation would parse the model properly
const weights = parseModelWeights(modelData);
let totalParams = 0;
let prunedParams = 0;
for (const weight of weights) {
totalParams += weight.data.length;
const tensor = new EdgeFlowTensor(Array.from(weight.data), weight.shape, 'float32');
const { sparsity } = pruneTensor(tensor, options);
prunedParams += Math.floor(weight.data.length * sparsity);
}
onProgress?.({ current: 1, total: 1, percent: 100 });
return {
data: modelData, // In a real implementation, we'd create a sparse format
originalSize: modelData.byteLength,
prunedSize: modelData.byteLength, // Would be smaller with sparse format
sparsity: prunedParams / totalParams,
parametersPruned: prunedParams,
totalParameters: totalParams,
};
}
/**
* Analyze a model
*/
export async function analyzeModel(modelData) {
const weights = parseModelWeights(modelData);
const totalSize = modelData.byteLength;
const dtypeBreakdown = {};
let totalParams = 0;
const tensorInfos = [];
for (const weight of weights) {
totalParams += weight.data.length;
const bytesPerElement = weight.dtype === 'float32' ? 4
: weight.dtype === 'float16' ? 2
: weight.dtype === 'int8' ? 1
: 4;
const size = weight.data.length * bytesPerElement;
if (!dtypeBreakdown[weight.dtype]) {
dtypeBreakdown[weight.dtype] = { count: 0, size: 0 };
}
dtypeBreakdown[weight.dtype].count++;
dtypeBreakdown[weight.dtype].size += size;
tensorInfos.push({
name: weight.name,
size,
shape: weight.shape,
});
}
// Sort by size and get top 10
tensorInfos.sort((a, b) => b.size - a.size);
const largestTensors = tensorInfos.slice(0, 10);
// Estimate quantized sizes
const estimatedQuantizedSizes = {
int8: Math.ceil(totalSize / 4),
uint8: Math.ceil(totalSize / 4),
int4: Math.ceil(totalSize / 8),
float16: Math.ceil(totalSize / 2),
dynamic: Math.ceil(totalSize / 4),
};
// Recommend quantization based on model size
let recommendedQuantization = 'dynamic';
if (totalSize > 500 * 1024 * 1024) {
recommendedQuantization = 'int4';
}
else if (totalSize > 100 * 1024 * 1024) {
recommendedQuantization = 'int8';
}
else if (totalSize > 50 * 1024 * 1024) {
recommendedQuantization = 'float16';
}
return {
totalSize,
tensorCount: weights.length,
totalParameters: totalParams,
dtypeBreakdown,
largestTensors,
estimatedMemory: totalParams * 4, // Assuming float32 at runtime
recommendedQuantization,
estimatedQuantizedSizes,
};
}
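/*
 * Usage sketch (editorial example): analyze a model buffer before deciding how to
 * shrink it. The report includes a size-based quantization recommendation and rough
 * post-quantization size estimates.
 *
 * const report = await analyzeModel(modelBuffer); // modelBuffer: ArrayBuffer
 * console.log(report.totalParameters, report.recommendedQuantization);
 * console.log(report.estimatedQuantizedSizes.int8); // estimated size in bytes after int8 quantization
 */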
/**
* Export a model to different formats
* Note: this is a placeholder; a real implementation would perform the actual format conversion.
*/
export async function exportModel(modelData, options) {
const { format, quantize } = options;
// Apply quantization if requested
let data = modelData;
if (quantize) {
const result = await quantizeModel(modelData, { type: quantize });
data = result.data;
}
// Format conversion would happen here
// For now, we just return the (possibly quantized) data
switch (format) {
case 'edgeflow':
return data;
case 'onnx':
// Would convert to ONNX format
return data;
case 'tflite':
// Would convert to TFLite format
return data;
default:
return data;
}
}
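/*
 * Usage sketch (editorial example): export with optional quantization. As noted in
 * the docblock above, format conversion is currently a placeholder, so the result is
 * the (possibly quantized) original buffer.
 *
 * const exported = await exportModel(modelBuffer, { format: 'edgeflow', quantize: 'int8' });
 */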
// ============================================================================
// Exports
// ============================================================================
export default {
quantizeModel,
quantizeTensor,
dequantizeTensor,
pruneModel,
pruneTensor,
analyzeModel,
exportModel,
dequantizeInt8,
dequantizeUint8,
dequantizeFloat16,
float16ToFloat32,
};
//# sourceMappingURL=quantization.js.map
================================================
FILE: dist/utils/cache.d.ts
================================================
/**
* edgeFlow.js - Caching Utilities
*
* Smart caching for models, tensors, and inference results.
*/
/**
* Cache strategy types
*/
export type CacheStrategy = 'lru' | 'lfu' | 'fifo' | 'ttl';
/**
* Cache options
*/
export interface CacheOptions {
/** Cache strategy */
strategy?: CacheStrategy;
/** Maximum cache size in bytes */
maxSize?: number;
/** Maximum number of entries */
maxEntries?: number;
/** Default TTL in milliseconds */
ttl?: number;
/** Enable persistence to IndexedDB */
persistent?: boolean;
/** Cache name for persistence */
name?: string;
}
/**
* Cache statistics
*/
export interface CacheStats {
/** Number of entries */
entries: number;
/** Total size in bytes */
size: number;
/** Cache hits */
hits: number;
/** Cache misses */
misses: number;
/** Hit rate (0-1) */
hitRate: number;
}
/**
* Cache - Generic cache implementation
*/
export declare class Cache<T = unknown> {
private readonly options;
private readonly cache;
private currentSize;
private hits;
private misses;
constructor(options?: CacheOptions);
/**
* Get value from cache
*/
get(key: string): T | undefined;
/**
* Set value in cache
*/
set(key: string, value: T, size: number, ttl?: number): void;
/**
* Check if key exists
*/
has(key: string): boolean;
/**
* Delete entry
*/
delete(key: string): boolean;
/**
* Clear the cache
*/
clear(): void;
/**
* Get cache statistics
*/
getStats(): CacheStats;
/**
* Evict an entry based on strategy
*/
private evict;
/**
* Find least recently used entry
*/
private findLRU;
/**
* Find least frequently used entry
*/
private findLFU;
/**
* Find oldest entry (FIFO)
*/
private findOldest;
/**
* Find expired entry
*/
private findExpired;
/**
* Load cache from IndexedDB
*/
private loadFromStorage;
/**
* Save cache to IndexedDB
*/
private saveToStorage;
/**
* Clear IndexedDB storage
*/
private clearStorage;
/**
* Open IndexedDB database
*/
private openDB;
}
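/*
 * Usage sketch (editorial example, assuming Cache is imported from the utils entry
 * point): an LRU cache holding typed values, with entry size given in bytes and an
 * optional per-entry TTL.
 *
 * const cache = new Cache<Float32Array>({ strategy: 'lru', maxSize: 10 * 1024 * 1024 });
 * cache.set('embeddings:42', new Float32Array(384), 384 * 4, 60_000); // 60s TTL
 * const hit = cache.get('embeddings:42');
 * console.log(cache.getStats().hitRate);
 */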
/**
* InferenceCache - Cache for inference results
*/
export declare class InferenceCache extends Cache {
/**
* Generate cache key from input
*/
generateKey(modelId: string, input: Float32Array | number[]): string;
/**
* Simple hash function for arrays
*/
private hashArray;
}
/**
* Model download cache using Cache API
*/
export declare class ModelDownloadCache {
private readonly cacheName;
private cache;
constructor(cacheName?: string);
/**
* Initialize cache
*/
private ensureCache;
/**
* Get cached response
*/
get(url: string): Promise<Response | undefined>;
/**
* Store response in cache
*/
put(url: string, response: Response): Promise<void>;
/**
* Delete cached response
*/
delete(url: string): Promise<boolean>;
/**
* Clear all cached models
*/
clear(): Promise<void>;
/**
* Get all cached URLs
*/
keys(): Promise<string[]>;
}
/**
* Create a cache with common presets
*/
export declare function createCache(preset?: 'small' | 'medium' | 'large' | 'custom', options?: CacheOptions): Cache;
//# sourceMappingURL=cache.d.ts.map
================================================
FILE: dist/utils/cache.js
================================================
/**
* edgeFlow.js - Caching Utilities
*
* Smart caching for models, tensors, and inference results.
*/
// ============================================================================
// Cache Implementation
// ============================================================================
/**
* Cache - Generic cache implementation
*/
export class Cache {
options;
cache = new Map();
currentSize = 0;
hits = 0;
misses = 0;
constructor(options = {}) {
this.options = {
strategy: options.strategy ?? 'lru',
maxSize: options.maxSize ?? 100 * 1024 * 1024, // 100MB
maxEntries: options.maxEntries ?? 1000,
ttl: options.ttl ?? 0, // 0 = no TTL
persistent: options.persistent ?? false,
name: options.name ?? 'edgeflow-cache',
};
// Load from persistent storage if enabled
if (this.options.persistent) {
this.loadFromStorage();
}
}
/**
* Get value from cache
*/
get(key) {
const entry = this.cache.get(key);
if (!entry) {
this.misses++;
return undefined;
}
// Check TTL
if (entry.ttl && Date.now() - entry.createdAt > entry.ttl) {
this.delete(key);
this.misses++;
return undefined;
}
// Update access stats
entry.accessedAt = Date.now();
entry.accessCount++;
this.hits++;
return entry.value;
}
/**
* Set value in cache
*/
set(key, value, size, ttl) {
// Remove existing entry if present
if (this.cache.has(key)) {
this.delete(key);
}
// Evict entries if necessary
while ((this.currentSize + size > this.options.maxSize ||
this.cache.size >= this.options.maxEntries) &&
this.cache.size > 0) {
this.evict();
}
// Determine TTL value
const entryTtl = ttl !== undefined ? ttl : (this.options.ttl > 0 ? this.options.ttl : undefined);
// Add new entry
const entry = {
value,
size,
createdAt: Date.now(),
accessedAt: Date.now(),
accessCount: 1,
ttl: entryTtl,
};
this.cache.set(key, entry);
this.currentSize += size;
// Persist if enabled
if (this.options.persistent) {
this.saveToStorage();
}
}
/**
* Check if key exists
*/
has(key) {
const entry = this.cache.get(key);
if (!entry)
return false;
// Check TTL
if (entry.ttl && Date.now() - entry.createdAt > entry.ttl) {
this.delete(key);
return false;
}
return true;
}
/**
* Delete entry
*/
delete(key) {
const entry = this.cache.get(key);
if (entry) {
this.currentSize -= entry.size;
this.cache.delete(key);
if (this.options.persistent) {
this.saveToStorage();
}
return true;
}
return false;
}
/**
* Clear the cache
*/
clear() {
this.cache.clear();
this.currentSize = 0;
this.hits = 0;
this.misses = 0;
if (this.options.persistent) {
this.clearStorage();
}
}
/**
* Get cache statistics
*/
getStats() {
const total = this.hits + this.misses;
return {
entries: this.cache.size,
size: this.currentSize,
hits: this.hits,
misses: this.misses,
hitRate: total > 0 ? this.hits / total : 0,
};
}
/**
* Evict an entry based on strategy
*/
evict() {
let keyToEvict = null;
switch (this.options.strategy) {
case 'lru':
keyToEvict = this.findLRU();
break;
case 'lfu':
keyToEvict = this.findLFU();
break;
case 'fifo':
keyToEvict = this.findOldest();
break;
case 'ttl':
keyToEvict = this.findExpired() ?? this.findOldest();
break;
}
if (keyToEvict) {
this.delete(keyToEvict);
}
}
/**
* Find least recently used entry
*/
findLRU() {
let oldest = null;
let oldestTime = Infinity;
for (const [key, entry] of this.cache) {
if (entry.accessedAt < oldestTime) {
oldestTime = entry.accessedAt;
oldest = key;
}
}
return oldest;
}
/**
* Find least frequently used entry
*/
findLFU() {
let lfu = null;
let minCount = Infinity;
for (const [key, entry] of this.cache) {
if (entry.accessCount < minCount) {
minCount = entry.accessCount;
lfu = key;
}
}
return lfu;
}
/**
* Find oldest entry (FIFO)
*/
findOldest() {
let oldest = null;
let oldestTime = Infinity;
for (const [key, entry] of this.cache) {
if (entry.createdAt < oldestTime) {
oldestTime = entry.createdAt;
oldest = key;
}
}
return oldest;
}
/**
* Find expired entry
*/
findExpired() {
const now = Date.now();
for (const [key, entry] of this.cache) {
if (entry.ttl && now - entry.createdAt > entry.ttl) {
return key;
}
}
return null;
}
/**
* Load cache from IndexedDB
*/
async loadFromStorage() {
if (typeof indexedDB === 'undefined')
return;
try {
const db = await this.openDB();
const tx = db.transaction('cache', 'readonly');
const store = tx.objectStore('cache');
const request = store.getAll();
return new Promise((resolve, reject) => {
request.onsuccess = () => {
const entries = request.result;
for (const { key, entry } of entries) {
this.cache.set(key, entry);
this.currentSize += entry.size;
}
resolve();
};
request.onerror = () => reject(request.error);
});
}
catch {
// Ignore storage errors
}
}
/**
* Save cache to IndexedDB
*/
async saveToStorage() {
if (typeof indexedDB === 'undefined')
return;
try {
const db = await this.openDB();
const tx = db.transaction('cache', 'readwrite');
const store = tx.objectStore('cache');
// Clear existing entries
store.clear();
// Add current entries
for (const [key, entry] of this.cache) {
store.put({ key, entry });
}
return new Promise((resolve, reject) => {
tx.oncomplete = () => resolve();
tx.onerror = () => reject(tx.error);
});
}
catch {
// Ignore storage errors
}
}
/**
* Clear IndexedDB storage
*/
async clearStorage() {
if (typeof indexedDB === 'undefined')
return;
try {
const db = await this.openDB();
const tx = db.transaction('cache', 'readwrite');
const store = tx.objectStore('cache');
store.clear();
}
catch {
// Ignore storage errors
}
}
/**
* Open IndexedDB database
*/
openDB() {
return new Promise((resolve, reject) => {
const request = indexedDB.open(this.options.name, 1);
request.onupgradeneeded = () => {
const db = request.result;
if (!db.objectStoreNames.contains('cache')) {
db.createObjectStore('cache', { keyPath: 'key' });
}
};
request.onsuccess = () => resolve(request.result);
request.onerror = () => reject(request.error);
});
}
}
// ============================================================================
// Inference Result Cache
// ============================================================================
/**
* InferenceCache - Cache for inference results
*/
export class InferenceCache extends Cache {
/**
* Generate cache key from input
*/
generateKey(modelId, input) {
// Create hash from input data
const inputArray = Array.isArray(input) ? input : Array.from(input);
const hash = this.hashArray(inputArray);
return `${modelId}:${hash}`;
}
/**
* Simple hash function for arrays
*/
hashArray(arr) {
let hash = 0;
const sample = arr.length > 100
? arr.filter((_, i) => i % Math.floor(arr.length / 100) === 0)
: arr;
for (let i = 0; i < sample.length; i++) {
const value = sample[i] ?? 0;
hash = ((hash << 5) - hash) + (value * 1000 | 0);
hash |= 0;
}
return hash.toString(36);
}
}
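/*
 * Usage sketch (editorial example, inside an async function): memoize inference
 * results keyed by model id plus a hash of the input. runInference is a hypothetical
 * placeholder for the caller's own inference call.
 *
 * const results = new InferenceCache({ maxEntries: 200 });
 * const key = results.generateKey('sentiment-model', inputData); // Float32Array | number[]
 * let output = results.get(key);
 * if (output === undefined) {
 *   output = await runInference(inputData); // hypothetical
 *   results.set(key, output, inputData.length * 4);
 * }
 */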
// ============================================================================
// Model Cache
// ============================================================================
/**
* Model download cache using Cache API
*/
export class ModelDownloadCache {
cacheName;
cache = null;
constructor(cacheName = 'edgeflow-models') {
this.cacheName = cacheName;
}
/**
* Initialize cache
*/
async ensureCache() {
if (!this.cache) {
if (typeof caches === 'undefined') {
throw new Error('Cache API is not available');
}
this.cache = await caches.open(this.cacheName);
}
return this.cache;
}
/**
* Get cached response
*/
async get(url) {
try {
const cache = await this.ensureCache();
return await cache.match(url) ?? undefined;
}
catch {
return undefined;
}
}
/**
* Store response in cache
*/
async put(url, response) {
try {
const cache = await this.ensureCache();
await cache.put(url, response.clone());
}
catch {
// Ignore cache errors
}
}
/**
* Delete cached response
*/
async delete(url) {
try {
const cache = await this.ensureCache();
return await cache.delete(url);
}
catch {
return false;
}
}
/**
* Clear all cached models
*/
async clear() {
try {
await caches.delete(this.cacheName);
this.cache = null;
}
catch {
// Ignore cache errors
}
}
/**
* Get all cached URLs
*/
async keys() {
try {
const cache = await this.ensureCache();
const requests = await cache.keys();
return requests.map(r => r.url);
}
catch {
return [];
}
}
}
// ============================================================================
// Factory Functions
// ============================================================================
/**
* Create a cache with common presets
*/
export function createCache(preset = 'medium', options = {}) {
const presets = {
small: {
maxSize: 10 * 1024 * 1024, // 10MB
maxEntries: 100,
},
medium: {
maxSize: 100 * 1024 * 1024, // 100MB
maxEntries: 500,
},
large: {
maxSize: 500 * 1024 * 1024, // 500MB
maxEntries: 2000,
},
custom: {},
};
return new Cache({ ...presets[preset], ...options });
}
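/*
 * Usage sketch (editorial example): the presets only differ in maxSize/maxEntries;
 * any other CacheOptions field can still be passed alongside the preset.
 *
 * const small = createCache('small'); // 10MB, 100 entries, LRU by default
 * const persistent = createCache('medium', { persistent: true, name: 'my-app-cache' });
 */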
//# sourceMappingURL=cache.js.map
================================================
FILE: dist/utils/hub.d.ts
================================================
/**
* edgeFlow.js - Hugging Face Hub Integration
*
* Automatically download models, tokenizers, and configs from Hugging Face Hub.
*/
import { type DownloadProgress } from './model-loader.js';
import { Tokenizer } from './tokenizer.js';
/**
* Hub options
*/
export interface HubOptions {
/** HuggingFace API endpoint (default: https://huggingface.co) */
endpoint?: string;
/** Model revision/branch (default: main) */
revision?: string;
/** Subfolder within the repo */
subfolder?: string;
/** Enable caching */
cache?: boolean;
/** Force re-download */
forceDownload?: boolean;
/** Progress callback */
onProgress?: (progress: HubDownloadProgress) => void;
/** HuggingFace API token (for private repos) */
token?: string;
}
/**
* Download progress for hub
*/
export interface HubDownloadProgress {
/** Current file being downloaded */
file: string;
/** File index (1-based) */
fileIndex: number;
/** Total files */
totalFiles: number;
/** File download progress */
fileProgress: DownloadProgress;
/** Overall progress (0-100) */
overallProgress: number;
}
/**
* Model info from config.json
*/
export interface ModelConfig {
model_type?: string;
architectures?: string[];
hidden_size?: number;
num_attention_heads?: number;
num_hidden_layers?: number;
vocab_size?: number;
max_position_embeddings?: number;
type_vocab_size?: number;
id2label?: Record<string, string>;
label2id?: Record<string, number>;
[key: string]: unknown;
}
/**
* Downloaded model bundle
*/
export interface ModelBundle {
/** Model ID */
modelId: string;
/** Model data (ArrayBuffer) */
modelData: ArrayBuffer;
/** Tokenizer instance */
tokenizer?: Tokenizer;
/** Model config */
config?: ModelConfig;
/** Model files info */
files: {
model?: string;
tokenizer?: string;
config?: string;
};
}
/**
* Download a file from HuggingFace Hub
*/
export declare function downloadFile(modelId: string, filename: string, options?: HubOptions): Promise<ArrayBuffer>;
/**
* Download JSON file from HuggingFace Hub
*/
export declare function downloadJson<T = unknown>(modelId: string, filename: string, options?: HubOptions): Promise<T>;
/**
* Download tokenizer from HuggingFace Hub
*/
export declare function downloadTokenizer(modelId: string, options?: HubOptions): Promise<Tokenizer>;
/**
* Download model config from HuggingFace Hub
*/
export declare function downloadConfig(modelId: string, options?: HubOptions): Promise<ModelConfig>;
/**
* Download complete model bundle (model + tokenizer + config)
*/
export declare function downloadModel(modelId: string, options?: HubOptions): Promise<ModelBundle>;
/**
* Load a model from HuggingFace Hub
*
* @example
* ```typescript
* // Load a sentiment analysis model
* const bundle = await fromHub('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
*
* // Use with edgeFlow
* const model = await loadModelFromBuffer(bundle.modelData);
* const tokens = bundle.tokenizer.encode('I love this!');
* ```
*/
export declare function fromHub(modelId: string, options?: HubOptions): Promise<ModelBundle>;
/**
* Check if a model exists on HuggingFace Hub
*/
export declare function modelExists(modelId: string, options?: HubOptions): Promise<boolean>;
/**
* Get model info from HuggingFace Hub
*/
export declare function getModelInfo(modelId: string, options?: HubOptions): Promise<{
hasOnnx: boolean;
onnxFile?: string;
hasTokenizer: boolean;
hasConfig: boolean;
config?: ModelConfig;
}>;
/**
* Pre-configured popular models
*/
export declare const POPULAR_MODELS: {
readonly 'sentiment-analysis': "Xenova/distilbert-base-uncased-finetuned-sst-2-english";
readonly 'text-classification': "Xenova/distilbert-base-uncased-finetuned-sst-2-english";
readonly 'feature-extraction': "Xenova/all-MiniLM-L6-v2";
readonly 'sentence-similarity': "Xenova/all-MiniLM-L6-v2";
readonly 'question-answering': "Xenova/distilbert-base-cased-distilled-squad";
readonly ner: "Xenova/bert-base-NER";
readonly 'token-classification': "Xenova/bert-base-NER";
readonly 'text-generation': "Xenova/gpt2";
readonly 'translation-en-fr': "Xenova/t5-small";
readonly 'translation-en-de': "Xenova/t5-small";
readonly summarization: "Xenova/distilbart-cnn-6-6";
readonly 'fill-mask': "Xenova/bert-base-uncased";
readonly 'image-classification': "Xenova/vit-base-patch16-224";
readonly 'object-detection': "Xenova/detr-resnet-50";
readonly 'image-segmentation': "Xenova/segformer-b0-finetuned-ade-512-512";
readonly 'zero-shot-classification': "Xenova/mobilebert-uncased-mnli";
readonly 'automatic-speech-recognition': "Xenova/whisper-tiny.en";
readonly 'text-to-speech': "Xenova/speecht5_tts";
};
export type PopularModelTask = keyof typeof POPULAR_MODELS;
/**
* Get the default model ID for a task
*/
export declare function getDefaultModel(task: PopularModelTask): string;
/**
* Load a model by task name
*
* @example
* ```typescript
* const bundle = await fromTask('sentiment-analysis');
* ```
*/
export declare function fromTask(task: PopularModelTask, options?: HubOptions): Promise<ModelBundle>;
//# sourceMappingURL=hub.d.ts.map
================================================
FILE: dist/utils/hub.js
================================================
/**
* edgeFlow.js - Hugging Face Hub Integration
*
* Automatically download models, tokenizers, and configs from Hugging Face Hub.
*/
import { loadModelData, isModelCached } from './model-loader.js';
import { Tokenizer } from './tokenizer.js';
import { EdgeFlowError, ErrorCodes } from '../core/types.js';
// ============================================================================
// Constants
// ============================================================================
const DEFAULT_ENDPOINT = 'https://huggingface.co';
const DEFAULT_REVISION = 'main';
/**
* Common ONNX model file patterns (in order of preference)
*/
const ONNX_MODEL_FILES = [
'model.onnx',
'model_quantized.onnx',
'model_int8.onnx',
'model_uint8.onnx',
'model_fp16.onnx',
'onnx/model.onnx',
'onnx/model_quantized.onnx',
];
// ============================================================================
// Hub API
// ============================================================================
/**
* Build URL for a file in a HuggingFace repo
*/
function buildFileUrl(modelId, filename, options = {}) {
const endpoint = options.endpoint ?? DEFAULT_ENDPOINT;
const revision = options.revision ?? DEFAULT_REVISION;
const subfolder = options.subfolder ? `${options.subfolder}/` : '';
return `${endpoint}/${modelId}/resolve/${revision}/${subfolder}${filename}`;
}
/**
* Fetch with optional auth token
*/
async function fetchWithAuth(url, token) {
const headers = {};
if (token) {
headers['Authorization'] = `Bearer ${token}`;
}
const response = await fetch(url, { headers });
return response;
}
/**
* Check if a file exists in a repo
*/
async function fileExists(modelId, filename, options = {}) {
const url = buildFileUrl(modelId, filename, options);
try {
const response = await fetchWithAuth(url, options.token);
// HuggingFace returns 302 redirect for existing files
return response.ok || response.status === 302;
}
catch {
return false;
}
}
/**
* Find the best ONNX model file in a repo
*/
async function findOnnxModel(modelId, options = {}) {
// Try common file patterns
for (const filename of ONNX_MODEL_FILES) {
if (await fileExists(modelId, filename, options)) {
return filename;
}
}
return null;
}
/**
* Download a file from HuggingFace Hub
*/
export async function downloadFile(modelId, filename, options = {}) {
const url = buildFileUrl(modelId, filename, options);
// Use model loader for caching and resume support
return loadModelData(url, {
cache: options.cache ?? true,
forceDownload: options.forceDownload ?? false,
onProgress: options.onProgress ? (progress) => {
options.onProgress({
file: filename,
fileIndex: 1,
totalFiles: 1,
fileProgress: progress,
overallProgress: progress.percent,
});
} : undefined,
});
}
/**
* Download JSON file from HuggingFace Hub
*/
export async function downloadJson(modelId, filename, options = {}) {
const url = buildFileUrl(modelId, filename, options);
// Check cache first
if (options.cache !== false && !options.forceDownload) {
const cached = await isModelCached(url);
if (cached) {
const data = await loadModelData(url, { cache: true });
const text = new TextDecoder().decode(data);
return JSON.parse(text);
}
}
// Fetch directly for small JSON files
const response = await fetchWithAuth(url, options.token);
if (!response.ok) {
throw new EdgeFlowError(`Failed to download ${filename} from ${modelId}: ${response.status}`, ErrorCodes.MODEL_NOT_FOUND);
}
return response.json();
}
/**
* Download tokenizer from HuggingFace Hub
*/
export async function downloadTokenizer(modelId, options = {}) {
const url = buildFileUrl(modelId, 'tokenizer.json', options);
return Tokenizer.fromUrl(url);
}
/**
* Download model config from HuggingFace Hub
*/
export async function downloadConfig(modelId, options = {}) {
return downloadJson(modelId, 'config.json', options);
}
/**
* Download complete model bundle (model + tokenizer + config)
*/
export async function downloadModel(modelId, options = {}) {
const files = {};
const totalSteps = 3; // model, tokenizer, config
let currentStep = 0;
const reportProgress = (file, progress) => {
if (options.onProgress) {
const baseProgress = (currentStep / totalSteps) * 100;
const stepProgress = (progress.percent / totalSteps);
options.onProgress({
file,
fileIndex: currentStep + 1,
totalFiles: totalSteps,
fileProgress: progress,
overallProgress: baseProgress + stepProgress,
});
}
};
// 1. Find and download ONNX model
console.log(`🔍 Finding ONNX model in ${modelId}...`);
const modelFile = await findOnnxModel(modelId, options);
if (!modelFile) {
throw new EdgeFlowError(`No ONNX model found in ${modelId}. Please ensure the model has an ONNX file.`, ErrorCodes.MODEL_NOT_FOUND, { modelId, triedFiles: ONNX_MODEL_FILES });
}
files.model = modelFile;
console.log(`📦 Downloading model: ${modelFile}`);
const modelData = await downloadFile(modelId, modelFile, {
...options,
onProgress: (p) => reportProgress(modelFile, p.fileProgress),
});
currentStep = 1;
// 2. Download tokenizer (optional)
let tokenizer;
try {
console.log(`📝 Downloading tokenizer...`);
files.tokenizer = 'tokenizer.json';
tokenizer = await downloadTokenizer(modelId, options);
console.log(`✓ Tokenizer loaded`);
}
catch (error) {
console.warn(`⚠️ No tokenizer found for ${modelId}`);
}
currentStep = 2;
// 3. Download config (optional)
let config;
try {
console.log(`⚙️ Downloading config...`);
files.config = 'config.json';
config = await downloadConfig(modelId, options);
console.log(`✓ Config loaded`);
}
catch (error) {
console.warn(`⚠️ No config found for ${modelId}`);
}
currentStep = 3;
if (options.onProgress) {
options.onProgress({
file: 'complete',
fileIndex: totalSteps,
totalFiles: totalSteps,
fileProgress: { loaded: 1, total: 1, percent: 100, speed: 0, eta: 0 },
overallProgress: 100,
});
}
console.log(`✅ Model bundle downloaded: ${modelId}`);
return {
modelId,
modelData,
tokenizer,
config,
files,
};
}
// ============================================================================
// High-level API
// ============================================================================
/**
* Load a model from HuggingFace Hub
*
* @example
* ```typescript
* // Load a sentiment analysis model
* const bundle = await fromHub('Xenova/distilbert-base-uncased-finetuned-sst-2-english');
*
* // Use with edgeFlow
* const model = await loadModelFromBuffer(bundle.modelData);
* const tokens = bundle.tokenizer.encode('I love this!');
* ```
*/
export async function fromHub(modelId, options = {}) {
return downloadModel(modelId, options);
}
/**
* Check if a model exists on HuggingFace Hub
*/
export async function modelExists(modelId, options = {}) {
try {
// Try to find an ONNX model
const modelFile = await findOnnxModel(modelId, options);
return modelFile !== null;
}
catch {
return false;
}
}
/**
* Get model info from HuggingFace Hub
*/
export async function getModelInfo(modelId, options = {}) {
const [onnxFile, hasTokenizer, config] = await Promise.all([
findOnnxModel(modelId, options),
fileExists(modelId, 'tokenizer.json', options),
downloadConfig(modelId, options).catch(() => undefined),
]);
return {
hasOnnx: onnxFile !== null,
onnxFile: onnxFile ?? undefined,
hasTokenizer,
hasConfig: config !== undefined,
config,
};
}
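/*
 * Usage sketch (editorial example): inspect a repo before committing to a full
 * download, then pull the bundle with progress reporting.
 *
 * const info = await getModelInfo('Xenova/all-MiniLM-L6-v2');
 * if (info.hasOnnx) {
 *   const bundle = await fromHub('Xenova/all-MiniLM-L6-v2', {
 *     onProgress: (p) => console.log(`${p.file}: ${p.overallProgress.toFixed(1)}%`),
 *   });
 * }
 */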
// ============================================================================
// Popular Models Registry
// ============================================================================
/**
* Pre-configured popular models
*/
export const POPULAR_MODELS = {
// Text Classification / Sentiment
'sentiment-analysis': 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
'text-classification': 'Xenova/distilbert-base-uncased-finetuned-sst-2-english',
// Feature Extraction
'feature-extraction': 'Xenova/all-MiniLM-L6-v2',
'sentence-similarity': 'Xenova/all-MiniLM-L6-v2',
// Question Answering
'question-answering': 'Xenova/distilbert-base-cased-distilled-squad',
// Token Classification
'ner': 'Xenova/bert-base-NER',
'token-classification': 'Xenova/bert-base-NER',
// Text Generation
'text-generation': 'Xenova/gpt2',
// Translation
'translation-en-fr': 'Xenova/t5-small',
'translation-en-de': 'Xenova/t5-small',
// Summarization
'summarization': 'Xenova/distilbart-cnn-6-6',
// Fill Mask
'fill-mask': 'Xenova/bert-base-uncased',
// Image Classification
'image-classification': 'Xenova/vit-base-patch16-224',
// Object Detection
'object-detection': 'Xenova/detr-resnet-50',
// Image Segmentation
'image-segmentation': 'Xenova/segformer-b0-finetuned-ade-512-512',
// Zero-shot Classification
'zero-shot-classification': 'Xenova/mobilebert-uncased-mnli',
// Speech Recognition
'automatic-speech-recognition': 'Xenova/whisper-tiny.en',
// Text-to-Speech
'text-to-speech': 'Xenova/speecht5_tts',
};
/**
* Get the default model ID for a task
*/
export function getDefaultModel(task) {
return POPULAR_MODELS[task];
}
/**
* Load a model by task name
*
* @example
* ```typescript
* const bundle = await fromTask('sentiment-analysis');
* ```
*/
export async function fromTask(task, options = {}) {
const modelId = getDefaultModel(task);
return downloadModel(modelId, options);
}
//# sourceMappingURL=hub.js.map
================================================
FILE: dist/utils/index.d.ts
================================================
/**
* edgeFlow.js - Utilities Exports
*/
export { Tokenizer, createBasicTokenizer, loadTokenizer, loadTokenizerFromHub, type TokenizerModel, type TokenizerOptions, } from './tokenizer.js';
export { ImagePreprocessor, AudioPreprocessor, preprocessText, createImagePreprocessor, createAudioPreprocessor, type ImagePreprocessorOptions, type AudioPreprocessorOptions, type TextPreprocessorOptions, } from './preprocessor.js';
export { Cache, InferenceCache, ModelDownloadCache, createCache, type CacheStrategy, type CacheOptions, type CacheStats, } from './cache.js';
export { loadModelData, preloadModel, preloadModels, isModelCached, getCachedModel, deleteCachedModel, clearModelCache, getModelCacheStats, getPreloadStatus, cancelPreload, getPreloadedModel, type DownloadProgress, type ModelLoaderOptions, type PreloadOptions, } from './model-loader.js';
export { fromHub, fromTask, downloadModel, downloadFile, downloadTokenizer, downloadConfig, modelExists, getModelInfo, getDefaultModel, POPULAR_MODELS, type HubOptions, type HubDownloadProgress, type ModelConfig, type ModelBundle, type PopularModelTask, } from './hub.js';
export { OfflineManager, getOfflineManager, initOffline, isOffline, isPWASupported, generateServiceWorker, generateManifest, type OfflineConfig, type OfflineStatus, type CachedModelInfo, } from './offline.js';
//# sourceMappingURL=index.d.ts.map
================================================
FILE: dist/utils/index.js
================================================
/**
* edgeFlow.js - Utilities Exports
*/
// Tokenizer
export { Tokenizer, createBasicTokenizer, loadTokenizer, loadTokenizerFromHub, } from './tokenizer.js';
// Preprocessor
export { ImagePreprocessor, AudioPreprocessor, preprocessText, createImagePreprocessor, createAudioPreprocessor, } from './preprocessor.js';
// Cache
export { Cache, InferenceCache, ModelDownloadCache, createCache, } from './cache.js';
// Model Loader (Preloading, Sharding, Resume, Caching)
export { loadModelData, preloadModel, preloadModels, isModelCached, getCachedModel, deleteCachedModel, clearModelCache, getModelCacheStats, getPreloadStatus, cancelPreload, getPreloadedModel, } from './model-loader.js';
// HuggingFace Hub Integration
export { fromHub, fromTask, downloadModel, downloadFile, downloadTokenizer, downloadConfig, modelExists, getModelInfo, getDefaultModel, POPULAR_MODELS, } from './hub.js';
// Offline/PWA Support
export { OfflineManager, getOfflineManager, initOffline, isOffline, isPWASupported, generateServiceWorker, generateManifest, } from './offline.js';
//# sourceMappingURL=index.js.map
================================================
FILE: dist/utils/model-loader.d.ts
================================================
/**
* edgeFlow.js - Advanced Model Loader
*
* Features:
* - Preloading: Background model loading
* - Sharding: Split large files into chunks for download
* - Resume Download: Continue download from where it left off
* - Model Caching: IndexedDB storage for large models
*/
/**
* Download progress information
*/
export interface DownloadProgress {
/** Downloaded bytes */
loaded: number;
/** Total bytes (0 if unknown) */
total: number;
/** Progress percentage (0-100) */
percent: number;
/** Download speed in bytes/sec */
speed: number;
/** Estimated time remaining in ms */
eta: number;
/** Current chunk index (for sharded downloads) */
currentChunk?: number;
/** Total chunks (for sharded downloads) */
totalChunks?: number;
}
/**
* Model loader options
*/
export interface ModelLoaderOptions {
/** Enable caching (default: true) */
cache?: boolean;
/** Cache name for IndexedDB (default: 'edgeflow-models') */
cacheName?: string;
/** Enable resume download (default: true) */
resumable?: boolean;
/** Chunk size for sharded downloads in bytes (default: 5MB) */
chunkSize?: number;
/** Progress callback */
onProgress?: (progress: DownloadProgress) => void;
/** Number of parallel download connections (default: 4) */
parallelConnections?: number;
/** Request timeout in ms (default: 30000) */
timeout?: number;
/** Force re-download even if cached */
forceDownload?: boolean;
}
/**
* Preload options
*/
export interface PreloadOptions extends ModelLoaderOptions {
/** Priority (higher = more important, default: 0) */
priority?: number;
}
/**
* Load model data with caching, sharding, and resume support
*/
export declare function loadModelData(url: string, options?: ModelLoaderOptions): Promise<ArrayBuffer>;
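/*
 * Usage sketch (editorial example; the URL is a placeholder): download a model with
 * chunked, resumable transfer and progress reporting. Subsequent calls for the same
 * URL resolve from the IndexedDB cache unless forceDownload is set.
 *
 * const data = await loadModelData('https://example.com/model.onnx', {
 *   chunkSize: 8 * 1024 * 1024,
 *   parallelConnections: 2,
 *   onProgress: (p) => console.log(`${p.percent.toFixed(1)}% @ ${(p.speed / 1e6).toFixed(1)} MB/s`),
 * });
 */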
/**
* Preload a model in the background
*/
export declare function preloadModel(url: string, options?: PreloadOptions): Promise<void>;
/**
* Preload multiple models
*/
export declare function preloadModels(urls: Array<{
url: string;
priority?: number;
}>, options?: Omit<PreloadOptions, 'priority'>): Promise<void>;
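/*
 * Usage sketch (editorial example; URLs are placeholders): warm the cache during app
 * startup. The priority value hints which models matter most; the data can later be
 * retrieved via loadModelData or getPreloadedModel.
 *
 * await preloadModels([
 *   { url: 'https://example.com/classifier.onnx', priority: 10 },
 *   { url: 'https://example.com/embedder.onnx', priority: 1 },
 * ]);
 */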
/**
* Check if a model is cached
*/
export declare function isModelCached(url: string): Promise