Repository: TejasQ/gen-subs Branch: main Commit: d26193de6d87 Files: 26 Total size: 71.6 KB Directory structure: gitextract_tnt85tu5/ ├── .github/ │ └── FUNDING.yml ├── .gitignore ├── .prettierrc ├── README.md ├── actions/ │ ├── burnIn.ts │ ├── embed.ts │ ├── for.ts │ ├── models.ts │ ├── modelsLs.ts │ └── modelsPurge.ts ├── burnInSubtitles.ts ├── cli.ts ├── createAssfromRecognitionResults.ts ├── createCueFromWords.ts ├── createSrtFromRecognitionResults.ts ├── createTextFromAudioFile.ts ├── downloadAndUnzip.ts ├── embedSubtitles.ts ├── extractAudio.ts ├── formatDuration.ts ├── loadModel.ts ├── package.json ├── processAudio.ts ├── splitFilePath.ts ├── tsconfig.json └── util.ts ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: tejasq # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: # Replace with a single Ko-fi username tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ================================================ FILE: .gitignore ================================================ # Created by https://www.toptal.com/developers/gitignore/api/node,macos,next,typescript,react,visualstudiocode,nextjs,solidjs,qwik,mitosis # Edit at 
https://www.toptal.com/developers/gitignore?templates=node,macos,next,typescript,react,visualstudiocode,nextjs,solidjs,qwik,mitosis ### macOS ### # General .DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### macOS Patch ### # iCloud generated files *.icloud #!! ERROR: mitosis is undefined. Use list command to see defined gitignore types !!# #!! ERROR: next is undefined. Use list command to see defined gitignore types !!# ### NextJS ### # dependencies /node_modules /.pnp .pnp.js # testing /coverage # next.js /.next/ /out/ # production /build # misc *.pem # debug npm-debug.log* yarn-debug.log* yarn-error.log* .pnpm-debug.log* # local env files .env*.local # vercel .vercel # typescript *.tsbuildinfo next-env.d.ts ### Node ### # Logs logs *.log lerna-debug.log* # Diagnostic reports (https://nodejs.org/api/report.html) report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json # Runtime data pids *.pid *.seed *.pid.lock # Directory for instrumented libs generated by jscoverage/JSCover lib-cov # Coverage directory used by tools like istanbul coverage *.lcov # nyc test coverage .nyc_output # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) .grunt # Bower dependency directory (https://bower.io/) bower_components # node-waf configuration .lock-wscript # Compiled binary addons (https://nodejs.org/api/addons.html) build/Release # Dependency directories node_modules/ jspm_packages/ # Snowpack dependency directory (https://snowpack.dev/) web_modules/ # TypeScript cache # Optional npm cache directory .npm # Optional eslint cache .eslintcache # Optional stylelint cache .stylelintcache # Microbundle cache .rpt2_cache/ 
.rts2_cache_cjs/ .rts2_cache_es/ .rts2_cache_umd/ # Optional REPL history .node_repl_history # Output of 'npm pack' *.tgz # Yarn Integrity file .yarn-integrity # dotenv environment variable files .env .env.development.local .env.test.local .env.production.local .env.local # parcel-bundler cache (https://parceljs.org/) .cache .parcel-cache # Next.js build output .next out # Nuxt.js build / generate output .nuxt dist # Gatsby files .cache/ # Comment in the public line in if your project uses Gatsby and not Next.js # https://nextjs.org/blog/next-9-1#public-directory-support # public # vuepress build output .vuepress/dist # vuepress v2.x temp and cache directory .temp # Docusaurus cache and generated files .docusaurus # Serverless directories .serverless/ # FuseBox cache .fusebox/ # DynamoDB Local files .dynamodb/ # TernJS port file .tern-port # Stores VSCode versions used for testing VSCode extensions .vscode-test # yarn v2 .yarn/cache .yarn/unplugged .yarn/build-state.yml .yarn/install-state.gz .pnp.* ### Node Patch ### # Serverless Webpack directories .webpack/ # Optional stylelint cache # SvelteKit build / generate output .svelte-kit #!! ERROR: qwik is undefined. Use list command to see defined gitignore types !!# ### react ### .DS_* **/*.backup.* **/*.back.* node_modules *.sublime* psd thumb sketch #!! ERROR: solidjs is undefined. Use list command to see defined gitignore types !!# #!! ERROR: typescript is undefined. 
Use list command to see defined gitignore types !!# ### VisualStudioCode ### .vscode/* !.vscode/settings.json !.vscode/tasks.json !.vscode/launch.json !.vscode/extensions.json !.vscode/*.code-snippets # Local History for Visual Studio Code .history/ # Built Visual Studio Code Extensions *.vsix ### VisualStudioCode Patch ### # Ignore all local history of files .history .ionide # End of https://www.toptal.com/developers/gitignore/api/node,macos,next,typescript,react,visualstudiocode,nextjs,solidjs,qwik,mitosis models *.mp4 *.wav model *.mov postcss.config.js postcss.config.cjs ================================================ FILE: .prettierrc ================================================ { "printWidth": 80 } ================================================ FILE: README.md ================================================ # `gen-subs` This project uses on-device machine learning models to generate subtitles for your videos. https://github.com/TejasQ/gen-subs/assets/9947422/bc8df523-b62a-4123-a62d-2df17832e2ac ## Features - 🔒 **Secure and offline** - All machine learning models are downloaded and run locally on your device. No data is sent to any server. There is zero dependency on OpenAI or other cloud services. - 🌐 **Multilingual** - Supports a wide variety of languages. Namely, | Languages | | | | --------- | --------- | --------- | | 🇺🇸 English | 🇮🇳 Indian English | 🇨🇳 Chinese | | 🇷🇺 Russian | 🇫🇷 French | 🇩🇪 German | | 🇪🇸 Spanish | 🇵🇹 Portuguese/Brazilian | 🇬🇷 Greek | | 🇹🇷 Turkish | 🇻🇳 Vietnamese | 🇮🇹 Italian | | 🇳🇱 Dutch | 🇪🇸 Catalan | 🇸🇦 Arabic | | 🇮🇷 Farsi | 🇵🇭 Filipino | 🇺🇦 Ukrainian | | 🇰🇿 Kazakh | 🇸🇪 Swedish | 🇯🇵 Japanese | | 🇪🇸 Esperanto | 🇮🇳 Hindi | 🇨🇿 Czech | | 🇵🇱 Polish | 🇺🇿 Uzbek | 🇰🇷 Korean | | 🇫🇷 Breton | | | - 🎨 **Customizable** - Choose from getting just an `srt` file, having the subtitles burned in to your video, and even embedding the subtitles in your video's metadata. 
You can also have **focus words** where the active word is highlighted in a different color.
- 🎧 **Multi-modal** - Supports both audio and video files and generates subtitles for each.
- 📊 **Multi-model** - Choose from a variety of machine learning models ranging from 40MB to >2GB in size. The larger the model, the more accurate the subtitles, but smaller models are also quite capable.

## Usage

You can generate subtitles for any video using the following command:

```bash
npx gen-subs for ./your/video.mp4
```

If you run this for the first time, you will be required to download a machine learning model to generate your subtitles. This needs to be done at least one time. Then, the program will generate a `.srt` file in your current working directory containing the subtitles for your video.

### Inaccuracies

Please note that you may get inaccurate results with the default, basic English model. This model is 40MB and is meant to be a quick way to get started. It's not very smart, so your mileage may vary.

If you'd like more accurate results, you can download a larger model by running the following command:

```bash
npx gen-subs models
```

This will have you choose a language and then show you a collection of models, their sizes, and intended use cases (like podcasting, content, etc.). You can then choose a model and download it. Once downloaded, you can use it to generate subtitles for your video.

You only download models once, and can remove them any time by running `npx gen-subs models purge`. You can also list all your downloaded models by running `npx gen-subs models ls`.

### Other Languages

You can install a wide variety of models that can "hear" different languages. To generate subs for any language, follow these steps:

1. First, install a model with `npx gen-subs models`. You will be asked to choose a language here.
2. Then, run `npx gen-subs for ./your/video.mp4` to generate subtitles for your video. You will be asked which model to use.
3. Enjoy!
## API

This project has a few options that you can use to customize your subtitles. Let's enumerate them here. Each command comes after `npx gen-subs` and is followed by a list of options.

| Command | Description |
| ------------------------------------ | -------------------------------------------------------------------- |
| `for [media]` | Generate subtitles for a given video or audio file. |
| `models` | Manage models |
| `models purge` | Delete all downloaded models. |
| `models ls` | Show a list of all models downloaded to the system. |
| `burn-in [video] [subtitles]` | Burns subtitles into the video and gives you a new video. |
| `embed [video] [subtitles]` | Adds subtitles to the video's metadata but does not alter the video. |

### `gen-subs for [media]`

| Option | Description | Default |
| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------------------------- |
| `-m, --model [modelName]` | The name of the machine learning model you'd like to use to generate subtitles. | `vosk-model-small-en-us-0.15` |
| `-b, --burn-in` | Whether to layer subtitles atop the video (burn them in). | None |
| `-e, --embed` | Whether to embed subtitles in the video's metadata. | None |
| `-o, --out-dir [path]` | Where to output the subtitle and final video files. | `process.cwd()` |
| `-f, --format [format]` | Choose between `srt` or `ass` formats. `ass` lets you do more cool stuff like focus words. (Default `srt`) | `srt` |
| `-h --highlight [color]` | (`ass` subtitles only) Highlight the active word with a color. ⚠️ Use double quotes (`""`) when entering hex codes into your terminal because `#` starts a comment! | `"#048BA8"` |

## Contributing

Please feel free to open issues and pull requests as needed and I'll try to get to them as soon as possible.

## Sustainability

This is all free and open source software.
If it has helped you, please consider [sponsoring me on GitHub](https://github.com/sponsors/TejasQ) so I can make more stuff like this and teach about it full-time. ================================================ FILE: actions/burnIn.ts ================================================ import { join } from "path"; import { burnInSubtitles } from "../burnInSubtitles"; import ora from "ora"; import { formatDuration } from "../formatDuration"; export async function burnInAction(video: string, subtitles: string) { const started = Date.now(); const spinner = ora().start("Burning in subtitles..."); await burnInSubtitles(join(process.cwd(), video), join(process.cwd(), subtitles)) spinner.succeed(`Subtitles burned in, took ${formatDuration(Date.now() - started)}.`); } ================================================ FILE: actions/embed.ts ================================================ import { join } from "path"; import ora from "ora"; import { embedSubtitles } from "../embedSubtitles"; import { formatDuration } from "../formatDuration"; export async function embedAction(video: string, subtitles: string) { const started = Date.now(); const spinner = ora().start("Embedding subtitles..."); await embedSubtitles(join(process.cwd(), video), join(process.cwd(), subtitles)) spinner.succeed(`Subtitles embedded, took ${formatDuration(Date.now() - started)}.`); } ================================================ FILE: actions/for.ts ================================================ import { join } from "path"; import { lstat, readdir, writeFile } from "fs/promises"; import { mkdirp } from "mkdirp"; import ora from "ora"; import inquirer from "inquirer"; import { splitFilePath } from "../splitFilePath"; import { formatSize, getModelDir, models, videoExtensions, workingDir } from "../util"; import { extractAudio } from "../extractAudio"; import { processAudio } from "../processAudio"; import { createTextFromAudioFile } from "../createTextFromAudioFile"; import { 
createSrtFromRecognitionResults } from "../createSrtFromRecognitionResults";
import { burnInSubtitles } from "../burnInSubtitles";
import { embedSubtitles } from "../embedSubtitles";
import { downloadFile, unzipFile } from "../downloadAndUnzip";
import { createAssFromRecognitionResults } from "../createAssfromRecognitionResults";

// Options accepted by the `for` command (wired up in cli.ts).
type Options = {
  outDir?: string;
  burnIn?: boolean;
  embed?: boolean;
  format?: "srt" | "ass"
  highlight?: string
};

// Generates subtitles for the media file at relativeTarget, optionally
// embedding them in the media's metadata and/or burning them into the video
// afterwards. Exits the process on any unrecoverable problem.
export async function forAction(relativeTarget: string, options: Options) {
  // Absolute paths are used as-is; anything else is resolved from the CWD.
  const target = relativeTarget.startsWith('/') ? relativeTarget : join(process.cwd(), relativeTarget);
  const { pathWithoutExtension, fileName } = splitFilePath(target);
  const format = (options.format ?? "srt").toLowerCase()
  // Output lands next to the input unless --out-dir was given.
  const getOutputFile = (extension: string) => options.outDir ? join(options.outDir, `${fileName}.${extension}`) : `${pathWithoutExtension}.${extension}`;
  const spinner = ora();
  spinner.start("Checking file...");
  // --highlight only has an effect with ASS subtitles.
  if (format !== 'ass' && options.highlight) {
    spinner.fail("The `highlight` option can only be used with `ass` format subtitles. Please use `-f ass` to set a highlight color.");
    process.exit(1);
  }
  if (!target) {
    spinner.fail("Please specify a file path.");
    process.exit(1);
  }
  try {
    await lstat(target);
  } catch {
    spinner.fail(`File does not exist at ${target}.`);
    process.exit(1);
  }
  const extension = target.split(".").pop();
  if (!extension) {
    spinner.fail(`File does not have an extension.`);
    process.exit(1);
  }
  const isVideo = videoExtensions.includes(extension)
  spinner.text = `Extension is ${extension}. Processing...`;
  let audioFilePath: string;
  // Burning-in only makes sense for video input.
  if (!isVideo && options.burnIn) {
    spinner.fail(`You're trying to burn-in subtitles to a non-video file.
Please only use -b with videos.`);
    process.exit(1);
  }
  if (isVideo) {
    spinner.text = "Creating workspace...";
    await mkdirp(join(workingDir, "from-video"));
    spinner.text = "Converting to audio...";
    audioFilePath = await extractAudio(target);
  } else {
    // Audio input is transcribed directly.
    audioFilePath = target;
  }
  spinner.text = "Processing audio...";
  const processedAudioFilePath = await processAudio(audioFilePath);
  spinner.text = "Checking available models..."
  // Only directories whose names match a known model count as installed models.
  const availableModels = (await readdir(await getModelDir())).filter(dir => models.map(m => m.name).includes(dir));
  let model;
  spinner.stop();
  // Several models installed: let the user pick one.
  if (availableModels.length > 1) {
    const { selectedModel } = await inquirer.prompt([
      {
        type: "list",
        name: "selectedModel",
        message: "Please choose a model. To download more models, please run `models`.",
        choices: models.filter(m => availableModels.includes(m.name)).map(m => ({ name: `(${formatSize(m.size)}, ${m.language}) ${m.notes}`, value: m.name })),
      },
    ]);
    model = selectedModel;
  }
  // Exactly one model installed: use it without asking.
  if (availableModels.length === 1) {
    model = availableModels[0];
  }
  // No models installed: offer to download the default (small English) one.
  if (availableModels.length === 0) {
    const { shouldDownloadModel } = await inquirer.prompt([
      {
        type: "confirm",
        name: "shouldDownloadModel",
        message: "You don't seem to have any models downloaded. Would you like to download a basic one?
You can run `models` to see all available models and add more.", }, ]);
    if (shouldDownloadModel) {
      // models[0] is the default basic model.
      const modelPath = join(workingDir, "models", models[0].name);
      const zipFile = await downloadFile(models[0].url, modelPath, models[0].notes);
      spinner.start("Unzipping model...");
      await unzipFile(zipFile, await getModelDir());
      spinner.succeed("Model downloaded.");
      model = models[0].name;
    }
  }
  spinner.start("Loading model...");
  // NOTE(review): if the user declines the download above, `model` is still
  // undefined here — verify createTextFromAudioFile handles that case.
  const results = await createTextFromAudioFile(
    spinner,
    processedAudioFilePath,
    model
  );
  spinner.text = "Creating subtitles...";
  let subs;
  try {
    if (format === "srt") {
      subs = await createSrtFromRecognitionResults(results);
    } else {
      subs = createAssFromRecognitionResults(results, options.highlight);
    }
  } catch (e: any) {
    spinner.fail(e.message);
    process.exit(1);
  }
  spinner.succeed("Transcribed audio.");
  await writeFile(getOutputFile(format), subs);
  spinner.succeed(`Subtitles created at ${getOutputFile(format)}`);
  if (options.embed) {
    spinner.start("Embedding subtitles into media...");
    const result = await embedSubtitles(target, getOutputFile(format));
    spinner.succeed("File with embedded subtitles available at " + result)
  }
  // Audio-only input: nothing left to do after writing the subtitle file.
  if (!isVideo) {
    process.exit(0);
  }
  if (options.burnIn) {
    spinner.start("Burning-in subtitles to video...");
    const result = await burnInSubtitles(target, getOutputFile(format));
    spinner.succeed("File with burn-in subtitles available at " + result)
  }
  spinner.succeed("Done.");
}

================================================ FILE: actions/models.ts ================================================

import inquirer from "inquirer";
import ora from "ora";
import { rimraf } from "rimraf"
import { lstat } from "fs/promises";
import { join } from "path";
import { formatSize, getModelDir, isModelDownloaded, models, workingDir } from "../util";
import { downloadFile, unzipFile } from "../downloadAndUnzip";

// Interactive model manager: for one chosen language, downloads the models the
// user checks and deletes the ones they uncheck.
export async function modelsAction() {
  const languages = new Set(models.map((model) => model.language));
  // Pre-check the boxes for models that are already on disk.
  const choices = await Promise.all(models.map(async (model) => ({
    name: `(${formatSize(model.size)}) ${model.notes}`,
    value: model.name,
    checked: await isModelDownloaded(model.name),
    language: model.language,
    notes: model.notes,
    url: model.url,
  })));
  const { language } = await inquirer.prompt([
    {
      type: "list",
      name: "language",
      message: "Please choose a language",
      choices: Array.from(languages),
    }
  ]);
  const scopedChoices = choices.filter(c => c.language === language);
  const { desiredModels } = await inquirer.prompt([{
    type: "checkbox",
    name: "desiredModels",
    message: "Here are your models",
    choices: scopedChoices,
  }]);
  const spinner = ora().start("Processing models...");
  for (const choice of scopedChoices) {
    // Unchecked models are removed (best-effort: errors are ignored).
    if (!desiredModels.includes(choice.value)) {
      try {
        await rimraf(join(workingDir, "models", choice.value));
      } catch (e) { }
      continue;
    }
    // Checked models are downloaded only if not already present on disk.
    const doesModelExist = await lstat(join(workingDir, "models", choice.value)).then(() => true).catch(() => false);
    if (!doesModelExist) {
      spinner.stop();
      const zipFilePath = await downloadFile(choice.url, join(workingDir, "models", choice.value), choice.notes);
      spinner.start(`Unzipping model ${choice.value}...`);
      await unzipFile(zipFilePath, await getModelDir());
    }
  }
  spinner.succeed("Models updated.");
}

================================================ FILE: actions/modelsLs.ts ================================================

import { readdir } from "fs/promises";
import ora from "ora";
import { formatSize, getModelDir, models } from "../util";

// Prints the list of models currently downloaded to the system.
export async function modelsLs() {
  const spinner = ora().start("Getting models...");
  try {
    const modelsOnDisk = await readdir(await getModelDir())
    // Ignore directory entries that are not known models.
    const filteredModels = modelsOnDisk.filter(model => models.find(m => m.name === model));
    if (!filteredModels.length) {
      spinner.succeed("No models available.");
      process.exit(0);
    }
    spinner.succeed("Models available:");
    console.log(filteredModels.map((model) => {
      const actualModel = models.find(m => m.name === model)!;
      return `- (${formatSize(actualModel.size)},
${actualModel.language}) ${actualModel.notes}` }).join("\n")); process.exit(0); } catch { spinner.succeed("No models available."); process.exit(0); } }

================================================ FILE: actions/modelsPurge.ts ================================================

import ora from "ora";
import inquirer from "inquirer";
import { rimraf } from "rimraf";
import { getModelDir } from "../util";

/**
 * CLI action: delete every downloaded model after an explicit confirmation.
 */
export async function modelsPurgeAction() {
  const answers = await inquirer.prompt([
    {
      type: "confirm",
      name: "confirm",
      message: "Are you sure you want to purge all models?",
    },
  ]);
  // Bail out unless the user explicitly said yes.
  if (!answers.confirm) {
    process.exit(0);
  }
  const progress = ora().start("Deleting models...");
  await rimraf(await getModelDir());
  progress.succeed("Models purged.");
}

================================================ FILE: burnInSubtitles.ts ================================================

import ffmpeg from "fluent-ffmpeg";
import ffmpegInstaller from "@ffmpeg-installer/ffmpeg";
import { splitFilePath } from "./splitFilePath";
import { join } from "path";

// Point fluent-ffmpeg at the bundled ffmpeg binary.
ffmpeg.setFfmpegPath(ffmpegInstaller.path);

/**
 * Renders the subtitles at subPath onto the video at filePath.
 * Resolves with the path of the newly written video file.
 */
export function burnInSubtitles(filePath: string, subPath: string) {
  const { extension } = splitFilePath(filePath);
  const { pathWithoutExtension } = splitFilePath(subPath);
  // Output lands next to the subtitle file so the source video is never overwritten.
  const outFile = `${pathWithoutExtension}-with-burned-subs${extension}`;
  return new Promise((resolve, reject) => {
    ffmpeg(filePath)
      .videoFilter("subtitles=" + subPath)
      .on("error", (err) => reject(err))
      .save(outFile)
      .on("end", () => resolve(outFile));
  });
}

================================================ FILE: cli.ts ================================================

#!/usr/bin/env node
import { program } from "commander";
import pkg from "./package.json";
import { forAction } from "./actions/for";
import { modelsAction } from "./actions/models";
import { modelsPurgeAction } from "./actions/modelsPurge";
import { modelsLs } from "./actions/modelsLs";
import { burnInAction } from "./actions/burnIn";
import
{ embedAction } from "./actions/embed"; program.name(pkg.name).description(pkg.description).version(pkg.version); program .command("for ") .description("Generate subtitles for a given video or audio file.") .option( "-m, --model [modelName]", "The name of the machine learning model you'd like to use to generate subtitles.", "vosk-model-small-en-us-0.15", ) .option( "-b, --burn-in", "Whether to layer subtitles atop the video (burn them in).", false, ) .option( "-e, --embed", "Whether to embed subtitles in the video's metadata.", false, ) .option( "-o, --out-dir [path]", "Where to output the subtitles file.", process.cwd(), ) .option( "-f, --format [format]", "Choose between `srt` or `ass` formats. (Default `srt`)", "srt", ) .option( '-h --highlight [color]', "(`ass` subtitles only) Highlight the active word with a color. (Default `#048BA8`)", ) .action(forAction); const models = program .command("models") .description("Manage models") .action(modelsAction); models.command("purge") .action(modelsPurgeAction) .description("Delete all downloaded models.") models.command("ls") .description( "Show a list of all models downloaded to the system.", ) .action(modelsLs); program .command("burn-in