Repository: toon-format/toon Branch: main Commit: d5f50a2ce5e0 Files: 114 Total size: 1.5 MB Directory structure: gitextract_qnn9ipfr/ ├── .editorconfig ├── .github/ │ └── workflows/ │ ├── ci.yml │ ├── deploy.yml │ ├── pr-title.yml │ └── release.yml ├── .gitignore ├── .npmrc ├── .vscode/ │ ├── extensions.json │ └── settings.json ├── LICENSE ├── SPEC.md ├── automd.config.ts ├── benchmarks/ │ ├── README.md │ ├── data/ │ │ └── github-repos.json │ ├── package.json │ ├── results/ │ │ ├── accuracy/ │ │ │ └── models/ │ │ │ ├── claude-haiku-4-5-20251001 │ │ │ ├── gemini-3-flash-preview │ │ │ ├── gpt-5-nano │ │ │ └── grok-4-1-fast-non-reasoning │ │ ├── retrieval-accuracy.md │ │ └── token-efficiency.md │ ├── scripts/ │ │ ├── accuracy-benchmark.ts │ │ ├── fetch-github-repos.ts │ │ └── token-efficiency-benchmark.ts │ └── src/ │ ├── constants.ts │ ├── datasets.ts │ ├── evaluate.ts │ ├── formatters.ts │ ├── normalize.ts │ ├── questions/ │ │ ├── analytics.ts │ │ ├── event-logs.ts │ │ ├── github.ts │ │ ├── index.ts │ │ ├── nested-config.ts │ │ ├── nested.ts │ │ ├── structural-validation.ts │ │ ├── structure.ts │ │ ├── tabular.ts │ │ └── utils.ts │ ├── report.ts │ ├── storage.ts │ ├── types.ts │ └── utils.ts ├── commitlint.config.ts ├── docs/ │ ├── .vitepress/ │ │ ├── config.ts │ │ ├── meta.ts │ │ └── theme/ │ │ ├── components/ │ │ │ ├── PlaygroundLayout.vue │ │ │ └── VPInput.vue │ │ ├── index.ts │ │ ├── overrides.css │ │ └── vars.css │ ├── cli/ │ │ └── index.md │ ├── ecosystem/ │ │ ├── implementations.md │ │ └── tools-and-playgrounds.md │ ├── guide/ │ │ ├── benchmarks.md │ │ ├── format-overview.md │ │ ├── getting-started.md │ │ └── llm-prompts.md │ ├── index.md │ ├── package.json │ ├── playground.md │ ├── reference/ │ │ ├── api.md │ │ ├── efficiency-formalization.md │ │ ├── spec.md │ │ └── syntax-cheatsheet.md │ ├── uno.config.ts │ └── wrangler.toml ├── eslint.config.ts ├── package.json ├── packages/ │ ├── cli/ │ │ ├── README.md │ │ ├── bin/ │ │ │ └── toon.mjs │ │ ├── package.json 
│ │ ├── src/ │ │ │ ├── cli-entry.ts │ │ │ ├── conversion.ts │ │ │ ├── index.ts │ │ │ ├── json-from-events.ts │ │ │ ├── json-stringify-stream.ts │ │ │ ├── types.ts │ │ │ └── utils.ts │ │ ├── test/ │ │ │ ├── index.test.ts │ │ │ ├── json-from-events.test.ts │ │ │ ├── json-stringify-stream.test.ts │ │ │ └── utils.ts │ │ └── tsdown.config.ts │ └── toon/ │ ├── README.md │ ├── package.json │ ├── src/ │ │ ├── constants.ts │ │ ├── decode/ │ │ │ ├── decoders.ts │ │ │ ├── event-builder.ts │ │ │ ├── expand.ts │ │ │ ├── parser.ts │ │ │ ├── scanner.ts │ │ │ └── validation.ts │ │ ├── encode/ │ │ │ ├── encoders.ts │ │ │ ├── folding.ts │ │ │ ├── normalize.ts │ │ │ ├── primitives.ts │ │ │ └── replacer.ts │ │ ├── index.ts │ │ ├── shared/ │ │ │ ├── literal-utils.ts │ │ │ ├── string-utils.ts │ │ │ └── validation.ts │ │ └── types.ts │ ├── test/ │ │ ├── decode.test.ts │ │ ├── decodeStream.test.ts │ │ ├── decodeStreamAsync.test.ts │ │ ├── encode.test.ts │ │ ├── encodeLines.test.ts │ │ ├── normalization.test.ts │ │ ├── replacer.test.ts │ │ └── types.ts │ └── tsdown.config.ts ├── pnpm-workspace.yaml └── tsconfig.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .editorconfig ================================================ root = true [*] charset = utf-8 indent_style = space indent_size = 2 end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: - main pull_request: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true permissions: contents: read jobs: ci: runs-on: ubuntu-slim timeout-minutes: 10 steps: - name: Checkout uses: actions/checkout@v5 with: persist-credentials: false - name: Setup pnpm uses: 
pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v6 with: node-version: 24 - name: Get pnpm store directory id: pnpm-cache run: echo "pnpm_cache_dir=$(pnpm store path)" >> $GITHUB_OUTPUT - name: Cache pnpm dependencies uses: actions/cache@v4 with: path: ${{ steps.pnpm-cache.outputs.pnpm_cache_dir }} key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} restore-keys: | ${{ runner.os }}-pnpm-store- - name: Install dependencies run: pnpm install --frozen-lockfile - name: Lint run: pnpm run lint - name: Typecheck run: pnpm run test:types - name: Test run: pnpm run test ================================================ FILE: .github/workflows/deploy.yml ================================================ name: Deploy Docs on: push: branches: - main paths: - docs/** - automd.config.ts - package.json - eslint.config.mjs concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true permissions: {} jobs: deploy: name: Deploy Docs runs-on: ubuntu-slim steps: - name: Checkout uses: actions/checkout@v5 with: persist-credentials: false - name: Setup pnpm uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v6 with: node-version: 24 - name: Get pnpm store directory id: pnpm-cache run: echo "pnpm_cache_dir=$(pnpm store path)" >> $GITHUB_OUTPUT - name: Cache pnpm dependencies uses: actions/cache@v4 with: path: ${{ steps.pnpm-cache.outputs.pnpm_cache_dir }} key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} restore-keys: | ${{ runner.os }}-pnpm-store- - name: Install dependencies run: pnpm install --frozen-lockfile - name: Build docs run: pnpm run docs:build - name: Deploy to Cloudflare run: cd docs && npx wrangler deploy env: CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} ================================================ FILE: .github/workflows/pr-title.yml ================================================ name: Check PR Title on: pull_request: types: [opened, edited] 
permissions: contents: read concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: lint-pr-title: name: Lint PR title runs-on: ubuntu-slim if: ${{ (github.event.action == 'opened' || github.event.changes.title != null) && github.actor != 'renovate[bot]' }} steps: - name: Checkout uses: actions/checkout@v5 with: persist-credentials: false # Only fetch the config file from the repository sparse-checkout-cone-mode: false sparse-checkout: commitlint.config.ts - name: Install dependencies run: npm install -D @commitlint/cli @commitlint/config-conventional - name: Validate PR title with commitlint run: echo "$PR_TITLE" | npx commitlint env: PR_TITLE: ${{ github.event.pull_request.title }} ================================================ FILE: .github/workflows/release.yml ================================================ name: Release + Publish on: push: tags: - 'v*' concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: release: name: Release runs-on: ubuntu-slim permissions: id-token: write contents: write steps: - name: Checkout uses: actions/checkout@v5 with: fetch-depth: 0 # Required for fetching tags and generating release notes persist-credentials: true - name: Setup pnpm uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v6 with: node-version: 24 registry-url: https://registry.npmjs.org/ cache: pnpm - name: Generate changelog and create GitHub release run: npx changelogithub env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Install dependencies run: pnpm install --frozen-lockfile - name: Build packages run: pnpm run build - name: Publish packages to npm run: npm install -g npm@latest && pnpm -r publish --access public --no-git-checks env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} ================================================ FILE: .gitignore ================================================ dist node_modules .DS_Store .env 
docs/.vitepress/dist docs/.vitepress/cache packages/toon/test/fixtures/*.json packages/toon/test/fixtures/*.toon ================================================ FILE: .npmrc ================================================ shamefully-hoist=true ================================================ FILE: .vscode/extensions.json ================================================ { "recommendations": [ "dbaeumer.vscode-eslint" ] } ================================================ FILE: .vscode/settings.json ================================================ { // Disable the default formatter, use ESLint instead "prettier.enable": false, "editor.formatOnSave": false, // Auto-fix "editor.codeActionsOnSave": { "source.fixAll.eslint": "explicit", "source.organizeImports": "never" }, // Silence the stylistic rules in your IDE, but still auto-fix them "eslint.rules.customizations": [ { "rule": "style/*", "severity": "off" }, { "rule": "format/*", "severity": "off" }, { "rule": "*-indent", "severity": "off" }, { "rule": "*-spacing", "severity": "off" }, { "rule": "*-spaces", "severity": "off" }, { "rule": "*-order", "severity": "off" }, { "rule": "*-dangle", "severity": "off" }, { "rule": "*-newline", "severity": "off" }, { "rule": "*quotes", "severity": "off" }, { "rule": "*semi", "severity": "off" } ], // Enable ESLint for all supported languages "eslint.validate": [ "javascript", "javascriptreact", "typescript", "typescriptreact", "vue", "html", "markdown", "json", "jsonc", "yaml", "toml", "xml", "gql", "graphql", "astro", "svelte", "css", "less", "scss", "pcss", "postcss" ] } ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2025-PRESENT Johann Schopplich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: SPEC.md ================================================ # TOON Specification The TOON specification has moved to a dedicated repository: [github.com/toon-format/spec](https://github.com/toon-format/spec) ## Current Version **Version 3.0** (2025-11-24) ## Quick Links - **[Full Specification](https://github.com/toon-format/spec/blob/main/SPEC.md)** - Complete technical specification - **[Changelog](https://github.com/toon-format/spec/blob/main/CHANGELOG.md)** - Version history - **[Examples](https://github.com/toon-format/spec/tree/main/examples)** - Example TOON files - **[Conformance Tests](https://github.com/toon-format/spec/tree/main/tests)** - Language-agnostic test fixtures for implementations - **[Contributing](https://github.com/toon-format/spec/blob/main/CONTRIBUTING.md)** - How to propose spec changes ## Why a Separate Repo? 
The specification has been moved to `toon-format/spec` to: - Provide a canonical, language-agnostic source of truth - Enable independent versioning of spec and implementations - Support the growing community of TOON implementations across multiple languages - Facilitate collaboration on spec evolution through a dedicated RFC process ## This Repository This repository (`toon-format/toon`) remains the **reference implementation** in TypeScript/JavaScript. For specification discussions, issues, and contributions, please use the spec repository. ================================================ FILE: automd.config.ts ================================================ import type { Config } from 'automd' const config: Config = { input: ['docs/guide/benchmarks.md'], } export default config ================================================ FILE: benchmarks/README.md ================================================ # TOON Benchmarks Benchmarks measuring TOON's **token efficiency** and **retrieval accuracy** compared to JSON, XML, YAML, and CSV. > [!NOTE] > Results are automatically embedded in the [main README](https://github.com/toon-format/toon/#benchmarks). This guide focuses on running the benchmarks locally. ## Quick Start ```bash # Run token efficiency benchmark pnpm benchmark:tokens # Run retrieval accuracy benchmark (requires API keys) pnpm benchmark:accuracy ``` ## Token Efficiency Benchmark Measures token count reduction across JSON, XML, YAML, CSV, and TOON: 1. Generate datasets (GitHub repos, analytics, orders) 2. Convert to all formats (TOON, JSON, XML, YAML, CSV) 3. Tokenize using `gpt-tokenizer` (`o200k_base` encoding) 4. Calculate savings and generate report ```bash pnpm benchmark:tokens ``` Results are saved to `results/token-efficiency.md`. ## Retrieval Accuracy Benchmark Tests how well LLMs can answer questions about data in different formats (TOON, JSON, JSON compact, XML, YAML, CSV): 1. 
Generate 209 questions across 11 datasets (6 primary + 5 structural validation; CSV only included for datasets with flat/tabular structure) 2. Convert each dataset to all supported formats 3. Query each LLM with formatted data + question 4. Validate answers deterministically using type-aware comparison (no LLM judge needed) 5. Aggregate metrics and generate report ### Setup 1. Edit [`src/evaluate.ts`](./src/evaluate.ts) and add models to the exported `models` array: ```ts export const models: LanguageModelV3[] = [ openai('gpt-5-nano'), anthropic('claude-haiku-4-5-20251001'), google('gemini-3-flash-preview'), xai('grok-4-1-fast-non-reasoning'), // Add your models here ] ``` 2. Duplicate `.env.example` to `.env` and add your API keys: ```bash cp .env.example .env ``` ### Usage ```bash # Full benchmark pnpm benchmark:accuracy # Dry run (10 questions only, for testing setup) DRY_RUN=true pnpm benchmark:accuracy ``` Running the script will: 1. Prompt you to select which models to test. 2. Skip models with existing results (rerun to overwrite). 3. Show progress with rate limiting. 4. Save results to `results/accuracy/models/{model-id}.json`. 5. Generate report at `results/retrieval-accuracy.md`. 
### Configuration Edit [`src/constants.ts`](./src/constants.ts) to adjust: - `MODEL_RPM_LIMITS` – Rate limits per model - `DEFAULT_CONCURRENCY` – Parallel tasks (default: 10) - `DRY_RUN_LIMITS` – Questions per dry run (default: 10) ## Project Structure ``` scripts/ ├── accuracy-benchmark.ts # Retrieval accuracy benchmark ├── token-efficiency-benchmark.ts # Token counting benchmark └── fetch-github-repos.ts # Update GitHub dataset src/ ├── constants.ts # Configuration ├── datasets.ts # Test data generators ├── evaluate.ts # LLM evaluation ├── formatters.ts # Format converters ├── normalize.ts # Answer normalization ├── report.ts # Markdown reports ├── storage.ts # Result caching ├── types.ts # Type definitions ├── utils.ts # Helpers └── questions/ # Question generators ├── analytics.ts ├── event-logs.ts ├── github.ts ├── index.ts ├── nested-config.ts ├── nested.ts ├── structural-validation.ts ├── structure.ts ├── tabular.ts └── utils.ts data/ └── github-repos.json # Top 100 GitHub repos results/ ├── token-efficiency.md # Token savings report ├── retrieval-accuracy.md # Accuracy report └── accuracy/models/ # Per-model results (JSON) ``` ================================================ FILE: benchmarks/data/github-repos.json ================================================ [ { "id": 28457823, "name": "freeCodeCamp", "repo": "freeCodeCamp/freeCodeCamp", "description": "freeCodeCamp.org's open-source codebase and curriculum. 
Learn math, programming, and computer science for free.", "createdAt": "2014-12-24T17:49:19Z", "updatedAt": "2025-10-28T11:58:08Z", "pushedAt": "2025-10-28T10:17:16Z", "stars": 430886, "watchers": 8583, "forks": 42146, "defaultBranch": "main" }, { "id": 132750724, "name": "build-your-own-x", "repo": "codecrafters-io/build-your-own-x", "description": "Master programming by recreating your favorite technologies from scratch.", "createdAt": "2018-05-09T12:03:18Z", "updatedAt": "2025-10-28T12:37:11Z", "pushedAt": "2025-10-10T18:45:01Z", "stars": 430877, "watchers": 6332, "forks": 40453, "defaultBranch": "master" }, { "id": 21737465, "name": "awesome", "repo": "sindresorhus/awesome", "description": "😎 Awesome lists about all kinds of interesting topics", "createdAt": "2014-07-11T13:42:37Z", "updatedAt": "2025-10-28T12:40:21Z", "pushedAt": "2025-10-27T17:57:31Z", "stars": 410052, "watchers": 8017, "forks": 32029, "defaultBranch": "main" }, { "id": 13491895, "name": "free-programming-books", "repo": "EbookFoundation/free-programming-books", "description": ":books: Freely available programming books", "createdAt": "2013-10-11T06:50:37Z", "updatedAt": "2025-10-28T12:16:59Z", "pushedAt": "2025-10-28T01:52:13Z", "stars": 375307, "watchers": 9786, "forks": 65199, "defaultBranch": "main" }, { "id": 54346799, "name": "public-apis", "repo": "public-apis/public-apis", "description": "A collective list of free APIs", "createdAt": "2016-03-20T23:49:42Z", "updatedAt": "2025-10-28T12:33:14Z", "pushedAt": "2025-05-20T15:56:34Z", "stars": 374003, "watchers": 4400, "forks": 39473, "defaultBranch": "master" }, { "id": 85077558, "name": "developer-roadmap", "repo": "kamranahmedse/developer-roadmap", "description": "Interactive roadmaps, guides and other educational content to help developers grow in their careers.", "createdAt": "2017-03-15T13:45:52Z", "updatedAt": "2025-10-28T12:31:02Z", "pushedAt": "2025-10-28T11:09:58Z", "stars": 342136, "watchers": 6886, "forks": 43234, 
"defaultBranch": "master" }, { "id": 60493101, "name": "coding-interview-university", "repo": "jwasham/coding-interview-university", "description": "A complete computer science study plan to become a software engineer.", "createdAt": "2016-06-06T02:34:12Z", "updatedAt": "2025-10-28T12:21:02Z", "pushedAt": "2025-08-28T14:42:47Z", "stars": 331947, "watchers": 8511, "forks": 81057, "defaultBranch": "main" }, { "id": 83222441, "name": "system-design-primer", "repo": "donnemartin/system-design-primer", "description": "Learn how to design large-scale systems. Prep for the system design interview. Includes Anki flashcards.", "createdAt": "2017-02-26T16:15:28Z", "updatedAt": "2025-10-28T12:32:56Z", "pushedAt": "2025-05-21T11:13:33Z", "stars": 324409, "watchers": 6819, "forks": 52904, "defaultBranch": "master" }, { "id": 177736533, "name": "996.ICU", "repo": "996icu/996.ICU", "description": "Repo for counting stars and contributing. Press F to pay respect to glorious developers.", "createdAt": "2019-03-26T07:31:14Z", "updatedAt": "2025-10-28T11:07:13Z", "pushedAt": "2025-08-22T06:01:29Z", "stars": 274706, "watchers": 4216, "forks": 21029, "defaultBranch": "master" }, { "id": 21289110, "name": "awesome-python", "repo": "vinta/awesome-python", "description": "An opinionated list of awesome Python frameworks, libraries, software and resources.", "createdAt": "2014-06-27T21:00:06Z", "updatedAt": "2025-10-28T12:28:13Z", "pushedAt": "2025-10-16T13:40:58Z", "stars": 266661, "watchers": 6128, "forks": 26604, "defaultBranch": "master" }, { "id": 36633370, "name": "awesome-selfhosted", "repo": "awesome-selfhosted/awesome-selfhosted", "description": "A list of Free Software network services and web applications which can be hosted on your own servers", "createdAt": "2015-06-01T02:33:17Z", "updatedAt": "2025-10-28T12:24:53Z", "pushedAt": "2025-10-27T21:40:26Z", "stars": 255143, "watchers": 2990, "forks": 11802, "defaultBranch": "master" }, { "id": 88011908, "name": 
"project-based-learning", "repo": "practical-tutorials/project-based-learning", "description": "Curated list of project-based tutorials", "createdAt": "2017-04-12T05:07:46Z", "updatedAt": "2025-10-28T12:22:51Z", "pushedAt": "2024-08-15T05:33:54Z", "stars": 248050, "watchers": 3446, "forks": 32431, "defaultBranch": "master" }, { "id": 10270250, "name": "react", "repo": "facebook/react", "description": "The library for web and native user interfaces.", "createdAt": "2013-05-24T16:15:54Z", "updatedAt": "2025-10-28T12:24:55Z", "pushedAt": "2025-10-28T01:25:20Z", "stars": 240100, "watchers": 6686, "forks": 49682, "defaultBranch": "main" }, { "id": 63476337, "name": "Python", "repo": "TheAlgorithms/Python", "description": "All Algorithms implemented in Python", "createdAt": "2016-07-16T09:44:01Z", "updatedAt": "2025-10-28T12:25:22Z", "pushedAt": "2025-10-20T00:59:36Z", "stars": 212119, "watchers": 5975, "forks": 49025, "defaultBranch": "master" }, { "id": 11730342, "name": "vue", "repo": "vuejs/vue", "description": "This is the repo for Vue 2. 
For Vue 3, go to https://github.com/vuejs/core", "createdAt": "2013-07-29T03:24:51Z", "updatedAt": "2025-10-28T10:39:45Z", "pushedAt": "2024-10-10T07:24:15Z", "stars": 209636, "watchers": 5786, "forks": 33795, "defaultBranch": "main" }, { "id": 2325298, "name": "linux", "repo": "torvalds/linux", "description": "Linux kernel source tree", "createdAt": "2011-09-04T22:48:12Z", "updatedAt": "2025-10-28T12:39:23Z", "pushedAt": "2025-10-27T18:11:32Z", "stars": 205858, "watchers": 7743, "forks": 58047, "defaultBranch": "master" }, { "id": 19415064, "name": "computer-science", "repo": "ossu/computer-science", "description": "🎓 Path to a free self-taught education in Computer Science!", "createdAt": "2014-05-04T00:18:39Z", "updatedAt": "2025-10-28T12:41:20Z", "pushedAt": "2025-08-23T18:48:52Z", "stars": 196086, "watchers": 5936, "forks": 24474, "defaultBranch": "master" }, { "id": 126577260, "name": "javascript-algorithms", "repo": "trekhleb/javascript-algorithms", "description": "📝 Algorithms and data structures implemented in JavaScript with explanations and links to further readings", "createdAt": "2018-03-24T07:47:04Z", "updatedAt": "2025-10-28T12:37:32Z", "pushedAt": "2025-10-22T15:03:29Z", "stars": 193744, "watchers": 4268, "forks": 30929, "defaultBranch": "master" }, { "id": 45717250, "name": "tensorflow", "repo": "tensorflow/tensorflow", "description": "An Open Source Machine Learning Framework for Everyone", "createdAt": "2015-11-07T01:19:20Z", "updatedAt": "2025-10-28T11:56:54Z", "pushedAt": "2025-10-28T12:37:04Z", "stars": 192240, "watchers": 7431, "forks": 74932, "defaultBranch": "master" }, { "id": 138393139, "name": "the-book-of-secret-knowledge", "repo": "trimstray/the-book-of-secret-knowledge", "description": "A collection of inspiring lists, manuals, cheatsheets, blogs, hacks, one-liners, cli/web tools and more.", "createdAt": "2018-06-23T10:43:14Z", "updatedAt": "2025-10-28T12:40:20Z", "pushedAt": "2024-11-19T14:00:38Z", "stars": 191487, "watchers": 2678, 
"forks": 11764, "defaultBranch": "master" }, { "id": 14440270, "name": "You-Dont-Know-JS", "repo": "getify/You-Dont-Know-JS", "description": "A book series (2 published editions) on the JS language.", "createdAt": "2013-11-16T02:37:24Z", "updatedAt": "2025-10-28T11:34:43Z", "pushedAt": "2025-05-20T14:22:36Z", "stars": 183653, "watchers": 5803, "forks": 33671, "defaultBranch": "2nd-ed" }, { "id": 121395510, "name": "CS-Notes", "repo": "CyC2018/CS-Notes", "description": ":books: 技术面试必备基础知识、Leetcode、计算机操作系统、计算机网络、系统设计", "createdAt": "2018-02-13T14:56:24Z", "updatedAt": "2025-10-28T11:56:57Z", "pushedAt": "2024-08-21T09:40:10Z", "stars": 182661, "watchers": 5249, "forks": 51249, "defaultBranch": "master" }, { "id": 291137, "name": "ohmyzsh", "repo": "ohmyzsh/ohmyzsh", "description": "🙃 A delightful community-driven (with 2,400+ contributors) framework for managing your zsh configuration. Includes 300+ optional plugins (rails, git, macOS, hub, docker, homebrew, node, php, python, etc), 140+ themes to spice up your morning, and an auto-update tool that makes it easy to keep up with the latest updates from the community.", "createdAt": "2009-08-28T18:15:37Z", "updatedAt": "2025-10-28T12:39:19Z", "pushedAt": "2025-10-27T18:37:07Z", "stars": 182331, "watchers": 2620, "forks": 26261, "defaultBranch": "master" }, { "id": 614765452, "name": "AutoGPT", "repo": "Significant-Gravitas/AutoGPT", "description": "AutoGPT is the vision of accessible AI for everyone, to use and to build on. 
Our mission is to provide the tools, so that you can focus on what matters.", "createdAt": "2023-03-16T09:21:07Z", "updatedAt": "2025-10-28T12:01:03Z", "pushedAt": "2025-10-28T11:50:06Z", "stars": 179337, "watchers": 1547, "forks": 46094, "defaultBranch": "master" }, { "id": 41881900, "name": "vscode", "repo": "microsoft/vscode", "description": "Visual Studio Code", "createdAt": "2015-09-03T20:23:38Z", "updatedAt": "2025-10-28T12:22:53Z", "pushedAt": "2025-10-28T12:33:55Z", "stars": 177962, "watchers": 3366, "forks": 35810, "defaultBranch": "main" }, { "id": 123458551, "name": "Python-100-Days", "repo": "jackfrued/Python-100-Days", "description": "Python - 100天从新手到大师", "createdAt": "2018-03-01T16:05:52Z", "updatedAt": "2025-10-28T12:40:38Z", "pushedAt": "2025-03-28T10:29:23Z", "stars": 173818, "watchers": 6098, "forks": 54782, "defaultBranch": "master" }, { "id": 2126244, "name": "bootstrap", "repo": "twbs/bootstrap", "description": "The most popular HTML, CSS, and JavaScript framework for developing responsive, mobile first projects on the web.", "createdAt": "2011-07-29T21:19:00Z", "updatedAt": "2025-10-28T12:25:19Z", "pushedAt": "2025-10-28T10:02:33Z", "stars": 173612, "watchers": 6680, "forks": 79159, "defaultBranch": "main" }, { "id": 31792824, "name": "flutter", "repo": "flutter/flutter", "description": "Flutter makes it easy and fast to build beautiful apps for mobile and beyond", "createdAt": "2015-03-06T22:54:58Z", "updatedAt": "2025-10-28T12:35:50Z", "pushedAt": "2025-10-28T12:35:51Z", "stars": 173572, "watchers": 3481, "forks": 29419, "defaultBranch": "master" }, { "id": 1062897, "name": "gitignore", "repo": "github/gitignore", "description": "A collection of useful .gitignore templates", "createdAt": "2010-11-08T20:17:14Z", "updatedAt": "2025-10-28T12:36:17Z", "pushedAt": "2025-09-10T18:42:03Z", "stars": 170327, "watchers": 3367, "forks": 82996, "defaultBranch": "main" }, { "id": 35955666, "name": "the-art-of-command-line", "repo": 
"jlevy/the-art-of-command-line", "description": "Master the command line, in one page", "createdAt": "2015-05-20T15:11:03Z", "updatedAt": "2025-10-28T10:16:58Z", "pushedAt": "2024-06-25T18:13:44Z", "stars": 158603, "watchers": 2812, "forks": 14753, "defaultBranch": "master" }, { "id": 527591471, "name": "stable-diffusion-webui", "repo": "AUTOMATIC1111/stable-diffusion-webui", "description": "Stable Diffusion web UI", "createdAt": "2022-08-22T14:05:26Z", "updatedAt": "2025-10-28T12:41:21Z", "pushedAt": "2025-10-07T20:06:10Z", "stars": 157629, "watchers": 1156, "forks": 29254, "defaultBranch": "master" }, { "id": 21540759, "name": "awesome-go", "repo": "avelino/awesome-go", "description": "A curated list of awesome Go frameworks, libraries and software", "createdAt": "2014-07-06T13:42:15Z", "updatedAt": "2025-10-28T12:41:20Z", "pushedAt": "2025-10-22T12:15:14Z", "stars": 155912, "watchers": 2820, "forks": 12712, "defaultBranch": "main" }, { "id": 658928958, "name": "ollama", "repo": "ollama/ollama", "description": "Get up and running with OpenAI gpt-oss, DeepSeek-R1, Gemma 3 and other models.", "createdAt": "2023-06-26T19:39:32Z", "updatedAt": "2025-10-28T12:05:06Z", "pushedAt": "2025-10-28T08:16:13Z", "stars": 154883, "watchers": 876, "forks": 13480, "defaultBranch": "main" }, { "id": 233472199, "name": "Microsoft-Activation-Scripts", "repo": "massgravel/Microsoft-Activation-Scripts", "description": "Open-source Windows and Office activator featuring HWID, Ohook, TSforge, KMS38, and Online KMS activation methods, along with advanced troubleshooting.", "createdAt": "2020-01-12T23:03:34Z", "updatedAt": "2025-10-28T12:40:24Z", "pushedAt": "2025-09-30T22:22:59Z", "stars": 154022, "watchers": 1319, "forks": 14869, "defaultBranch": "master" }, { "id": 132464395, "name": "JavaGuide", "repo": "Snailclimb/JavaGuide", "description": "「Java学习+面试指南」一份涵盖大部分 Java 程序员所需要掌握的核心知识。准备 Java 面试,首选 JavaGuide!", "createdAt": "2018-05-07T13:27:00Z", "updatedAt": "2025-10-28T12:01:53Z", 
"pushedAt": "2025-10-27T11:09:05Z", "stars": 152325, "watchers": 4469, "forks": 46021, "defaultBranch": "main" }, { "id": 193215554, "name": "n8n", "repo": "n8n-io/n8n", "description": "Fair-code workflow automation platform with native AI capabilities. Combine visual building with custom code, self-host or cloud, 400+ integrations.", "createdAt": "2019-06-22T09:24:21Z", "updatedAt": "2025-10-28T12:41:23Z", "pushedAt": "2025-10-28T12:34:50Z", "stars": 152300, "watchers": 889, "forks": 48578, "defaultBranch": "master" }, { "id": 155220641, "name": "transformers", "repo": "huggingface/transformers", "description": "🤗 Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training. ", "createdAt": "2018-10-29T13:56:00Z", "updatedAt": "2025-10-28T12:41:10Z", "pushedAt": "2025-10-28T12:38:18Z", "stars": 151745, "watchers": 1167, "forks": 30971, "defaultBranch": "main" }, { "id": 6498492, "name": "javascript", "repo": "airbnb/javascript", "description": "JavaScript Style Guide", "createdAt": "2012-11-01T23:13:50Z", "updatedAt": "2025-10-28T11:07:36Z", "pushedAt": "2025-09-17T18:12:44Z", "stars": 147700, "watchers": 3702, "forks": 26795, "defaultBranch": "master" }, { "id": 1039520, "name": "youtube-dl", "repo": "ytdl-org/youtube-dl", "description": "Command-line program to download videos from YouTube.com and other video sites", "createdAt": "2010-10-31T14:35:07Z", "updatedAt": "2025-10-28T12:01:08Z", "pushedAt": "2025-10-18T10:02:28Z", "stars": 138581, "watchers": 2160, "forks": 10527, "defaultBranch": "master" }, { "id": 599320067, "name": "langflow", "repo": "langflow-ai/langflow", "description": "Langflow is a powerful tool for building and deploying AI-powered agents and workflows.", "createdAt": "2023-02-08T22:28:03Z", "updatedAt": "2025-10-28T12:04:14Z", "pushedAt": "2025-10-28T11:44:40Z", "stars": 136336, "watchers": 454, "forks": 7859, "defaultBranch": "main" 
}, { "id": 574523116, "name": "awesome-chatgpt-prompts", "repo": "f/awesome-chatgpt-prompts", "description": "This repo includes ChatGPT prompt curation to use ChatGPT and other LLM tools better.", "createdAt": "2022-12-05T13:54:13Z", "updatedAt": "2025-10-28T12:32:02Z", "pushedAt": "2025-10-14T17:23:13Z", "stars": 135843, "watchers": 1563, "forks": 18078, "defaultBranch": "main" }, { "id": 70107786, "name": "next.js", "repo": "vercel/next.js", "description": "The React Framework", "createdAt": "2016-10-05T23:32:51Z", "updatedAt": "2025-10-28T12:19:30Z", "pushedAt": "2025-10-28T12:22:48Z", "stars": 135333, "watchers": 1495, "forks": 29693, "defaultBranch": "canary" }, { "id": 307260205, "name": "yt-dlp", "repo": "yt-dlp/yt-dlp", "description": "A feature-rich command-line audio/video downloader", "createdAt": "2020-10-26T04:22:55Z", "updatedAt": "2025-10-28T12:38:42Z", "pushedAt": "2025-10-27T23:21:38Z", "stars": 132949, "watchers": 678, "forks": 10668, "defaultBranch": "master" }, { "id": 58028038, "name": "HelloGitHub", "repo": "521xueweihan/HelloGitHub", "description": ":octocat: 分享 GitHub 上有趣、入门级的开源项目。Share interesting, entry-level open source projects on GitHub.", "createdAt": "2016-05-04T06:24:11Z", "updatedAt": "2025-10-28T12:13:38Z", "pushedAt": "2025-10-28T00:14:25Z", "stars": 132365, "watchers": 4187, "forks": 10822, "defaultBranch": "master" }, { "id": 62607227, "name": "tech-interview-handbook", "repo": "yangshun/tech-interview-handbook", "description": "💯 Curated coding interview preparation materials for busy software engineers", "createdAt": "2016-07-05T05:00:48Z", "updatedAt": "2025-10-28T09:33:23Z", "pushedAt": "2025-08-27T00:17:33Z", "stars": 131430, "watchers": 2182, "forks": 15945, "defaultBranch": "main" }, { "id": 23096959, "name": "go", "repo": "golang/go", "description": "The Go programming language", "createdAt": "2014-08-19T04:33:40Z", "updatedAt": "2025-10-28T11:52:10Z", "pushedAt": "2025-10-28T06:29:46Z", "stars": 130554, "watchers": 
3347, "forks": 18419, "defaultBranch": "master" }, { "id": 111583593, "name": "scrcpy", "repo": "Genymobile/scrcpy", "description": "Display and control your Android device", "createdAt": "2017-11-21T18:00:27Z", "updatedAt": "2025-10-28T12:05:50Z", "pushedAt": "2025-10-27T08:59:41Z", "stars": 130304, "watchers": 1322, "forks": 12194, "defaultBranch": "master" }, { "id": 241576270, "name": "fucking-algorithm", "repo": "labuladong/fucking-algorithm", "description": "刷算法全靠套路,认准 labuladong 就够了!English version supported! Crack LeetCode, not only how, but also why. ", "createdAt": "2020-02-19T09:01:23Z", "updatedAt": "2025-10-28T08:35:53Z", "pushedAt": "2025-10-08T04:06:00Z", "stars": 129669, "watchers": 2283, "forks": 23452, "defaultBranch": "master" }, { "id": 112507086, "name": "30-seconds-of-code", "repo": "Chalarangelo/30-seconds-of-code", "description": "Coding articles to level up your development skills", "createdAt": "2017-11-29T17:35:03Z", "updatedAt": "2025-10-28T09:14:02Z", "pushedAt": "2025-10-22T12:51:11Z", "stars": 125639, "watchers": 2594, "forks": 12362, "defaultBranch": "master" }, { "id": 184456251, "name": "PowerToys", "repo": "microsoft/PowerToys", "description": "Microsoft PowerToys is a collection of utilities that help you customize Windows and streamline everyday tasks", "createdAt": "2019-05-01T17:44:02Z", "updatedAt": "2025-10-28T12:21:07Z", "pushedAt": "2025-10-28T10:55:13Z", "stars": 125271, "watchers": 1166, "forks": 7454, "defaultBranch": "main" }, { "id": 29028775, "name": "react-native", "repo": "facebook/react-native", "description": "A framework for building native applications using React", "createdAt": "2015-01-09T18:10:16Z", "updatedAt": "2025-10-28T12:36:00Z", "pushedAt": "2025-10-28T12:25:56Z", "stars": 124334, "watchers": 3563, "forks": 24916, "defaultBranch": "main" }, { "id": 9384267, "name": "electron", "repo": "electron/electron", "description": ":electron: Build cross-platform desktop apps with JavaScript, HTML, and CSS", 
"createdAt": "2013-04-12T01:47:36Z", "updatedAt": "2025-10-28T11:35:46Z", "pushedAt": "2025-10-28T09:28:32Z", "stars": 118860, "watchers": 2801, "forks": 16584, "defaultBranch": "main" }, { "id": 552661142, "name": "langchain", "repo": "langchain-ai/langchain", "description": "🦜🔗 Build context-aware reasoning applications", "createdAt": "2022-10-17T02:58:36Z", "updatedAt": "2025-10-28T12:37:33Z", "pushedAt": "2025-10-27T23:47:43Z", "stars": 118261, "watchers": 776, "forks": 19476, "defaultBranch": "master" }, { "id": 20580498, "name": "kubernetes", "repo": "kubernetes/kubernetes", "description": "Production-Grade Container Scheduling and Management", "createdAt": "2014-06-06T22:56:04Z", "updatedAt": "2025-10-28T12:19:38Z", "pushedAt": "2025-10-28T10:29:37Z", "stars": 118246, "watchers": 3189, "forks": 41587, "defaultBranch": "master" }, { "id": 561730219, "name": "hello-algo", "repo": "krahets/hello-algo", "description": "《Hello 算法》:动画图解、一键运行的数据结构与算法教程。支持 Python, Java, C++, C, C#, JS, Go, Swift, Rust, Ruby, Kotlin, TS, Dart 代码。简体版和繁体版同步更新,English version in translation", "createdAt": "2022-11-04T11:08:34Z", "updatedAt": "2025-10-28T12:30:36Z", "pushedAt": "2025-10-16T21:33:36Z", "stars": 118105, "watchers": 583, "forks": 14500, "defaultBranch": "main" }, { "id": 626805178, "name": "dify", "repo": "langgenius/dify", "description": "Production-ready platform for agentic workflow development.", "createdAt": "2023-04-12T07:40:24Z", "updatedAt": "2025-10-28T12:18:46Z", "pushedAt": "2025-10-28T10:48:12Z", "stars": 117486, "watchers": 698, "forks": 18151, "defaultBranch": "main" }, { "id": 14098069, "name": "free-programming-books-zh_CN", "repo": "justjavac/free-programming-books-zh_CN", "description": ":books: 免费的计算机编程类中文书籍,欢迎投稿", "createdAt": "2013-11-04T01:59:19Z", "updatedAt": "2025-10-28T09:19:09Z", "pushedAt": "2024-07-15T08:55:20Z", "stars": 115543, "watchers": 5859, "forks": 28362, "defaultBranch": "main" }, { "id": 32484381, "name": "free-for-dev", "repo": 
"ripienaar/free-for-dev", "description": "A list of SaaS, PaaS and IaaS offerings that have free tiers of interest to devops and infradev", "createdAt": "2015-03-18T21:06:26Z", "updatedAt": "2025-10-28T11:38:56Z", "pushedAt": "2025-10-23T04:49:00Z", "stars": 114128, "watchers": 1735, "forks": 11684, "defaultBranch": "master" }, { "id": 27193779, "name": "node", "repo": "nodejs/node", "description": "Node.js JavaScript runtime ✨🐢🚀✨", "createdAt": "2014-11-26T19:57:11Z", "updatedAt": "2025-10-28T12:34:32Z", "pushedAt": "2025-10-28T11:29:04Z", "stars": 114019, "watchers": 2963, "forks": 33580, "defaultBranch": "main" }, { "id": 701547123, "name": "open-webui", "repo": "open-webui/open-webui", "description": "User-friendly AI Interface (Supports Ollama, OpenAI API, ...)", "createdAt": "2023-10-06T22:08:27Z", "updatedAt": "2025-10-28T12:22:47Z", "pushedAt": "2025-10-28T08:46:37Z", "stars": 113575, "watchers": 515, "forks": 15783, "defaultBranch": "main" }, { "id": 808144141, "name": "FreeDomain", "repo": "DigitalPlatDev/FreeDomain", "description": "DigitalPlat FreeDomain: Free Domain For Everyone", "createdAt": "2024-05-30T13:23:00Z", "updatedAt": "2025-10-28T12:40:49Z", "pushedAt": "2025-09-25T12:12:01Z", "stars": 111985, "watchers": 120, "forks": 2068, "defaultBranch": "main" }, { "id": 943149, "name": "d3", "repo": "d3/d3", "description": "Bring data to life with SVG, Canvas and HTML. 
:bar_chart::chart_with_upwards_trend::tada:", "createdAt": "2010-09-27T17:22:42Z", "updatedAt": "2025-10-28T09:47:08Z", "pushedAt": "2025-07-27T11:30:40Z", "stars": 111693, "watchers": 3558, "forks": 22850, "defaultBranch": "main" }, { "id": 231283452, "name": "excalidraw", "repo": "excalidraw/excalidraw", "description": "Virtual whiteboard for sketching hand-drawn like diagrams", "createdAt": "2020-01-02T01:04:43Z", "updatedAt": "2025-10-28T12:38:34Z", "pushedAt": "2025-10-28T11:43:31Z", "stars": 109315, "watchers": 467, "forks": 11345, "defaultBranch": "master" }, { "id": 576201, "name": "three.js", "repo": "mrdoob/three.js", "description": "JavaScript 3D Library.", "createdAt": "2010-03-23T18:58:01Z", "updatedAt": "2025-10-28T12:07:59Z", "pushedAt": "2025-10-28T12:13:11Z", "stars": 109143, "watchers": 2518, "forks": 36054, "defaultBranch": "dev" }, { "id": 23088740, "name": "axios", "repo": "axios/axios", "description": "Promise based HTTP client for the browser and node.js", "createdAt": "2014-08-18T22:30:27Z", "updatedAt": "2025-10-28T12:10:56Z", "pushedAt": "2025-10-27T19:08:10Z", "stars": 108032, "watchers": 1169, "forks": 11371, "defaultBranch": "v1.x" }, { "id": 724712, "name": "rust", "repo": "rust-lang/rust", "description": "Empowering everyone to build reliable and efficient software.", "createdAt": "2010-06-16T20:39:03Z", "updatedAt": "2025-10-28T12:40:15Z", "pushedAt": "2025-10-28T11:12:51Z", "stars": 107478, "watchers": 1468, "forks": 13900, "defaultBranch": "master" }, { "id": 20929025, "name": "TypeScript", "repo": "microsoft/TypeScript", "description": "TypeScript is a superset of JavaScript that compiles to clean JavaScript output.", "createdAt": "2014-06-17T15:28:39Z", "updatedAt": "2025-10-28T12:19:23Z", "pushedAt": "2025-10-27T23:52:12Z", "stars": 106557, "watchers": 2148, "forks": 13086, "defaultBranch": "main" }, { "id": 133442384, "name": "deno", "repo": "denoland/deno", "description": "A modern runtime for JavaScript and TypeScript.", 
"createdAt": "2018-05-15T01:34:26Z", "updatedAt": "2025-10-28T12:27:16Z", "pushedAt": "2025-10-28T09:10:45Z", "stars": 104939, "watchers": 1398, "forks": 5754, "defaultBranch": "main" }, { "id": 103633984, "name": "nodebestpractices", "repo": "goldbergyoni/nodebestpractices", "description": ":white_check_mark: The Node.js best practices list (July 2024)", "createdAt": "2017-09-15T08:33:19Z", "updatedAt": "2025-10-28T11:50:28Z", "pushedAt": "2025-04-15T21:52:42Z", "stars": 104455, "watchers": 1944, "forks": 10625, "defaultBranch": "master" }, { "id": 63537249, "name": "create-react-app", "repo": "facebook/create-react-app", "description": "Set up a modern web app by running one command.", "createdAt": "2016-07-17T14:55:11Z", "updatedAt": "2025-10-28T12:35:24Z", "pushedAt": "2025-02-15T01:32:11Z", "stars": 103813, "watchers": 1891, "forks": 27148, "defaultBranch": "main" }, { "id": 206462776, "name": "GitHub-Chinese-Top-Charts", "repo": "GrowingGit/GitHub-Chinese-Top-Charts", "description": ":cn: GitHub中文排行榜,各语言分设「软件 | 资料」榜单,精准定位中文好项目。各取所需,高效学习。", "createdAt": "2019-09-05T03:01:56Z", "updatedAt": "2025-10-28T10:36:09Z", "pushedAt": "2024-10-12T06:51:36Z", "stars": 103358, "watchers": 2607, "forks": 13363, "defaultBranch": "master" }, { "id": 15634981, "name": "godot", "repo": "godotengine/godot", "description": "Godot Engine – Multi-platform 2D and 3D game engine", "createdAt": "2014-01-04T16:05:36Z", "updatedAt": "2025-10-28T11:39:26Z", "pushedAt": "2025-10-28T08:43:09Z", "stars": 102655, "watchers": 1493, "forks": 23457, "defaultBranch": "master" }, { "id": 299354207, "name": "rustdesk", "repo": "rustdesk/rustdesk", "description": "An open-source remote desktop application designed for self-hosting, as an alternative to TeamViewer.", "createdAt": "2020-09-28T15:36:08Z", "updatedAt": "2025-10-28T12:27:03Z", "pushedAt": "2025-10-28T12:25:33Z", "stars": 101531, "watchers": 548, "forks": 14850, "defaultBranch": "master" }, { "id": 655806940, "name": 
"generative-ai-for-beginners", "repo": "microsoft/generative-ai-for-beginners", "description": "21 Lessons, Get Started Building with Generative AI ", "createdAt": "2023-06-19T16:28:59Z", "updatedAt": "2025-10-28T12:25:17Z", "pushedAt": "2025-10-27T03:19:39Z", "stars": 101010, "watchers": 887, "forks": 53526, "defaultBranch": "main" }, { "id": 100060912, "name": "terminal", "repo": "microsoft/terminal", "description": "The new Windows Terminal and the original Windows console host, all in the same place!", "createdAt": "2017-08-11T18:38:22Z", "updatedAt": "2025-10-28T12:08:57Z", "pushedAt": "2025-10-28T03:04:50Z", "stars": 100746, "watchers": 1334, "forks": 8879, "defaultBranch": "main" }, { "id": 48378947, "name": "frp", "repo": "fatedier/frp", "description": "A fast reverse proxy to help you expose a local server behind a NAT or firewall to the internet.", "createdAt": "2015-12-21T15:24:59Z", "updatedAt": "2025-10-28T11:57:26Z", "pushedAt": "2025-10-28T09:52:35Z", "stars": 100048, "watchers": 1564, "forks": 14567, "defaultBranch": "dev" }, { "id": 908531752, "name": "DeepSeek-V3", "repo": "deepseek-ai/DeepSeek-V3", "description": null, "createdAt": "2024-12-26T09:52:40Z", "updatedAt": "2025-10-28T12:11:53Z", "pushedAt": "2025-08-28T03:24:37Z", "stars": 100020, "watchers": 752, "forks": 16313, "defaultBranch": "main" }, { "id": 55076063, "name": "Awesome-Hacking", "repo": "Hack-with-Github/Awesome-Hacking", "description": "A collection of various awesome lists for hackers, pentesters and security researchers", "createdAt": "2016-03-30T15:47:10Z", "updatedAt": "2025-10-28T12:11:25Z", "pushedAt": "2025-01-18T01:48:02Z", "stars": 99746, "watchers": 3932, "forks": 9633, "defaultBranch": "master" }, { "id": 15204860, "name": "papers-we-love", "repo": "papers-we-love/papers-we-love", "description": "Papers from the computer science community to read and discuss.", "createdAt": "2013-12-15T14:31:41Z", "updatedAt": "2025-10-28T12:35:57Z", "pushedAt": 
"2025-10-10T15:35:14Z", "stars": 99660, "watchers": 3159, "forks": 6144, "defaultBranch": "main" }, { "id": 24195339, "name": "angular", "repo": "angular/angular", "description": "Deliver web apps with confidence 🚀", "createdAt": "2014-09-18T16:12:01Z", "updatedAt": "2025-10-28T11:07:05Z", "pushedAt": "2025-10-28T10:04:30Z", "stars": 99174, "watchers": 2980, "forks": 26730, "defaultBranch": "main" }, { "id": 585146387, "name": "ui", "repo": "shadcn-ui/ui", "description": "A set of beautifully-designed, accessible components and a code distribution platform. Works with your favorite frameworks. Open Source. Open Code.", "createdAt": "2023-01-04T12:43:27Z", "updatedAt": "2025-10-28T12:32:30Z", "pushedAt": "2025-10-28T12:41:17Z", "stars": 98552, "watchers": 307, "forks": 7046, "defaultBranch": "main" }, { "id": 196701619, "name": "tauri", "repo": "tauri-apps/tauri", "description": "Build smaller, faster, and more secure desktop and mobile applications with a web frontend.", "createdAt": "2019-07-13T09:09:37Z", "updatedAt": "2025-10-28T12:32:07Z", "pushedAt": "2025-10-28T10:29:35Z", "stars": 98262, "watchers": 530, "forks": 3139, "defaultBranch": "dev" }, { "id": 157616880, "name": "iptv", "repo": "iptv-org/iptv", "description": "Collection of publicly available IPTV channels from all over the world", "createdAt": "2018-11-14T22:00:57Z", "updatedAt": "2025-10-28T12:32:18Z", "pushedAt": "2025-10-28T00:11:46Z", "stars": 98083, "watchers": 1952, "forks": 4199, "defaultBranch": "master" }, { "id": 23083156, "name": "material-ui", "repo": "mui/material-ui", "description": "Material UI: Comprehensive React component library that implements Google's Material Design. 
Free forever.", "createdAt": "2014-08-18T19:11:54Z", "updatedAt": "2025-10-28T08:02:20Z", "pushedAt": "2025-10-28T06:08:34Z", "stars": 96887, "watchers": 1312, "forks": 32696, "defaultBranch": "master" }, { "id": 34526884, "name": "ant-design", "repo": "ant-design/ant-design", "description": "An enterprise-class UI design language and React UI library", "createdAt": "2015-04-24T15:37:24Z", "updatedAt": "2025-10-28T11:00:38Z", "pushedAt": "2025-10-28T10:52:44Z", "stars": 96472, "watchers": 236, "forks": 53890, "defaultBranch": "master" }, { "id": 243950408, "name": "HowToCook", "repo": "Anduin2017/HowToCook", "description": "程序员在家做饭方法指南。Programmer's guide about how to cook at home (Simplified Chinese only).", "createdAt": "2020-02-29T10:43:49Z", "updatedAt": "2025-10-28T12:35:03Z", "pushedAt": "2025-10-28T11:30:11Z", "stars": 95425, "watchers": 488, "forks": 10651, "defaultBranch": "master" }, { "id": 33614304, "name": "thefuck", "repo": "nvbn/thefuck", "description": "Magnificent app which corrects your previous console command.", "createdAt": "2015-04-08T15:08:04Z", "updatedAt": "2025-10-28T12:34:25Z", "pushedAt": "2024-07-19T14:56:13Z", "stars": 94497, "watchers": 825, "forks": 3792, "defaultBranch": "master" }, { "id": 65600975, "name": "pytorch", "repo": "pytorch/pytorch", "description": "Tensors and Dynamic neural networks in Python with strong GPU acceleration", "createdAt": "2016-08-13T05:26:41Z", "updatedAt": "2025-10-28T12:25:28Z", "pushedAt": "2025-10-28T12:40:19Z", "stars": 94326, "watchers": 1770, "forks": 25678, "defaultBranch": "main" }, { "id": 74791366, "name": "clean-code-javascript", "repo": "ryanmcdermott/clean-code-javascript", "description": "Clean Code concepts adapted for JavaScript", "createdAt": "2016-11-25T22:25:41Z", "updatedAt": "2025-10-28T08:55:17Z", "pushedAt": "2024-07-29T07:24:37Z", "stars": 93959, "watchers": 1744, "forks": 12495, "defaultBranch": "master" }, { "id": 101296881, "name": "every-programmer-should-know", "repo": 
"mtdvio/every-programmer-should-know", "description": "A collection of (mostly) technical things every software developer should know about", "createdAt": "2017-08-24T13:18:26Z", "updatedAt": "2025-10-28T11:51:44Z", "pushedAt": "2025-10-22T15:21:18Z", "stars": 93832, "watchers": 2011, "forks": 8437, "defaultBranch": "master" }, { "id": 16408992, "name": "neovim", "repo": "neovim/neovim", "description": "Vim-fork focused on extensibility and usability", "createdAt": "2014-01-31T13:39:22Z", "updatedAt": "2025-10-28T12:38:30Z", "pushedAt": "2025-10-28T08:45:46Z", "stars": 93768, "watchers": 972, "forks": 6376, "defaultBranch": "master" }, { "id": 943398999, "name": "system-prompts-and-models-of-ai-tools", "repo": "x1xhlol/system-prompts-and-models-of-ai-tools", "description": "FULL Augment Code, Claude Code, Cluely, CodeBuddy, Comet, Cursor, Devin AI, Junie, Kiro, Leap.new, Lovable, Manus Agent Tools, NotionAI, Orchids.app, Perplexity, Poke, Qoder, Replit, Same.dev, Trae, Traycer AI, VSCode Agent, Warp.dev, Windsurf, Xcode, Z.ai Code, dia & v0. 
(And other Open Sourced) System Prompts, Internal Tools & AI Models", "createdAt": "2025-03-05T16:38:29Z", "updatedAt": "2025-10-28T12:37:42Z", "pushedAt": "2025-10-19T18:44:24Z", "stars": 93450, "watchers": 1183, "forks": 25250, "defaultBranch": "main" }, { "id": 22790488, "name": "java-design-patterns", "repo": "iluwatar/java-design-patterns", "description": "Design patterns implemented in Java", "createdAt": "2014-08-09T16:45:18Z", "updatedAt": "2025-10-28T11:55:32Z", "pushedAt": "2025-10-21T21:30:34Z", "stars": 93230, "watchers": 3717, "forks": 27312, "defaultBranch": "master" }, { "id": 90796663, "name": "puppeteer", "repo": "puppeteer/puppeteer", "description": "JavaScript API for Chrome and Firefox", "createdAt": "2017-05-09T22:16:13Z", "updatedAt": "2025-10-28T11:55:21Z", "pushedAt": "2025-10-28T11:35:29Z", "stars": 92732, "watchers": 1184, "forks": 9314, "defaultBranch": "main" }, { "id": 311525798, "name": "Web-Dev-For-Beginners", "repo": "microsoft/Web-Dev-For-Beginners", "description": "24 Lessons, 12 Weeks, Get Started as a Web Developer", "createdAt": "2020-11-10T02:44:00Z", "updatedAt": "2025-10-28T12:11:24Z", "pushedAt": "2025-10-27T13:01:13Z", "stars": 92494, "watchers": 2690, "forks": 14334, "defaultBranch": "main" }, { "id": 589831718, "name": "ComfyUI", "repo": "comfyanonymous/ComfyUI", "description": "The most powerful and modular diffusion model GUI, api and backend with a graph/nodes interface.", "createdAt": "2023-01-17T03:15:56Z", "updatedAt": "2025-10-28T12:38:44Z", "pushedAt": "2025-10-28T08:45:49Z", "stars": 92150, "watchers": 615, "forks": 10367, "defaultBranch": "master" }, { "id": 63539055, "name": "awesome-mac", "repo": "jaywcjlove/awesome-mac", "description": " Now we have become very big, Different from the original idea. 
Collect premium software in various categories.", "createdAt": "2016-07-17T15:33:47Z", "updatedAt": "2025-10-28T12:29:52Z", "pushedAt": "2025-10-27T17:27:24Z", "stars": 91942, "watchers": 1517, "forks": 6956, "defaultBranch": "master" }, { "id": 919443098, "name": "DeepSeek-R1", "repo": "deepseek-ai/DeepSeek-R1", "description": null, "createdAt": "2025-01-20T11:57:28Z", "updatedAt": "2025-10-28T12:33:45Z", "pushedAt": "2025-06-27T08:35:54Z", "stars": 91406, "watchers": 607, "forks": 11768, "defaultBranch": "main" }, { "id": 160919119, "name": "fastapi", "repo": "fastapi/fastapi", "description": "FastAPI framework, high performance, easy to learn, fast to code, ready for production", "createdAt": "2018-12-08T08:21:47Z", "updatedAt": "2025-10-28T11:31:45Z", "pushedAt": "2025-10-28T07:50:29Z", "stars": 91252, "watchers": 721, "forks": 8135, "defaultBranch": "master" }, { "id": 106017343, "name": "tailwindcss", "repo": "tailwindlabs/tailwindcss", "description": "A utility-first CSS framework for rapid UI development.", "createdAt": "2017-10-06T14:59:14Z", "updatedAt": "2025-10-28T12:25:13Z", "pushedAt": "2025-10-28T12:25:08Z", "stars": 90816, "watchers": 615, "forks": 4766, "defaultBranch": "main" } ] ================================================ FILE: benchmarks/package.json ================================================ { "name": "@toon/benchmarks", "type": "module", "private": true, "scripts": { "benchmark:tokens": "node scripts/token-efficiency-benchmark.ts", "benchmark:accuracy": "node --env-file=.env scripts/accuracy-benchmark.ts", "fetch:github-repos": "node scripts/fetch-github-repos.ts" }, "devDependencies": { "@ai-sdk/anthropic": "^3.0.58", "@ai-sdk/google": "^3.0.43", "@ai-sdk/openai": "^3.0.41", "@ai-sdk/provider": "^3.0.8", "@ai-sdk/xai": "^3.0.67", "@clack/prompts": "^1.1.0", "@faker-js/faker": "^10.3.0", "ai": "^6.0.116", "csv-stringify": "^6.6.0", "fast-xml-parser": "^5.4.2", "gpt-tokenizer": "^3.4.0", "ofetch": "^1.5.1", "p-map": "^7.0.4", 
"p-queue": "^9.1.0", "unstorage": "^1.17.4", "yaml": "^2.8.2" } } ================================================ FILE: benchmarks/results/accuracy/models/claude-haiku-4-5-20251001 ================================================ [{"questionId":"q1","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":7939,"outputTokens":6,"latencyMs":1488.7717919999996},{"questionId":"q1","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":4830,"outputTokens":6,"latencyMs":1424.8316250000007},{"questionId":"q1","format":"toon","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":3079,"outputTokens":6,"latencyMs":1200.6324999999997},{"questionId":"q1","format":"csv","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2925,"outputTokens":6,"latencyMs":1187.5327080000006},{"questionId":"q1","format":"xml","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":9424,"outputTokens":6,"latencyMs":1200.2132079999992},{"questionId":"q1","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":5830,"outputTokens":6,"latencyMs":1341.1957500000008},{"questionId":"q2","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7938,"outputTokens":4,"latencyMs":1182.2189579999995},{"questionId":"q2","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4829,"outputTokens":4,"latencyMs":1191.913125000001},{"questionId":"q2","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":3078,"outputTokens":4,"latencyMs":1393.1609
15999999},{"questionId":"q2","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2924,"outputTokens":4,"latencyMs":1192.7132500000007},{"questionId":"q2","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":9423,"outputTokens":4,"latencyMs":1360.8396249999987},{"questionId":"q2","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5829,"outputTokens":4,"latencyMs":1619.3704579999994},{"questionId":"q3","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":7943,"outputTokens":12,"latencyMs":1131.9942499999997},{"questionId":"q3","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":4834,"outputTokens":12,"latencyMs":1391.2939580000002},{"questionId":"q3","format":"toon","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":3083,"outputTokens":12,"latencyMs":1181.2237920000007},{"questionId":"q3","format":"csv","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2929,"outputTokens":12,"latencyMs":1098.3214580000003},{"questionId":"q3","format":"xml","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":9428,"outputTokens":12,"latencyMs":1468.2502499999991},{"questionId":"q3","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":5834,"outputTokens":12,"latencyMs":1044.1940839999988},{"que
stionId":"q4","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":7941,"outputTokens":5,"latencyMs":1145.9674579999992},{"questionId":"q4","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":4832,"outputTokens":5,"latencyMs":1282.7140419999996},{"questionId":"q4","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":3081,"outputTokens":5,"latencyMs":993.7237079999995},{"questionId":"q4","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":2927,"outputTokens":5,"latencyMs":1029.5127499999999},{"questionId":"q4","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":9426,"outputTokens":5,"latencyMs":1170.4240829999999},{"questionId":"q4","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":5832,"outputTokens":5,"latencyMs":1297.9727910000001},{"questionId":"q5","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":7936,"outputTokens":4,"latencyMs":1060.0110000000004},{"questionId":"q5","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":4827,"outputTokens":4,"latencyMs":932.3761250000007},{"questionId":"q5","format":"toon","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":3076,"outputTokens":4,"latencyMs":967.2909170000003},{"questionId":"q5","format":"csv","model":"claude-haiku-4-5-20251001","expected":"no","actual":"0","isCorrect":true,"inputTokens":2922,"outputTokens":5,"latencyMs":989.7537499999999},{"questionId":"q5","format":"xml","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect"
:true,"inputTokens":9421,"outputTokens":4,"latencyMs":1163.8299580000003},{"questionId":"q5","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":5827,"outputTokens":4,"latencyMs":1012.1698340000003},{"questionId":"q6","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":7939,"outputTokens":6,"latencyMs":1043.6857499999987},{"questionId":"q6","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":4830,"outputTokens":6,"latencyMs":1741.9372920000005},{"questionId":"q6","format":"toon","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":3079,"outputTokens":6,"latencyMs":1061.195040999999},{"questionId":"q6","format":"csv","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2925,"outputTokens":6,"latencyMs":897.5309579999994},{"questionId":"q6","format":"xml","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":9424,"outputTokens":6,"latencyMs":1091.2617499999997},{"questionId":"q6","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":5830,"outputTokens":6,"latencyMs":1198.945791},{"questionId":"q7","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7939,"outputTokens":4,"latencyMs":1124.808833000001},{"questionId":"q7","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4830,"outputTokens":4,"latencyMs":1004.0825409999998},{"questionId":"q7","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputToken
s":3079,"outputTokens":4,"latencyMs":1026.612874999999},{"questionId":"q7","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2925,"outputTokens":4,"latencyMs":900.933500000001},{"questionId":"q7","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":9424,"outputTokens":4,"latencyMs":1537.3743749999994},{"questionId":"q7","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5830,"outputTokens":4,"latencyMs":1069.091042},{"questionId":"q8","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":7941,"outputTokens":12,"latencyMs":1463.4106250000004},{"questionId":"q8","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":4832,"outputTokens":12,"latencyMs":1045.6618749999998},{"questionId":"q8","format":"toon","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":3081,"outputTokens":12,"latencyMs":1144.8265419999989},{"questionId":"q8","format":"csv","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2927,"outputTokens":12,"latencyMs":1266.5881250000002},{"questionId":"q8","format":"xml","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":9426,"outputTokens":12,"latencyMs":1094.5647079999999},{"questionId":"q8","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":5832,"outpu
tTokens":12,"latencyMs":1037.4817500000008},{"questionId":"q9","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":7941,"outputTokens":5,"latencyMs":1064.6803340000006},{"questionId":"q9","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":4832,"outputTokens":5,"latencyMs":1234.0882500000007},{"questionId":"q9","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":3081,"outputTokens":5,"latencyMs":1072.323041999989},{"questionId":"q9","format":"csv","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":2927,"outputTokens":5,"latencyMs":1155.7975410000072},{"questionId":"q9","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":9426,"outputTokens":5,"latencyMs":1583.6992499999906},{"questionId":"q9","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":5832,"outputTokens":5,"latencyMs":1564.031124999994},{"questionId":"q10","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7938,"outputTokens":4,"latencyMs":1184.0812499999884},{"questionId":"q10","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4829,"outputTokens":4,"latencyMs":1815.7772499999992},{"questionId":"q10","format":"toon","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":3078,"outputTokens":4,"latencyMs":1103.1678749999992},{"questionId":"q10","format":"csv","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"1","isCorrect":true,"inputTokens":2924,"outputTokens":5,"latencyMs":1059.914082999996},{"questionId":"q10","format":"xml","model":"claude-haiku-4-5-20251001",
"expected":"yes","actual":"true","isCorrect":true,"inputTokens":9423,"outputTokens":4,"latencyMs":1852.225999999995},{"questionId":"q10","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5829,"outputTokens":4,"latencyMs":1238.4458329999907},{"questionId":"q11","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":7938,"outputTokens":6,"latencyMs":1264.0889580000076},{"questionId":"q11","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":4829,"outputTokens":6,"latencyMs":984.5701249999984},{"questionId":"q11","format":"toon","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":3078,"outputTokens":6,"latencyMs":1400.2183340000047},{"questionId":"q11","format":"csv","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2924,"outputTokens":6,"latencyMs":891.1542500000069},{"questionId":"q11","format":"xml","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":9423,"outputTokens":6,"latencyMs":1176.492333000002},{"questionId":"q11","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":5829,"outputTokens":6,"latencyMs":1310.523291999998},{"questionId":"q12","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":7935,"outputTokens":4,"latencyMs":1027.5788330000069},{"questionId":"q12","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4826,"outputTokens":4,"latencyMs":992.2010420000006},{"questionId":"q12","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Operations
","actual":"Operations","isCorrect":true,"inputTokens":3075,"outputTokens":4,"latencyMs":1090.145749999996},{"questionId":"q12","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2921,"outputTokens":4,"latencyMs":867.523457999996},{"questionId":"q12","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":9420,"outputTokens":4,"latencyMs":1272.2972080000036},{"questionId":"q12","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5826,"outputTokens":4,"latencyMs":1223.0700840000063},{"questionId":"q13","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":7934,"outputTokens":5,"latencyMs":1089.1235420000012},{"questionId":"q13","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":4825,"outputTokens":5,"latencyMs":992.0438330000034},{"questionId":"q13","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":3074,"outputTokens":5,"latencyMs":1052.5001670000056},{"questionId":"q13","format":"csv","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":2920,"outputTokens":5,"latencyMs":893.6204160000052},{"questionId":"q13","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":9419,"outputTokens":5,"latencyMs":1324.1672920000128},{"questionId":"q13","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":5825,"outputTokens":5,"latencyMs":1415.1354579999897},{"questionId":"q14","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"13","isCorrect":false,"inputTokens":7934,"o
utputTokens":5,"latencyMs":1215.046291000006},{"questionId":"q14","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":4825,"outputTokens":5,"latencyMs":1065.2862919999898},{"questionId":"q14","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":3074,"outputTokens":5,"latencyMs":973.8248339999991},{"questionId":"q14","format":"csv","model":"claude-haiku-4-5-20251001","expected":"17","actual":"14","isCorrect":false,"inputTokens":2920,"outputTokens":5,"latencyMs":1048.0387499999924},{"questionId":"q14","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":9419,"outputTokens":5,"latencyMs":4246.3034999999945},{"questionId":"q14","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":5825,"outputTokens":5,"latencyMs":3089.0154579999944},{"questionId":"q15","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"13","isCorrect":false,"inputTokens":7934,"outputTokens":5,"latencyMs":1041.0287499999977},{"questionId":"q15","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":4825,"outputTokens":5,"latencyMs":1126.9441249999945},{"questionId":"q15","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"12","isCorrect":false,"inputTokens":3074,"outputTokens":5,"latencyMs":1174.3665829999954},{"questionId":"q15","format":"csv","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":2920,"outputTokens":5,"latencyMs":1421.4708329999994},{"questionId":"q15","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":9419,"outputTokens":5,"latencyMs":1255.9135000000097},{"questionId":"q15","format":"yaml","model":"claude-haik
u-4-5-20251001","expected":"17","actual":"14","isCorrect":false,"inputTokens":5825,"outputTokens":5,"latencyMs":1288.064916000003},{"questionId":"q16","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"91","actual":"85","isCorrect":false,"inputTokens":7939,"outputTokens":5,"latencyMs":1213.8851250000007},{"questionId":"q16","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"91","actual":"78","isCorrect":false,"inputTokens":4830,"outputTokens":5,"latencyMs":1034.356375000003},{"questionId":"q16","format":"toon","model":"claude-haiku-4-5-20251001","expected":"91","actual":"85","isCorrect":false,"inputTokens":3079,"outputTokens":5,"latencyMs":1150.7799589999922},{"questionId":"q16","format":"csv","model":"claude-haiku-4-5-20251001","expected":"91","actual":"76","isCorrect":false,"inputTokens":2925,"outputTokens":5,"latencyMs":927.4066250000033},{"questionId":"q16","format":"xml","model":"claude-haiku-4-5-20251001","expected":"91","actual":"92","isCorrect":false,"inputTokens":9424,"outputTokens":5,"latencyMs":989.8042080000014},{"questionId":"q16","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"91","actual":"91","isCorrect":true,"inputTokens":5830,"outputTokens":5,"latencyMs":1249.2909999999974},{"questionId":"q17","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"67","actual":"57","isCorrect":false,"inputTokens":7939,"outputTokens":5,"latencyMs":1313.7873749999999},{"questionId":"q17","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"67","actual":"60","isCorrect":false,"inputTokens":4830,"outputTokens":5,"latencyMs":1045.1794999999984},{"questionId":"q17","format":"toon","model":"claude-haiku-4-5-20251001","expected":"67","actual":"46","isCorrect":false,"inputTokens":3079,"outputTokens":5,"latencyMs":985.7277500000055},{"questionId":"q17","format":"csv","model":"claude-haiku-4-5-20251001","expected":"67","actual":"42","isCorrect":false,"inputTokens":2925,"outputTokens":5
,"latencyMs":964.684500000003},{"questionId":"q17","format":"xml","model":"claude-haiku-4-5-20251001","expected":"67","actual":"58","isCorrect":false,"inputTokens":9424,"outputTokens":5,"latencyMs":1365.7662500000006},{"questionId":"q17","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"67","actual":"61","isCorrect":false,"inputTokens":5830,"outputTokens":5,"latencyMs":1559.340124999988},{"questionId":"q18","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"41","actual":"34","isCorrect":false,"inputTokens":7939,"outputTokens":5,"latencyMs":1418.1319579999981},{"questionId":"q18","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":4830,"outputTokens":5,"latencyMs":1229.3413750000036},{"questionId":"q18","format":"toon","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":3079,"outputTokens":5,"latencyMs":1237.7892920000013},{"questionId":"q18","format":"csv","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":2925,"outputTokens":5,"latencyMs":1149.4620410000061},{"questionId":"q18","format":"xml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"34","isCorrect":false,"inputTokens":9424,"outputTokens":5,"latencyMs":1498.104582999993},{"questionId":"q18","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"28","isCorrect":false,"inputTokens":5830,"outputTokens":5,"latencyMs":1437.632666999998},{"questionId":"q19","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":7935,"outputTokens":5,"latencyMs":1284.2168329999986},{"questionId":"q19","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":4826,"outputTokens":5,"latencyMs":1304.1590419999993},{"questionId":"q19","format":"toon","model":"claude-haiku-4-5-2
0251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":3075,"outputTokens":5,"latencyMs":1151.4296670000185},{"questionId":"q19","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":2921,"outputTokens":5,"latencyMs":1547.9670420000039},{"questionId":"q19","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9420,"outputTokens":5,"latencyMs":1110.2993750000169},{"questionId":"q19","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":5826,"outputTokens":5,"latencyMs":1029.0622079999885},{"questionId":"q20","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"99857.49","isCorrect":false,"inputTokens":7936,"outputTokens":8,"latencyMs":1178.934208000006},{"questionId":"q20","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"100657.68","isCorrect":false,"inputTokens":4827,"outputTokens":8,"latencyMs":1070.5215410000092},{"questionId":"q20","format":"toon","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"97474.27","isCorrect":false,"inputTokens":3076,"outputTokens":8,"latencyMs":1914.8881249999831},{"questionId":"q20","format":"csv","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"98208.5","isCorrect":false,"inputTokens":2922,"outputTokens":8,"latencyMs":1134.84874999999},{"questionId":"q20","format":"xml","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"100591.83","isCorrect":false,"inputTokens":9421,"outputTokens":8,"latencyMs":1201.0446670000092},{"questionId":"q20","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"100560.31","isCorrect":false,"inputTokens":5827,"outputTokens":8,"latencyMs":1365.4649169999902},{"questionId":"q21","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"78","actual":"78","isCor
rect":true,"inputTokens":7933,"outputTokens":5,"latencyMs":1057.5927919999813},{"questionId":"q21","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"78","actual":"73","isCorrect":false,"inputTokens":4824,"outputTokens":5,"latencyMs":984.6357499999867},{"questionId":"q21","format":"toon","model":"claude-haiku-4-5-20251001","expected":"78","actual":"80","isCorrect":false,"inputTokens":3073,"outputTokens":5,"latencyMs":961.1789580000041},{"questionId":"q21","format":"csv","model":"claude-haiku-4-5-20251001","expected":"78","actual":"75","isCorrect":false,"inputTokens":2919,"outputTokens":5,"latencyMs":1046.232457999984},{"questionId":"q21","format":"xml","model":"claude-haiku-4-5-20251001","expected":"78","actual":"76","isCorrect":false,"inputTokens":9418,"outputTokens":5,"latencyMs":1062.490832999989},{"questionId":"q21","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"78","actual":"76","isCorrect":false,"inputTokens":5824,"outputTokens":5,"latencyMs":1386.1599170000118},{"questionId":"q22","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":7933,"outputTokens":5,"latencyMs":1155.2419999999984},{"questionId":"q22","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4824,"outputTokens":5,"latencyMs":984.3614579999994},{"questionId":"q22","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":3073,"outputTokens":5,"latencyMs":2339.569790999987},{"questionId":"q22","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":2919,"outputTokens":5,"latencyMs":1645.8104999999923},{"questionId":"q22","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":9418,"outputTokens":5,"latencyMs":1110.3421669999952},{"questionId":"q22","format":
"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":5824,"outputTokens":5,"latencyMs":1158.1035830000183},{"questionId":"q23","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"12","actual":"11","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1850.1329160000023},{"questionId":"q23","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"12","actual":"9","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1923.266666999989},{"questionId":"q23","format":"toon","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1092.6574580000015},{"questionId":"q23","format":"csv","model":"claude-haiku-4-5-20251001","expected":"12","actual":"10","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":993.8929580000113},{"questionId":"q23","format":"xml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"11","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1253.2862920000043},{"questionId":"q23","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"10","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1106.7149579999968},{"questionId":"q24","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1006.815042000002},{"questionId":"q24","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1115.0337080000027},{"questionId":"q24","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1048.2592920000025},{"questionId":"q24","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputToken
s":2927,"outputTokens":5,"latencyMs":910.7634580000013},{"questionId":"q24","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":119476.61179200001},{"questionId":"q24","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":982.0880420000176},{"questionId":"q25","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1282.5784159999748},{"questionId":"q25","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1083.9630830000096},{"questionId":"q25","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":927.612374999997},{"questionId":"q25","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":954.8622909999976},{"questionId":"q25","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":4318.050334},{"questionId":"q25","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1295.8421249999956},{"questionId":"q26","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1169.7239170000248},{"questionId":"q26","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1160.2663750000065},{"questionId":"q26","format":"toon","model":"claude-haiku-
4-5-20251001","expected":"12","actual":"6","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1324.3053749999963},{"questionId":"q26","format":"csv","model":"claude-haiku-4-5-20251001","expected":"12","actual":"6","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1237.4985830000078},{"questionId":"q26","format":"xml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":2209.9241660000116},{"questionId":"q26","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"7","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1174.278707999998},{"questionId":"q27","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1188.8322920000064},{"questionId":"q27","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1392.668875000003},{"questionId":"q27","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1144.9836670000223},{"questionId":"q27","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1185.1800420000218},{"questionId":"q27","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1109.5572499999835},{"questionId":"q27","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1004.8929999999818},{"questionId":"q28","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"63","actual":"72","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1536
.55349999998},{"questionId":"q28","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"63","actual":"67","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1200.943041999999},{"questionId":"q28","format":"toon","model":"claude-haiku-4-5-20251001","expected":"63","actual":"72","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1219.1552500000107},{"questionId":"q28","format":"csv","model":"claude-haiku-4-5-20251001","expected":"63","actual":"62","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1005.1289589999942},{"questionId":"q28","format":"xml","model":"claude-haiku-4-5-20251001","expected":"63","actual":"62","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1041.4887500000186},{"questionId":"q28","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"63","actual":"68","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1143.78916700001},{"questionId":"q29","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"53","actual":"54","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1129.352041999984},{"questionId":"q29","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"53","actual":"56","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1346.812042000005},{"questionId":"q29","format":"toon","model":"claude-haiku-4-5-20251001","expected":"53","actual":"57","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1013.5596249999944},{"questionId":"q29","format":"csv","model":"claude-haiku-4-5-20251001","expected":"53","actual":"45","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1137.8311660000181},{"questionId":"q29","format":"xml","model":"claude-haiku-4-5-20251001","expected":"53","actual":"57","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1281.3305829999736},{"questionId":"q29","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"53","act
ual":"62","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1146.0332919999782},{"questionId":"q30","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"39","actual":"28","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":962.1130420000118},{"questionId":"q30","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"39","actual":"34","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1271.9399580000027},{"questionId":"q30","format":"toon","model":"claude-haiku-4-5-20251001","expected":"39","actual":"33","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1231.0171670000127},{"questionId":"q30","format":"csv","model":"claude-haiku-4-5-20251001","expected":"39","actual":"27","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1907.3603749999893},{"questionId":"q30","format":"xml","model":"claude-haiku-4-5-20251001","expected":"39","actual":"32","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1237.180583999987},{"questionId":"q30","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"39","actual":"34","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1330.3151660000149},{"questionId":"q31","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":7942,"outputTokens":5,"latencyMs":1211.030208000011},{"questionId":"q31","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":4833,"outputTokens":5,"latencyMs":1125.4293749999779},{"questionId":"q31","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":3082,"outputTokens":5,"latencyMs":1125.0806660000235},{"questionId":"q31","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":2928,"outputTokens":5,"latencyMs":1203.0037089999823},{"q
uestionId":"q31","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":9427,"outputTokens":5,"latencyMs":1321.5858330000192},{"questionId":"q31","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":5833,"outputTokens":5,"latencyMs":1174.450708999997},{"questionId":"q32","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":7942,"outputTokens":5,"latencyMs":1131.7181249999849},{"questionId":"q32","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":4833,"outputTokens":5,"latencyMs":1292.2494589999842},{"questionId":"q32","format":"toon","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":3082,"outputTokens":5,"latencyMs":1056.2060000000056},{"questionId":"q32","format":"csv","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":2928,"outputTokens":5,"latencyMs":914.0282920000027},{"questionId":"q32","format":"xml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":9427,"outputTokens":5,"latencyMs":1058.9597080000094},{"questionId":"q32","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":5833,"outputTokens":5,"latencyMs":1138.2416660000163},{"questionId":"q33","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"15","actual":"9","isCorrect":false,"inputTokens":7942,"outputTokens":5,"latencyMs":1159.8052090000128},{"questionId":"q33","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"15","actual":"8","isCorrect":false,"inputTokens":4833,"outputTokens":5,"latencyMs":1179.3050000000221},{"questionId":"q33","format":"toon","model":"claude-haiku-4-5-20251001","expected":"15","actual":"7","isCorrect":fal
se,"inputTokens":3082,"outputTokens":5,"latencyMs":2072.5861670000013},{"questionId":"q33","format":"csv","model":"claude-haiku-4-5-20251001","expected":"15","actual":"8","isCorrect":false,"inputTokens":2928,"outputTokens":5,"latencyMs":1516.0497909999976},{"questionId":"q33","format":"xml","model":"claude-haiku-4-5-20251001","expected":"15","actual":"9","isCorrect":false,"inputTokens":9427,"outputTokens":5,"latencyMs":1098.749375000014},{"questionId":"q33","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"15","actual":"9","isCorrect":false,"inputTokens":5833,"outputTokens":5,"latencyMs":1028.6647499999963},{"questionId":"q34","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"12","actual":"14","isCorrect":false,"inputTokens":7935,"outputTokens":5,"latencyMs":1599.851416999998},{"questionId":"q34","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"12","actual":"11","isCorrect":false,"inputTokens":4826,"outputTokens":5,"latencyMs":1247.702500000014},{"questionId":"q34","format":"toon","model":"claude-haiku-4-5-20251001","expected":"12","actual":"14","isCorrect":false,"inputTokens":3075,"outputTokens":5,"latencyMs":1222.0808750000142},{"questionId":"q34","format":"csv","model":"claude-haiku-4-5-20251001","expected":"12","actual":"13","isCorrect":false,"inputTokens":2921,"outputTokens":5,"latencyMs":1043.5218340000138},{"questionId":"q34","format":"xml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"13","isCorrect":false,"inputTokens":9420,"outputTokens":5,"latencyMs":1324.776125000004},{"questionId":"q34","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"13","isCorrect":false,"inputTokens":5826,"outputTokens":5,"latencyMs":1299.7890419999894},{"questionId":"q35","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"13","isCorrect":false,"inputTokens":7935,"outputTokens":5,"latencyMs":1194.0861659999937},{"questionId":"q35","format":"json-com
pact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":4826,"outputTokens":5,"latencyMs":1254.9089580000145},{"questionId":"q35","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":3075,"outputTokens":5,"latencyMs":1161.8742499999935},{"questionId":"q35","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":2921,"outputTokens":5,"latencyMs":1073.0098749999888},{"questionId":"q35","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"12","isCorrect":false,"inputTokens":9420,"outputTokens":5,"latencyMs":2201.6162919999915},{"questionId":"q35","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":5826,"outputTokens":5,"latencyMs":1558.6932920000108},{"questionId":"q36","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":13071,"outputTokens":7,"latencyMs":1283.411334000004},{"questionId":"q36","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":8032,"outputTokens":7,"latencyMs":1105.1879999999946},{"questionId":"q36","format":"toon","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":8349,"outputTokens":7,"latencyMs":1409.0812499999884},{"questionId":"q36","format":"xml","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":14576,"outputTokens":7,"latencyMs":1401.9225830000069},{"questionId":"q36","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":9474,"outputTokens":7,"latencyMs":1155.835207999975},{"questionId":"q37","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"shipped","actual
":"shipped","isCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1765.247250000015},{"questionId":"q37","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1487.960958999989},{"questionId":"q37","format":"toon","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1007.5285840000142},{"questionId":"q37","format":"xml","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1259.9838750000054},{"questionId":"q37","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1105.1101250000065},{"questionId":"q38","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":13071,"outputTokens":7,"latencyMs":1630.5366249999788},{"questionId":"q38","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":8032,"outputTokens":7,"latencyMs":1085.6638749999984},{"questionId":"q38","format":"toon","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":8349,"outputTokens":7,"latencyMs":1176.3057919999992},{"questionId":"q38","format":"xml","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":14576,"outputTokens":7,"latencyMs":1291.9421250000014},{"questionId":"q38","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":9474,"outputTokens":7,"latencyMs":1621.5995840000105},{"questionId":"q39","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","i
sCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1089.3430830000143},{"questionId":"q39","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1120.2911670000176},{"questionId":"q39","format":"toon","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1409.0022079999908},{"questionId":"q39","format":"xml","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1679.7288340000086},{"questionId":"q39","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1494.9637079999957},{"questionId":"q40","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":13071,"outputTokens":8,"latencyMs":1279.6186250000028},{"questionId":"q40","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":8032,"outputTokens":8,"latencyMs":1300.0924999999988},{"questionId":"q40","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":8349,"outputTokens":8,"latencyMs":1365.5876669999852},{"questionId":"q40","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":14576,"outputTokens":8,"latencyMs":1800.0563749999856},{"questionId":"q40","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":9474,"outputTokens":8,"latencyMs":1211.8039999999746},{"questionId":"q41","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"pend
ing","actual":"pending","isCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1131.8700000000244},{"questionId":"q41","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1012.6772080000082},{"questionId":"q41","format":"toon","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1297.319542000012},{"questionId":"q41","format":"xml","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1182.1698329999927},{"questionId":"q41","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1186.7259579999954},{"questionId":"q42","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":13071,"outputTokens":8,"latencyMs":1212.6475830000127},{"questionId":"q42","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":8032,"outputTokens":8,"latencyMs":1013.4181670000253},{"questionId":"q42","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":8349,"outputTokens":8,"latencyMs":1341.400834},{"questionId":"q42","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":14576,"outputTokens":8,"latencyMs":1428.899792000011},{"questionId":"q42","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":9474,"outputTokens":8,"latencyMs":1146.0207080000255},{"questionId":"q43","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"cancelled",
"actual":"cancelled","isCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1413.8295410000137},{"questionId":"q43","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1371.2625839999819},{"questionId":"q43","format":"toon","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1177.8167500000272},{"questionId":"q43","format":"xml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1314.6559169999964},{"questionId":"q43","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1084.5133339999884},{"questionId":"q44","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":13072,"outputTokens":11,"latencyMs":1845.7391250000219},{"questionId":"q44","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":8033,"outputTokens":11,"latencyMs":1215.7845419999794},{"questionId":"q44","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":8350,"outputTokens":11,"latencyMs":1371.353415999969},{"questionId":"q44","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":14577,"outputTokens":11,"latencyMs":1405.688749999972},{"questionId":"q44","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon 
I","isCorrect":true,"inputTokens":9475,"outputTokens":11,"latencyMs":1195.1462080000201},{"questionId":"q45","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":13072,"outputTokens":16,"latencyMs":1444.1725420000148},{"questionId":"q45","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":8033,"outputTokens":16,"latencyMs":1851.9470420000143},{"questionId":"q45","format":"toon","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":8350,"outputTokens":16,"latencyMs":1421.1135419999482},{"questionId":"q45","format":"xml","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":14577,"outputTokens":16,"latencyMs":2023.5860419999808},{"questionId":"q45","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":9475,"outputTokens":16,"latencyMs":1497.3693329999805},{"questionId":"q46","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":13072,"outputTokens":10,"latencyMs":1659.9631250000093},{"questionId":"q46","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":8033,"outputTokens":10,"latencyMs":918.8577919999952},{"questionId":"q46","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":8350,"outputTokens":10,"latencyMs":1379.7277499999618},{"questionId":"q46
","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":14577,"outputTokens":10,"latencyMs":1363.199666999979},{"questionId":"q46","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":9475,"outputTokens":10,"latencyMs":1301.9265000000014},{"questionId":"q47","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":13071,"outputTokens":5,"latencyMs":1366.802708000003},{"questionId":"q47","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":8032,"outputTokens":5,"latencyMs":1218.0884169999626},{"questionId":"q47","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":8349,"outputTokens":5,"latencyMs":1183.3402910000295},{"questionId":"q47","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":14576,"outputTokens":5,"latencyMs":1063.356374999974},{"questionId":"q47","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":9474,"outputTokens":5,"latencyMs":1065.5065830000094},{"questionId":"q48","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":13072,"outputTokens":9,"latencyMs":1751.0218330000062},{"questionId":"q48","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":8033,"outputTokens":9,"latencyMs":1108.9815420000232},{"questionId":"q48","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty 
Senger","isCorrect":true,"inputTokens":8350,"outputTokens":9,"latencyMs":1787.948665999982},{"questionId":"q48","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":14577,"outputTokens":9,"latencyMs":1295.9337500000256},{"questionId":"q48","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":9475,"outputTokens":9,"latencyMs":1736.0960830000113},{"questionId":"q49","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":13072,"outputTokens":13,"latencyMs":1199.4784590000054},{"questionId":"q49","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":8033,"outputTokens":13,"latencyMs":1075.0446249999804},{"questionId":"q49","format":"toon","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":8350,"outputTokens":13,"latencyMs":1981.9005830000388},{"questionId":"q49","format":"xml","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":14577,"outputTokens":13,"latencyMs":1281.1696249999804},{"questionId":"q49","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":9475,"outputTokens":13,"latencyMs":1370.431249999965},{"questionId":"q50","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":13072,"outputTokens":10,"latencyMs":1113.5716249999823},{"questionId":"q50","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect"
:true,"inputTokens":8033,"outputTokens":10,"latencyMs":1239.1278750000056},{"questionId":"q50","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":8350,"outputTokens":10,"latencyMs":1279.7286249999888},{"questionId":"q50","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":14577,"outputTokens":10,"latencyMs":1407.9125830000266},{"questionId":"q50","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":9475,"outputTokens":10,"latencyMs":1706.669125000015},{"questionId":"q51","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":13071,"outputTokens":5,"latencyMs":1270.9225829999777},{"questionId":"q51","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":8032,"outputTokens":5,"latencyMs":1130.0672090000007},{"questionId":"q51","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":8349,"outputTokens":5,"latencyMs":1345.5521249999874},{"questionId":"q51","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":14576,"outputTokens":5,"latencyMs":1226.1289170000236},{"questionId":"q51","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":9474,"outputTokens":5,"latencyMs":1119.2856669999892},{"questionId":"q52","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":13067,"outputTokens":5,"latencyMs":1196.8826250000275},{"questionId":"q52","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":8028,"outputTokens":5,"latencyMs"
:1126.873499999987},{"questionId":"q52","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":8345,"outputTokens":5,"latencyMs":980.0860000000102},{"questionId":"q52","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"9","isCorrect":false,"inputTokens":14572,"outputTokens":5,"latencyMs":1064.6709169999813},{"questionId":"q52","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"9","isCorrect":false,"inputTokens":9470,"outputTokens":5,"latencyMs":1166.8697909999755},{"questionId":"q53","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":13067,"outputTokens":5,"latencyMs":1113.0990410000086},{"questionId":"q53","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":8028,"outputTokens":5,"latencyMs":1303.7009589999798},{"questionId":"q53","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":8345,"outputTokens":5,"latencyMs":1238.1067499999772},{"questionId":"q53","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":14572,"outputTokens":5,"latencyMs":1333.5686250000144},{"questionId":"q53","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":9470,"outputTokens":5,"latencyMs":1225.3654999999562},{"questionId":"q54","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":13067,"outputTokens":5,"latencyMs":1399.3465410000063},{"questionId":"q54","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":8028,"outputTokens":5,"latencyMs":1113.1435410000267},{"questionId":"q54","format":"toon","model":"claude-haiku-4-5-20251001","expect
ed":"10","actual":"6","isCorrect":false,"inputTokens":8345,"outputTokens":5,"latencyMs":1724.1194590000086},{"questionId":"q54","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":14572,"outputTokens":5,"latencyMs":1181.9311669999734},{"questionId":"q54","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"9","isCorrect":false,"inputTokens":9470,"outputTokens":5,"latencyMs":1049.9266670000507},{"questionId":"q55","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"42861.75","isCorrect":false,"inputTokens":13067,"outputTokens":8,"latencyMs":1439.9581250000047},{"questionId":"q55","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"40558.41","isCorrect":false,"inputTokens":8028,"outputTokens":8,"latencyMs":1148.0986250000424},{"questionId":"q55","format":"toon","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"47834.79","isCorrect":false,"inputTokens":8345,"outputTokens":8,"latencyMs":1271.2527080000145},{"questionId":"q55","format":"xml","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"38565.28","isCorrect":false,"inputTokens":14572,"outputTokens":8,"latencyMs":1461.959582999989},{"questionId":"q55","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"41847.94","isCorrect":false,"inputTokens":9470,"outputTokens":8,"latencyMs":1480.8175409999676},{"questionId":"q56","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"756.89","isCorrect":false,"inputTokens":13065,"outputTokens":7,"latencyMs":1130.103833000001},{"questionId":"q56","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"822.94","isCorrect":false,"inputTokens":8026,"outputTokens":7,"latencyMs":1252.784875000012},{"questionId":"q56","format":"toon","model":"claude-haiku-4-5-20251001","expected":"698.10","actu
al":"791.84","isCorrect":false,"inputTokens":8343,"outputTokens":7,"latencyMs":1113.3430420000223},{"questionId":"q56","format":"xml","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"766.89","isCorrect":false,"inputTokens":14570,"outputTokens":7,"latencyMs":2177.307124999992},{"questionId":"q56","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"779.77","isCorrect":false,"inputTokens":9468,"outputTokens":7,"latencyMs":1105.3537500000093},{"questionId":"q57","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":13066,"outputTokens":5,"latencyMs":1392.8512499999488},{"questionId":"q57","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8027,"outputTokens":5,"latencyMs":1126.0595420000027},{"questionId":"q57","format":"toon","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8344,"outputTokens":5,"latencyMs":1194.2017089999863},{"questionId":"q57","format":"xml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":14571,"outputTokens":5,"latencyMs":1171.6571669999976},{"questionId":"q57","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":9469,"outputTokens":5,"latencyMs":1206.3512079999782},{"questionId":"q58","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":13065,"outputTokens":8,"latencyMs":1393.6799589999719},{"questionId":"q58","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":8026,"outputTokens":8,"latencyMs":1519.2332499999902},{"questionId":"q58","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":8343,"
outputTokens":8,"latencyMs":2102.0287920000264},{"questionId":"q58","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":14570,"outputTokens":8,"latencyMs":1612.3956250000047},{"questionId":"q58","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":9468,"outputTokens":8,"latencyMs":1793.879041999986},{"questionId":"q59","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"43","actual":"49","isCorrect":false,"inputTokens":13069,"outputTokens":5,"latencyMs":1420.1437499999884},{"questionId":"q59","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"43","actual":"47","isCorrect":false,"inputTokens":8030,"outputTokens":5,"latencyMs":1007.740334000031},{"questionId":"q59","format":"toon","model":"claude-haiku-4-5-20251001","expected":"43","actual":"46","isCorrect":false,"inputTokens":8347,"outputTokens":5,"latencyMs":1120.1873340000166},{"questionId":"q59","format":"xml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"47","isCorrect":false,"inputTokens":14574,"outputTokens":5,"latencyMs":1570.4214159999974},{"questionId":"q59","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"49","isCorrect":false,"inputTokens":9472,"outputTokens":5,"latencyMs":1261.9378329999745},{"questionId":"q60","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"37","actual":"41","isCorrect":false,"inputTokens":13069,"outputTokens":5,"latencyMs":1163.1469999999972},{"questionId":"q60","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"37","actual":"35","isCorrect":false,"inputTokens":8030,"outputTokens":5,"latencyMs":1140.09945899999},{"questionId":"q60","format":"toon","model":"claude-haiku-4-5-20251001","expected":"37","actual":"38","isCorrect":false,"inputTokens":8347,"outputTokens":5,"latencyMs":1130.5380000000005},{"questionId":"q60","forma
t":"xml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"38","isCorrect":false,"inputTokens":14574,"outputTokens":5,"latencyMs":1129.3633750000154},{"questionId":"q60","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"36","isCorrect":false,"inputTokens":9472,"outputTokens":5,"latencyMs":1117.113416999986},{"questionId":"q61","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"28","actual":"31","isCorrect":false,"inputTokens":13069,"outputTokens":5,"latencyMs":1169.0952919999836},{"questionId":"q61","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"28","actual":"34","isCorrect":false,"inputTokens":8030,"outputTokens":5,"latencyMs":996.319291999971},{"questionId":"q61","format":"toon","model":"claude-haiku-4-5-20251001","expected":"28","actual":"32","isCorrect":false,"inputTokens":8347,"outputTokens":5,"latencyMs":1430.8997499999823},{"questionId":"q61","format":"xml","model":"claude-haiku-4-5-20251001","expected":"28","actual":"31","isCorrect":false,"inputTokens":14574,"outputTokens":5,"latencyMs":1613.020166000002},{"questionId":"q61","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"28","actual":"34","isCorrect":false,"inputTokens":9472,"outputTokens":5,"latencyMs":1436.6957920000423},{"questionId":"q62","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":13073,"outputTokens":5,"latencyMs":1080.898833000043},{"questionId":"q62","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":8034,"outputTokens":5,"latencyMs":1029.815249999985},{"questionId":"q62","format":"toon","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":8351,"outputTokens":5,"latencyMs":1269.5112920000101},{"questionId":"q62","format":"xml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputToke
ns":14578,"outputTokens":5,"latencyMs":2015.9492079999764},{"questionId":"q62","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":9476,"outputTokens":5,"latencyMs":1004.2100409999839},{"questionId":"q63","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6","actual":"8","isCorrect":false,"inputTokens":13073,"outputTokens":5,"latencyMs":1069.4494169999962},{"questionId":"q63","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":8034,"outputTokens":5,"latencyMs":1287.5518329999759},{"questionId":"q63","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":8351,"outputTokens":5,"latencyMs":1275.4342079999624},{"questionId":"q63","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":14578,"outputTokens":5,"latencyMs":1156.013666999992},{"questionId":"q63","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"7","isCorrect":false,"inputTokens":9476,"outputTokens":5,"latencyMs":1259.9707500000368},{"questionId":"q64","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":13073,"outputTokens":5,"latencyMs":1450.3827080000192},{"questionId":"q64","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":8034,"outputTokens":5,"latencyMs":1151.1852920000092},{"questionId":"q64","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":8351,"outputTokens":5,"latencyMs":1510.7578340000473},{"questionId":"q64","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":14578,"outputTokens":5,"latencyMs":1430.823583999998},{"questionId":"q64","format":"yaml","model":"claude
-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":9476,"outputTokens":5,"latencyMs":1582.9399579999736},{"questionId":"q65","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"9","actual":"10","isCorrect":false,"inputTokens":13073,"outputTokens":5,"latencyMs":1475.282125000027},{"questionId":"q65","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"9","actual":"7","isCorrect":false,"inputTokens":8034,"outputTokens":5,"latencyMs":1132.6659169999766},{"questionId":"q65","format":"toon","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":8351,"outputTokens":5,"latencyMs":1348.3977080000332},{"questionId":"q65","format":"xml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"7","isCorrect":false,"inputTokens":14578,"outputTokens":5,"latencyMs":1487.2882499999832},{"questionId":"q65","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":9476,"outputTokens":5,"latencyMs":1363.7185419999878},{"questionId":"q66","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":13074,"outputTokens":5,"latencyMs":1189.3430410000146},{"questionId":"q66","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":8035,"outputTokens":5,"latencyMs":1351.1734999999753},{"questionId":"q66","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":8352,"outputTokens":5,"latencyMs":1177.2700830000103},{"questionId":"q66","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":14579,"outputTokens":5,"latencyMs":1242.0754999999772},{"questionId":"q66","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":9477,"outputTokens":5,"late
ncyMs":1276.0302499999525},{"questionId":"q67","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":13074,"outputTokens":5,"latencyMs":1344.6504580000183},{"questionId":"q67","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":8035,"outputTokens":5,"latencyMs":1176.157292000018},{"questionId":"q67","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":8352,"outputTokens":5,"latencyMs":20701.426457999973},{"questionId":"q67","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":14579,"outputTokens":5,"latencyMs":2367.8421249999665},{"questionId":"q67","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"6","isCorrect":false,"inputTokens":9477,"outputTokens":5,"latencyMs":1153.2113339999923},{"questionId":"q68","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":13074,"outputTokens":5,"latencyMs":1657.7776670000167},{"questionId":"q68","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"4","isCorrect":false,"inputTokens":8035,"outputTokens":5,"latencyMs":1173.5819999999949},{"questionId":"q68","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":8352,"outputTokens":5,"latencyMs":1232.1174580000225},{"questionId":"q68","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":14579,"outputTokens":5,"latencyMs":1349.014124999987},{"questionId":"q68","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":9477,"outputTokens":5,"latencyMs":1086.6965840000194},{"questionId":"q69","format":"json-pretty","model":"claude-haiku-4-5-20251001","expect
ed":"20","actual":"23","isCorrect":false,"inputTokens":13077,"outputTokens":5,"latencyMs":1153.7664579999982},{"questionId":"q69","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"20","actual":"18","isCorrect":false,"inputTokens":8038,"outputTokens":5,"latencyMs":1132.6050000000396},{"questionId":"q69","format":"toon","model":"claude-haiku-4-5-20251001","expected":"20","actual":"23","isCorrect":false,"inputTokens":8355,"outputTokens":5,"latencyMs":1340.9702089999919},{"questionId":"q69","format":"xml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"21","isCorrect":false,"inputTokens":14582,"outputTokens":5,"latencyMs":1389.9804579999764},{"questionId":"q69","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"23","isCorrect":false,"inputTokens":9480,"outputTokens":5,"latencyMs":1181.636083999998},{"questionId":"q70","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"19","actual":"15","isCorrect":false,"inputTokens":13077,"outputTokens":5,"latencyMs":1226.459166000015},{"questionId":"q70","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"19","actual":"23","isCorrect":false,"inputTokens":8038,"outputTokens":5,"latencyMs":1483.1681670000544},{"questionId":"q70","format":"toon","model":"claude-haiku-4-5-20251001","expected":"19","actual":"18","isCorrect":false,"inputTokens":8355,"outputTokens":5,"latencyMs":1059.183416999993},{"questionId":"q70","format":"xml","model":"claude-haiku-4-5-20251001","expected":"19","actual":"19","isCorrect":true,"inputTokens":14582,"outputTokens":5,"latencyMs":1345.0946669999976},{"questionId":"q70","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"19","actual":"24","isCorrect":false,"inputTokens":9480,"outputTokens":5,"latencyMs":1219.1354169999831},{"questionId":"q71","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":4146,"outputTokens":6,"latencyMs":
1277.316541999986},{"questionId":"q71","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":2457,"outputTokens":6,"latencyMs":2851.0970419999794},{"questionId":"q71","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1603,"outputTokens":6,"latencyMs":1085.2489160000114},{"questionId":"q71","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1511,"outputTokens":6,"latencyMs":1086.2770000000019},{"questionId":"q71","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":4848,"outputTokens":6,"latencyMs":1117.6914580000448},{"questionId":"q71","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":3177,"outputTokens":6,"latencyMs":1230.9872919999762},{"questionId":"q72","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":4146,"outputTokens":8,"latencyMs":1094.448000000033},{"questionId":"q72","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":2457,"outputTokens":8,"latencyMs":1426.7915410000132},{"questionId":"q72","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1603,"outputTokens":8,"latencyMs":900.4749999999767},{"questionId":"q72","format":"csv","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1511,"outputTokens":8,"latencyMs":1126.0100419999799},{"questionId":"q72","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":4848,"outputTokens":8,"latencyMs":1176.668249999988},{"questionId":"
q72","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":3177,"outputTokens":8,"latencyMs":999.8163330000243},{"questionId":"q73","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4147,"outputTokens":7,"latencyMs":1333.652624999988},{"questionId":"q73","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":2458,"outputTokens":7,"latencyMs":1092.060541999992},{"questionId":"q73","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1604,"outputTokens":7,"latencyMs":1007.1116670000483},{"questionId":"q73","format":"csv","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1512,"outputTokens":7,"latencyMs":1013.337332999974},{"questionId":"q73","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4849,"outputTokens":7,"latencyMs":1650.107040999981},{"questionId":"q73","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3178,"outputTokens":7,"latencyMs":1439.3979579999577},{"questionId":"q74","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":4148,"outputTokens":5,"latencyMs":1417.6148329999996},{"questionId":"q74","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":2459,"outputTokens":5,"latencyMs":1242.6064170000027},{"questionId":"q74","format":"toon","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":1605,"outputTokens":5,"latencyMs":1100.431958000001},{"questionId":"q74","format":"csv","model":"claude-haiku-4-5-20251001","expected":"32",
"actual":"32","isCorrect":true,"inputTokens":1513,"outputTokens":5,"latencyMs":986.0364159999881},{"questionId":"q74","format":"xml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":4850,"outputTokens":5,"latencyMs":1172.132042000012},{"questionId":"q74","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":3179,"outputTokens":5,"latencyMs":1304.2830830000457},{"questionId":"q75","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":4146,"outputTokens":6,"latencyMs":1242.7824169999803},{"questionId":"q75","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":2457,"outputTokens":6,"latencyMs":1163.7357910000137},{"questionId":"q75","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1603,"outputTokens":6,"latencyMs":1151.549875000026},{"questionId":"q75","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1511,"outputTokens":6,"latencyMs":1063.787249999994},{"questionId":"q75","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":4848,"outputTokens":6,"latencyMs":1115.4328329999698},{"questionId":"q75","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":3177,"outputTokens":6,"latencyMs":1137.960791999998},{"questionId":"q76","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":4146,"outputTokens":8,"latencyMs":1201.6026249999995},{"questionId":"q76","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":2457,"outputTokens":8,"
latencyMs":1124.2640000000247},{"questionId":"q76","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1603,"outputTokens":8,"latencyMs":927.5135000000009},{"questionId":"q76","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1511,"outputTokens":8,"latencyMs":1195.1520000000019},{"questionId":"q76","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":4848,"outputTokens":8,"latencyMs":1325.3894999999902},{"questionId":"q76","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":3177,"outputTokens":8,"latencyMs":1116.2845840000082},{"questionId":"q77","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":4147,"outputTokens":7,"latencyMs":1075.8688749999856},{"questionId":"q77","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":2458,"outputTokens":7,"latencyMs":1041.189167000004},{"questionId":"q77","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1604,"outputTokens":7,"latencyMs":861.4979169999715},{"questionId":"q77","format":"csv","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1512,"outputTokens":7,"latencyMs":1134.7716669999645},{"questionId":"q77","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":4849,"outputTokens":7,"latencyMs":1177.7597500000265},{"questionId":"q77","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":3178,"outputTokens":7,"latencyMs":1119.7470000000321},{"questionId":"q78","format":"json-
pretty","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":4148,"outputTokens":5,"latencyMs":996.1894999999786},{"questionId":"q78","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":2459,"outputTokens":5,"latencyMs":1080.7052919999696},{"questionId":"q78","format":"toon","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":1605,"outputTokens":5,"latencyMs":935.0501249999506},{"questionId":"q78","format":"csv","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":1513,"outputTokens":5,"latencyMs":1056.5405419999734},{"questionId":"q78","format":"xml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":4850,"outputTokens":5,"latencyMs":1489.8931659999653},{"questionId":"q78","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":3179,"outputTokens":5,"latencyMs":79487.06733300001},{"questionId":"q79","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":4146,"outputTokens":6,"latencyMs":1313.9647910000058},{"questionId":"q79","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":2457,"outputTokens":6,"latencyMs":1343.1313749999972},{"questionId":"q79","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1603,"outputTokens":6,"latencyMs":1334.5621249999967},{"questionId":"q79","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1511,"outputTokens":6,"latencyMs":1067.4220000000205},{"questionId":"q79","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"in
putTokens":4848,"outputTokens":6,"latencyMs":1150.077583999955},{"questionId":"q79","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":3177,"outputTokens":6,"latencyMs":981.6363750000019},{"questionId":"q80","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":4143,"outputTokens":5,"latencyMs":1220.5045420000097},{"questionId":"q80","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":2454,"outputTokens":5,"latencyMs":1120.3278329999885},{"questionId":"q80","format":"toon","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1600,"outputTokens":5,"latencyMs":1426.4953749999986},{"questionId":"q80","format":"csv","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1508,"outputTokens":5,"latencyMs":941.414792000025},{"questionId":"q80","format":"xml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"59","isCorrect":false,"inputTokens":4845,"outputTokens":5,"latencyMs":1227.888500000001},{"questionId":"q80","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":3174,"outputTokens":5,"latencyMs":1216.2199170000385},{"questionId":"q81","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"367840","isCorrect":false,"inputTokens":4144,"outputTokens":6,"latencyMs":1537.3239579999936},{"questionId":"q81","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"340858","isCorrect":false,"inputTokens":2455,"outputTokens":6,"latencyMs":1150.2108749999898},{"questionId":"q81","format":"toon","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"326657","isCorrect":false,"inputTokens":1601,"outputTokens":6,"latencyMs":804.5737499999814},{"questionI
d":"q81","format":"csv","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"370775","isCorrect":false,"inputTokens":1509,"outputTokens":6,"latencyMs":840.2609170000069},{"questionId":"q81","format":"xml","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"341506","isCorrect":false,"inputTokens":4846,"outputTokens":6,"latencyMs":1314.695624999993},{"questionId":"q81","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"176916","isCorrect":false,"inputTokens":3175,"outputTokens":6,"latencyMs":1555.4265829999931},{"questionId":"q82","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1434","isCorrect":false,"inputTokens":4145,"outputTokens":6,"latencyMs":1159.8954170000507},{"questionId":"q82","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1389","isCorrect":false,"inputTokens":2456,"outputTokens":6,"latencyMs":1061.4540839999681},{"questionId":"q82","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1577","isCorrect":false,"inputTokens":1602,"outputTokens":6,"latencyMs":1123.5626250000205},{"questionId":"q82","format":"csv","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1357","isCorrect":false,"inputTokens":1510,"outputTokens":6,"latencyMs":992.4902909999946},{"questionId":"q82","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1347","isCorrect":false,"inputTokens":4847,"outputTokens":6,"latencyMs":1159.3279999999795},{"questionId":"q82","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1325","isCorrect":false,"inputTokens":3176,"outputTokens":6,"latencyMs":1181.7296659999993},{"questionId":"q83","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"343045.86","isCorrect":false,"inputTokens":4142,"outputTokens":8,"latencyMs":1219.903667000006},{"questionId":"q83","format":"json-compact","model":"cl
aude-haiku-4-5-20251001","expected":"311695.88","actual":"412256.91","isCorrect":false,"inputTokens":2453,"outputTokens":8,"latencyMs":1145.0830829999177},{"questionId":"q83","format":"toon","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"343945.11","isCorrect":false,"inputTokens":1599,"outputTokens":8,"latencyMs":1223.68987500004},{"questionId":"q83","format":"csv","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"390626.67","isCorrect":false,"inputTokens":1507,"outputTokens":8,"latencyMs":1040.1668749999953},{"questionId":"q83","format":"xml","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"382889.24","isCorrect":false,"inputTokens":4844,"outputTokens":8,"latencyMs":1409.154291999992},{"questionId":"q83","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"356170.34","isCorrect":false,"inputTokens":3173,"outputTokens":8,"latencyMs":956.8994170000078},{"questionId":"q84","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4140,"outputTokens":7,"latencyMs":1256.6058330000378},{"questionId":"q84","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.537","isCorrect":false,"inputTokens":2451,"outputTokens":7,"latencyMs":1293.1294590000762},{"questionId":"q84","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.5263","isCorrect":true,"inputTokens":1597,"outputTokens":8,"latencyMs":854.5393750000512},{"questionId":"q84","format":"csv","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.5246","isCorrect":false,"inputTokens":1505,"outputTokens":8,"latencyMs":1119.7116249999963},{"questionId":"q84","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4842,"outputTokens":7,"latencyMs":1132.9079159999965},{"questionId":"q84","format":"yaml","model":"claude-haiku-4-5-2025100
1","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3171,"outputTokens":7,"latencyMs":1521.2640829999},{"questionId":"q85","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":4145,"outputTokens":5,"latencyMs":1171.5495420000516},{"questionId":"q85","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"24","isCorrect":false,"inputTokens":2456,"outputTokens":5,"latencyMs":1003.8035830000881},{"questionId":"q85","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":1602,"outputTokens":5,"latencyMs":972.5325840000296},{"questionId":"q85","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"18","isCorrect":false,"inputTokens":1510,"outputTokens":5,"latencyMs":905.6272500000196},{"questionId":"q85","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":4847,"outputTokens":5,"latencyMs":1082.7218340000836},{"questionId":"q85","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":3176,"outputTokens":5,"latencyMs":997.8206250000512},{"questionId":"q86","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"42","actual":"25","isCorrect":false,"inputTokens":4145,"outputTokens":5,"latencyMs":1060.1338329999708},{"questionId":"q86","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"42","actual":"33","isCorrect":false,"inputTokens":2456,"outputTokens":5,"latencyMs":1412.0989999999292},{"questionId":"q86","format":"toon","model":"claude-haiku-4-5-20251001","expected":"42","actual":"42","isCorrect":true,"inputTokens":1602,"outputTokens":5,"latencyMs":980.2765409999993},{"questionId":"q86","format":"csv","model":"claude-haiku-4-5-20251001","expected":"42","actual":"41","isCorrect":false,"inputTokens":1510,"outputTokens":5,"latencyMs"
:943.2066659999546},{"questionId":"q86","format":"xml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"24","isCorrect":false,"inputTokens":4847,"outputTokens":5,"latencyMs":1598.437624999904},{"questionId":"q86","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"31","isCorrect":false,"inputTokens":3176,"outputTokens":5,"latencyMs":939.6132920000236},{"questionId":"q87","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"20","actual":"15","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1164.4256670000032},{"questionId":"q87","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"20","actual":"18","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1529.9007079999428},{"questionId":"q87","format":"toon","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":1610,"outputTokens":5,"latencyMs":1043.3165000000736},{"questionId":"q87","format":"csv","model":"claude-haiku-4-5-20251001","expected":"20","actual":"17","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1014.4737080000341},{"questionId":"q87","format":"xml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"15","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1299.7567919999128},{"questionId":"q87","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"20","isCorrect":true,"inputTokens":3184,"outputTokens":5,"latencyMs":1036.0580000000773},{"questionId":"q88","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"14","actual":"11","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1351.0695419999538},{"questionId":"q88","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"14","actual":"13","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1451.123499999987},{"questionId":"q88","format":"toon","model":"claude-haiku-4-5-20251001","expe
cted":"14","actual":"11","isCorrect":false,"inputTokens":1610,"outputTokens":5,"latencyMs":1011.8816250000382},{"questionId":"q88","format":"csv","model":"claude-haiku-4-5-20251001","expected":"14","actual":"9","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1116.2810419999296},{"questionId":"q88","format":"xml","model":"claude-haiku-4-5-20251001","expected":"14","actual":"9","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1202.6905839999672},{"questionId":"q88","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"14","actual":"10","isCorrect":false,"inputTokens":3184,"outputTokens":5,"latencyMs":1217.9919999999693},{"questionId":"q89","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":4151,"outputTokens":5,"latencyMs":1086.266249999986},{"questionId":"q89","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":2462,"outputTokens":5,"latencyMs":1425.2647910000524},{"questionId":"q89","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1608,"outputTokens":5,"latencyMs":1165.0251670000143},{"questionId":"q89","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":1516,"outputTokens":5,"latencyMs":1302.6017080000602},{"questionId":"q89","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4853,"outputTokens":5,"latencyMs":1207.5639170000795},{"questionId":"q89","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"13","isCorrect":false,"inputTokens":3182,"outputTokens":5,"latencyMs":1003.2787090000929},{"questionId":"q90","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4151,"outputTokens":5,"latencyMs":1314.302208000
0956},{"questionId":"q90","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":2462,"outputTokens":5,"latencyMs":2278.123583000037},{"questionId":"q90","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"13","isCorrect":false,"inputTokens":1608,"outputTokens":5,"latencyMs":1040.3857919999864},{"questionId":"q90","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1516,"outputTokens":5,"latencyMs":1609.7861250001006},{"questionId":"q90","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4853,"outputTokens":5,"latencyMs":1177.3617499999236},{"questionId":"q90","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"13","isCorrect":false,"inputTokens":3182,"outputTokens":5,"latencyMs":1123.888500000001},{"questionId":"q91","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"20","actual":"13","isCorrect":false,"inputTokens":4151,"outputTokens":5,"latencyMs":2513.7377080000006},{"questionId":"q91","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":2462,"outputTokens":5,"latencyMs":1089.2179999999935},{"questionId":"q91","format":"toon","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":1608,"outputTokens":5,"latencyMs":1062.1664580000797},{"questionId":"q91","format":"csv","model":"claude-haiku-4-5-20251001","expected":"20","actual":"18","isCorrect":false,"inputTokens":1516,"outputTokens":5,"latencyMs":1236.656958000036},{"questionId":"q91","format":"xml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":4853,"outputTokens":5,"latencyMs":1146.2815420000115},{"questionId":"q91","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"
14","isCorrect":false,"inputTokens":3182,"outputTokens":5,"latencyMs":1096.0875419999938},{"questionId":"q92","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"32","actual":"25","isCorrect":false,"inputTokens":4152,"outputTokens":5,"latencyMs":987.8946670000441},{"questionId":"q92","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"32","actual":"23","isCorrect":false,"inputTokens":2463,"outputTokens":5,"latencyMs":1220.6643329999642},{"questionId":"q92","format":"toon","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":1609,"outputTokens":5,"latencyMs":937.5257920000004},{"questionId":"q92","format":"csv","model":"claude-haiku-4-5-20251001","expected":"32","actual":"24","isCorrect":false,"inputTokens":1517,"outputTokens":5,"latencyMs":2063.006832999992},{"questionId":"q92","format":"xml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"26","isCorrect":false,"inputTokens":4854,"outputTokens":5,"latencyMs":1468.0255830000388},{"questionId":"q92","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"24","isCorrect":false,"inputTokens":3183,"outputTokens":5,"latencyMs":966.7985420000041},{"questionId":"q93","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"9","actual":"11","isCorrect":false,"inputTokens":4152,"outputTokens":5,"latencyMs":1278.894708000007},{"questionId":"q93","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":2463,"outputTokens":5,"latencyMs":1107.823999999906},{"questionId":"q93","format":"toon","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":1609,"outputTokens":5,"latencyMs":907.0621670000255},{"questionId":"q93","format":"csv","model":"claude-haiku-4-5-20251001","expected":"9","actual":"10","isCorrect":false,"inputTokens":1517,"outputTokens":5,"latencyMs":1304.2223330000415},{"questionId":"q9
3","format":"xml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"10","isCorrect":false,"inputTokens":4854,"outputTokens":5,"latencyMs":1006.5030419999966},{"questionId":"q93","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":3183,"outputTokens":5,"latencyMs":1042.183209000039},{"questionId":"q94","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1480.0997500000522},{"questionId":"q94","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1601.9094999999506},{"questionId":"q94","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":1610,"outputTokens":5,"latencyMs":1208.4925000000512},{"questionId":"q94","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1313.0127909999574},{"questionId":"q94","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"17","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1156.5654589999467},{"questionId":"q94","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":3184,"outputTokens":5,"latencyMs":1046.6653750000987},{"questionId":"q95","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1009.8270000000484},{"questionId":"q95","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1112.5791250000475},{"questionId":"q95","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":
false,"inputTokens":1610,"outputTokens":5,"latencyMs":1165.6955840000883},{"questionId":"q95","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1050.5519169999752},{"questionId":"q95","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1023.872166999965},{"questionId":"q95","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":3184,"outputTokens":5,"latencyMs":1117.4546669999836},{"questionId":"q96","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":17482,"outputTokens":6,"latencyMs":1361.8696670000209},{"questionId":"q96","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":12617,"outputTokens":6,"latencyMs":1247.9789170000004},{"questionId":"q96","format":"toon","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":9380,"outputTokens":6,"latencyMs":1211.6023749999003},{"questionId":"q96","format":"csv","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":9198,"outputTokens":6,"latencyMs":1307.9147920000833},{"questionId":"q96","format":"xml","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":19872,"outputTokens":6,"latencyMs":1437.3064170000143},{"questionId":"q96","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":14557,"outputTokens":6,"latencyMs":1455.2815420000115},{"questionId":"q97","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17481,"outputTokens":6,"latencyM
s":1353.6013749999693},{"questionId":"q97","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":12616,"outputTokens":6,"latencyMs":1534.4137919999193},{"questionId":"q97","format":"toon","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":9379,"outputTokens":6,"latencyMs":2213.0383339999244},{"questionId":"q97","format":"csv","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":9197,"outputTokens":6,"latencyMs":1201.597165999934},{"questionId":"q97","format":"xml","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":19871,"outputTokens":6,"latencyMs":1513.969000000041},{"questionId":"q97","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":14556,"outputTokens":6,"latencyMs":1353.5847500000382},{"questionId":"q98","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17476,"outputTokens":6,"latencyMs":1397.7247079999652},{"questionId":"q98","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":12611,"outputTokens":6,"latencyMs":1268.1517080001067},{"questionId":"q98","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":9374,"outputTokens":6,"latencyMs":1237.637166999979},{"questionId":"q98","format":"csv","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":9192,"outputTokens":6,"latencyMs":1559.2043330000015},{"questionId":"q98","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":19866,"outputTokens":6,"latencyMs":1554.237124999985},{"questionId":"q98","format":"yaml",
"model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":14551,"outputTokens":6,"latencyMs":4180.1094579999335},{"questionId":"q99","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":17481,"outputTokens":4,"latencyMs":1550.9610000000102},{"questionId":"q99","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":12616,"outputTokens":4,"latencyMs":1546.9043330000713},{"questionId":"q99","format":"toon","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":9379,"outputTokens":4,"latencyMs":1625.3927080000285},{"questionId":"q99","format":"csv","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":9197,"outputTokens":4,"latencyMs":1535.6835000000428},{"questionId":"q99","format":"xml","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":19871,"outputTokens":4,"latencyMs":2670.4785830000183},{"questionId":"q99","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":14556,"outputTokens":4,"latencyMs":1569.8371250000782},{"questionId":"q100","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17476,"outputTokens":6,"latencyMs":1596.1594999999506},{"questionId":"q100","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":12611,"outputTokens":6,"latencyMs":1386.254958000034},{"questionId":"q100","format":"toon","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":9374,"outputTokens":6,"latencyMs":1336.2282079999568},{"questionId":"q100","format":"csv","model":"claude-h
aiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":9192,"outputTokens":6,"latencyMs":1287.5360420000507},{"questionId":"q100","format":"xml","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":19866,"outputTokens":6,"latencyMs":1648.8853339999914},{"questionId":"q100","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":14551,"outputTokens":6,"latencyMs":1202.9672089999076},{"questionId":"q101","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17481,"outputTokens":6,"latencyMs":1287.3107910000253},{"questionId":"q101","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":12616,"outputTokens":6,"latencyMs":1320.3634160000365},{"questionId":"q101","format":"toon","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":9379,"outputTokens":6,"latencyMs":1191.4255419999827},{"questionId":"q101","format":"csv","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":9197,"outputTokens":6,"latencyMs":1206.390000000014},{"questionId":"q101","format":"xml","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":19871,"outputTokens":6,"latencyMs":1499.8067499999888},{"questionId":"q101","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":14556,"outputTokens":6,"latencyMs":1127.3515840000473},{"questionId":"q102","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":17483,"outputTokens":5,"latencyMs":1328.8301249999786},{"questionId":"q102","format":"json-compact","model":"claude-haiku-4-5-20251001","expec
ted":"678","actual":"678","isCorrect":true,"inputTokens":12618,"outputTokens":5,"latencyMs":1138.0201249999227},{"questionId":"q102","format":"toon","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":9381,"outputTokens":5,"latencyMs":980.3800830000546},{"questionId":"q102","format":"csv","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":9199,"outputTokens":5,"latencyMs":1157.8958750000456},{"questionId":"q102","format":"xml","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":19873,"outputTokens":5,"latencyMs":1360.5095420000143},{"questionId":"q102","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":14558,"outputTokens":5,"latencyMs":1273.2398329999996},{"questionId":"q103","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":17477,"outputTokens":4,"latencyMs":1274.5509169999277},{"questionId":"q103","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":12612,"outputTokens":4,"latencyMs":1256.6803749999963},{"questionId":"q103","format":"toon","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":9375,"outputTokens":4,"latencyMs":1433.376416000072},{"questionId":"q103","format":"csv","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":9193,"outputTokens":4,"latencyMs":1417.8876250000903},{"questionId":"q103","format":"xml","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":19867,"outputTokens":4,"latencyMs":1639.6358750000363},{"questionId":"q103","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":14552,"outputTok
ens":4,"latencyMs":1522.2459160001017},{"questionId":"q104","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17484,"outputTokens":6,"latencyMs":1278.353458999889},{"questionId":"q104","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":12619,"outputTokens":6,"latencyMs":1228.4452499999898},{"questionId":"q104","format":"toon","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":9382,"outputTokens":6,"latencyMs":1285.1270830000285},{"questionId":"q104","format":"csv","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":9200,"outputTokens":6,"latencyMs":1185.8712910000468},{"questionId":"q104","format":"xml","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":19874,"outputTokens":6,"latencyMs":1733.4342500000494},{"questionId":"q104","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":14559,"outputTokens":6,"latencyMs":1182.3504170000087},{"questionId":"q105","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17480,"outputTokens":6,"latencyMs":1328.0590000000084},{"questionId":"q105","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":12615,"outputTokens":6,"latencyMs":1337.2227919999277},{"questionId":"q105","format":"toon","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":9378,"outputTokens":6,"latencyMs":1245.6462499999907},{"questionId":"q105","format":"csv","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":9196,"outputTokens":6,"latencyM
s":1152.3198330000741},{"questionId":"q105","format":"xml","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":19870,"outputTokens":6,"latencyMs":1417.1319579998963},{"questionId":"q105","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":14555,"outputTokens":6,"latencyMs":1587.597666000016},{"questionId":"q106","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17485,"outputTokens":6,"latencyMs":1286.3247500000289},{"questionId":"q106","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":12620,"outputTokens":6,"latencyMs":1243.2615000000224},{"questionId":"q106","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":9383,"outputTokens":6,"latencyMs":1291.9809159999713},{"questionId":"q106","format":"csv","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":9201,"outputTokens":6,"latencyMs":1398.1902080000145},{"questionId":"q106","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":19875,"outputTokens":6,"latencyMs":1624.34620800009},{"questionId":"q106","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":14560,"outputTokens":6,"latencyMs":1721.1688750000903},{"questionId":"q107","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":17473,"outputTokens":5,"latencyMs":1369.6887080000015},{"questionId":"q107","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":12608,"outputTokens":5,"latencyMs":1290.1797500000102},{"questionId":"q107","for
mat":"toon","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9371,"outputTokens":5,"latencyMs":1811.6780829998897},{"questionId":"q107","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9189,"outputTokens":5,"latencyMs":1179.5881659999723},{"questionId":"q107","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":19863,"outputTokens":5,"latencyMs":2173.616832999978},{"questionId":"q107","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":14548,"outputTokens":5,"latencyMs":1352.0613330000779},{"questionId":"q108","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"11475937","isCorrect":false,"inputTokens":17476,"outputTokens":7,"latencyMs":2116.313165999949},{"questionId":"q108","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"13947885","isCorrect":false,"inputTokens":12611,"outputTokens":7,"latencyMs":1385.7955830000574},{"questionId":"q108","format":"toon","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"17348748","isCorrect":false,"inputTokens":9374,"outputTokens":7,"latencyMs":1142.456750000012},{"questionId":"q108","format":"csv","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"10737833","isCorrect":false,"inputTokens":9192,"outputTokens":7,"latencyMs":1214.3351249999832},{"questionId":"q108","format":"xml","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"10524881","isCorrect":false,"inputTokens":19866,"outputTokens":7,"latencyMs":1302.2744999999413},{"questionId":"q108","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"10959626","isCorrect":false,"inputTokens":14551,"outputTokens":7,"latencyMs":1313.938542000018},{"questionId":"q109","format":"json-pretty","m
odel":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1327621","isCorrect":false,"inputTokens":17477,"outputTokens":7,"latencyMs":1437.121167000034},{"questionId":"q109","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1453895","isCorrect":false,"inputTokens":12612,"outputTokens":7,"latencyMs":1452.4355000000214},{"questionId":"q109","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1429822","isCorrect":false,"inputTokens":9375,"outputTokens":7,"latencyMs":1270.1647080000257},{"questionId":"q109","format":"csv","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1359722","isCorrect":false,"inputTokens":9193,"outputTokens":7,"latencyMs":1112.9368749998976},{"questionId":"q109","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1264743","isCorrect":false,"inputTokens":19867,"outputTokens":7,"latencyMs":1466.141500000027},{"questionId":"q109","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1434047","isCorrect":false,"inputTokens":14552,"outputTokens":7,"latencyMs":1131.1252089999616},{"questionId":"q110","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"130896","isCorrect":false,"inputTokens":17475,"outputTokens":6,"latencyMs":1628.9582499999087},{"questionId":"q110","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"137824","isCorrect":false,"inputTokens":12610,"outputTokens":6,"latencyMs":1084.4367499999935},{"questionId":"q110","format":"toon","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"130658","isCorrect":false,"inputTokens":9373,"outputTokens":6,"latencyMs":1364.9022910000058},{"questionId":"q110","format":"csv","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"130686.16","isCorrect":false,"inputTokens":9191,"outputTokens":8,"latencyMs":1165.2718750000931},{"questionId":"q110","format":
"xml","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"132689","isCorrect":false,"inputTokens":19865,"outputTokens":6,"latencyMs":1335.3207089999923},{"questionId":"q110","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"132673","isCorrect":false,"inputTokens":14550,"outputTokens":6,"latencyMs":1393.0692500000587},{"questionId":"q111","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":2100.8017080000136},{"questionId":"q111","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"41","actual":"25","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1400.2827919999836},{"questionId":"q111","format":"toon","model":"claude-haiku-4-5-20251001","expected":"41","actual":"21","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1358.0510830000276},{"questionId":"q111","format":"csv","model":"claude-haiku-4-5-20251001","expected":"41","actual":"27","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":1191.0352500000736},{"questionId":"q111","format":"xml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":3264.8267500000075},{"questionId":"q111","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"27","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1273.1958330000052},{"questionId":"q112","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"53","actual":"56","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1519.110915999976},{"questionId":"q112","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"53","actual":"56","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1058.4677910000319},{"questionId":"q112","format":"toon","model":"claude-haiku-4-5-20251001","expected":"53","
actual":"57","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1207.4402080000145},{"questionId":"q112","format":"csv","model":"claude-haiku-4-5-20251001","expected":"53","actual":"53","isCorrect":true,"inputTokens":9193,"outputTokens":5,"latencyMs":1156.1299169999547},{"questionId":"q112","format":"xml","model":"claude-haiku-4-5-20251001","expected":"53","actual":"57","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1534.4429999999702},{"questionId":"q112","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"53","actual":"46","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1102.8952910000226},{"questionId":"q113","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1363.827082999982},{"questionId":"q113","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1098.9746250000317},{"questionId":"q113","format":"toon","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1091.201000000001},{"questionId":"q113","format":"csv","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":985.0388750000857},{"questionId":"q113","format":"xml","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1350.483332999982},{"questionId":"q113","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1329.111082999967},{"questionId":"q114","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"37","actual":"41","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1212.4
24457999994},{"questionId":"q114","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1182.8672079999233},{"questionId":"q114","format":"toon","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1758.9869160000235},{"questionId":"q114","format":"csv","model":"claude-haiku-4-5-20251001","expected":"37","actual":"45","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":1223.081125000026},{"questionId":"q114","format":"xml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1448.7562919999473},{"questionId":"q114","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1141.3254169999855},{"questionId":"q115","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"16","actual":"5","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1641.206375000067},{"questionId":"q115","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"16","actual":"31","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1152.969541999977},{"questionId":"q115","format":"toon","model":"claude-haiku-4-5-20251001","expected":"16","actual":"15","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1277.7170410000253},{"questionId":"q115","format":"csv","model":"claude-haiku-4-5-20251001","expected":"16","actual":"21","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":1170.6205830000108},{"questionId":"q115","format":"xml","model":"claude-haiku-4-5-20251001","expected":"16","actual":"3","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1850.4669170000125},{"questionId":"q115","format":"yaml","model":"claude-haiku-4-5-20251001","exp
ected":"16","actual":"3","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1451.0770000000484},{"questionId":"q116","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"49","actual":"42","isCorrect":false,"inputTokens":17478,"outputTokens":5,"latencyMs":1355.7432499999413},{"questionId":"q116","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"49","actual":"56","isCorrect":false,"inputTokens":12613,"outputTokens":5,"latencyMs":1275.323333999957},{"questionId":"q116","format":"toon","model":"claude-haiku-4-5-20251001","expected":"49","actual":"47","isCorrect":false,"inputTokens":9376,"outputTokens":5,"latencyMs":1957.939083000063},{"questionId":"q116","format":"csv","model":"claude-haiku-4-5-20251001","expected":"49","actual":"29","isCorrect":false,"inputTokens":9194,"outputTokens":5,"latencyMs":1257.4775420000078},{"questionId":"q116","format":"xml","model":"claude-haiku-4-5-20251001","expected":"49","actual":"25","isCorrect":false,"inputTokens":19868,"outputTokens":5,"latencyMs":1747.7625409999164},{"questionId":"q116","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"49","actual":"29","isCorrect":false,"inputTokens":14553,"outputTokens":5,"latencyMs":1300.0107079999289},{"questionId":"q117","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"23","actual":"26","isCorrect":false,"inputTokens":17478,"outputTokens":5,"latencyMs":1249.3837920000078},{"questionId":"q117","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"23","actual":"20","isCorrect":false,"inputTokens":12613,"outputTokens":5,"latencyMs":1169.6777500000317},{"questionId":"q117","format":"toon","model":"claude-haiku-4-5-20251001","expected":"23","actual":"22","isCorrect":false,"inputTokens":9376,"outputTokens":5,"latencyMs":1138.845290999976},{"questionId":"q117","format":"csv","model":"claude-haiku-4-5-20251001","expected":"23","actual":"22","isCorrect":false,"inputTokens":9194,"outputTokens":5,
"latencyMs":1190.8722499998985},{"questionId":"q117","format":"xml","model":"claude-haiku-4-5-20251001","expected":"23","actual":"17","isCorrect":false,"inputTokens":19868,"outputTokens":5,"latencyMs":1297.84612500004},{"questionId":"q117","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"23","actual":"14","isCorrect":false,"inputTokens":14553,"outputTokens":5,"latencyMs":1273.5267079999903},{"questionId":"q118","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"10","isCorrect":false,"inputTokens":17478,"outputTokens":5,"latencyMs":1501.3266669999575},{"questionId":"q118","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4","actual":"12","isCorrect":false,"inputTokens":12613,"outputTokens":5,"latencyMs":1335.36412500008},{"questionId":"q118","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4","actual":"15","isCorrect":false,"inputTokens":9376,"outputTokens":5,"latencyMs":1168.816125000012},{"questionId":"q118","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4","actual":"31","isCorrect":false,"inputTokens":9194,"outputTokens":5,"latencyMs":1292.2094170000637},{"questionId":"q118","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"16","isCorrect":false,"inputTokens":19868,"outputTokens":5,"latencyMs":1603.7261660000077},{"questionId":"q118","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"10","isCorrect":false,"inputTokens":14553,"outputTokens":5,"latencyMs":1265.9923339999514},{"questionId":"q119","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"57","actual":"8","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":1319.7199580000015},{"questionId":"q119","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"57","actual":"23","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1675.4872079999186},{"questionId":"q119","format":"toon","model":"claude-ha
iku-4-5-20251001","expected":"57","actual":"12","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1093.7844170000171},{"questionId":"q119","format":"csv","model":"claude-haiku-4-5-20251001","expected":"57","actual":"15","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1534.8674590000883},{"questionId":"q119","format":"xml","model":"claude-haiku-4-5-20251001","expected":"57","actual":"15","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1421.0654590000631},{"questionId":"q119","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"57","actual":"15","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1133.124291999964},{"questionId":"q120","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"43","actual":"38","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":3100.8175000000047},{"questionId":"q120","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"43","actual":"37","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1586.5317499999655},{"questionId":"q120","format":"toon","model":"claude-haiku-4-5-20251001","expected":"43","actual":"37","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1087.2246670000022},{"questionId":"q120","format":"csv","model":"claude-haiku-4-5-20251001","expected":"43","actual":"32","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1252.717082999996},{"questionId":"q120","format":"xml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"21","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1433.415833999985},{"questionId":"q120","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"41","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1497.922416999936},{"questionId":"q121","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"25","actual":"13","isCorrect":false,"inputTokens":17486,"out
putTokens":5,"latencyMs":1229.641583000077},{"questionId":"q121","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"25","actual":"15","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1479.924708000035},{"questionId":"q121","format":"toon","model":"claude-haiku-4-5-20251001","expected":"25","actual":"22","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1331.8733749999665},{"questionId":"q121","format":"csv","model":"claude-haiku-4-5-20251001","expected":"25","actual":"18","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1499.8951249999227},{"questionId":"q121","format":"xml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"12","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1506.3811669999268},{"questionId":"q121","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"10","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1207.6717090000166},{"questionId":"q122","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6","actual":"8","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":1919.7641669999575},{"questionId":"q122","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6","actual":"18","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":2294.538125000079},{"questionId":"q122","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6","actual":"15","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1619.9265840000007},{"questionId":"q122","format":"csv","model":"claude-haiku-4-5-20251001","expected":"6","actual":"8","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":2120.3911249999655},{"questionId":"q122","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"17","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1503.0869999999413},{"questionId":"q122","format":"yaml","model":"clau
de-haiku-4-5-20251001","expected":"6","actual":"13","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1234.3564580000238},{"questionId":"q123","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"8","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":1434.706542000058},{"questionId":"q123","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"3","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1561.3982910000486},{"questionId":"q123","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"12","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1187.2939999999944},{"questionId":"q123","format":"csv","model":"claude-haiku-4-5-20251001","expected":"1","actual":"4","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1084.5471249999246},{"questionId":"q123","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"3","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1870.1685000000289},{"questionId":"q123","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"4","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1385.7719580000266},{"questionId":"q124","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":7783,"outputTokens":4,"latencyMs":1268.9927909999387},{"questionId":"q124","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":5221,"outputTokens":4,"latencyMs":1421.6205409999238},{"questionId":"q124","format":"toon","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6241,"outputTokens":4,"latencyMs":1310.4610000000102},{"questionId":"q124","format":"xml","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,
"inputTokens":8788,"outputTokens":4,"latencyMs":1550.1076250000624},{"questionId":"q124","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6187,"outputTokens":4,"latencyMs":1150.0919170000125},{"questionId":"q125","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":7783,"outputTokens":7,"latencyMs":1214.324041999993},{"questionId":"q125","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5221,"outputTokens":7,"latencyMs":1421.8747919999296},{"questionId":"q125","format":"toon","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6241,"outputTokens":7,"latencyMs":1067.4452499999898},{"questionId":"q125","format":"xml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":8788,"outputTokens":7,"latencyMs":1218.4350420000264},{"questionId":"q125","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6187,"outputTokens":7,"latencyMs":1223.642499999958},{"questionId":"q126","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":7784,"outputTokens":5,"latencyMs":1130.6049170000479},{"questionId":"q126","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":5222,"outputTokens":5,"latencyMs":1027.4643330000108},{"questionId":"q126","format":"toon","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":6242,"outputTokens":5,"latencyMs":1244.4015420000069},{"questionId":"q126","format":"xml","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,
"inputTokens":8789,"outputTokens":5,"latencyMs":1162.2130830000388},{"questionId":"q126","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":6188,"outputTokens":5,"latencyMs":1949.5525829999242},{"questionId":"q127","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":7784,"outputTokens":6,"latencyMs":999.1413750000065},{"questionId":"q127","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":5222,"outputTokens":6,"latencyMs":1027.876125000068},{"questionId":"q127","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":6242,"outputTokens":6,"latencyMs":1985.8047080000397},{"questionId":"q127","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":8789,"outputTokens":6,"latencyMs":1117.451000000001},{"questionId":"q127","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":6188,"outputTokens":6,"latencyMs":1347.6159579999512},{"questionId":"q128","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":7783,"outputTokens":4,"latencyMs":1317.608249999932},{"questionId":"q128","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":5221,"outputTokens":4,"latencyMs":1256.9298749999143},{"questionId":"q128","format":"toon","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6241,"outputTokens":4,"latencyMs":1083.470083000022},{"questionId":"q128","format":"xml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":8788,"outputTokens":4,"latencyMs":1015.118333
0001077},{"questionId":"q128","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6187,"outputTokens":4,"latencyMs":955.9129999999423},{"questionId":"q129","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":7783,"outputTokens":7,"latencyMs":1433.7668750000885},{"questionId":"q129","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":5221,"outputTokens":7,"latencyMs":1191.4290000000037},{"questionId":"q129","format":"toon","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":6241,"outputTokens":7,"latencyMs":1086.0024169998942},{"questionId":"q129","format":"xml","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":8788,"outputTokens":7,"latencyMs":1344.1925419999752},{"questionId":"q129","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":6187,"outputTokens":7,"latencyMs":1327.7757920000004},{"questionId":"q130","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":7784,"outputTokens":5,"latencyMs":975.5557089999784},{"questionId":"q130","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":5222,"outputTokens":5,"latencyMs":1581.8543330000248},{"questionId":"q130","format":"toon","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":6242,"outputTokens":5,"latencyMs":1049.4542499999516},{"questionId":"q130","format":"xml","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":8789,"outputTokens":5,"latencyMs":107
8.1500829999568},{"questionId":"q130","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":6188,"outputTokens":5,"latencyMs":1060.7320830000099},{"questionId":"q131","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":7784,"outputTokens":5,"latencyMs":1025.7722079999512},{"questionId":"q131","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":5222,"outputTokens":5,"latencyMs":1314.0469169999706},{"questionId":"q131","format":"toon","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":6242,"outputTokens":5,"latencyMs":1069.5918330000713},{"questionId":"q131","format":"xml","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":8789,"outputTokens":5,"latencyMs":1142.9486249999609},{"questionId":"q131","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":6188,"outputTokens":5,"latencyMs":992.1041250000708},{"questionId":"q132","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":7783,"outputTokens":4,"latencyMs":1161.393875000067},{"questionId":"q132","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":5221,"outputTokens":4,"latencyMs":939.8532920000143},{"questionId":"q132","format":"toon","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6241,"outputTokens":4,"latencyMs":1038.753625000012},{"questionId":"q132","format":"xml","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":8788,"outputTokens":4,"latencyMs":1365.075458999956},{"questionId":"q132","format":"yaml","model"
:"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6187,"outputTokens":4,"latencyMs":1184.1347500000848},{"questionId":"q133","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":7783,"outputTokens":7,"latencyMs":1443.2086669999408},{"questionId":"q133","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5221,"outputTokens":7,"latencyMs":1285.2082500000251},{"questionId":"q133","format":"toon","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6241,"outputTokens":7,"latencyMs":1422.8267079999205},{"questionId":"q133","format":"xml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":8788,"outputTokens":7,"latencyMs":1320.150208999985},{"questionId":"q133","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6187,"outputTokens":7,"latencyMs":1196.3562910000328},{"questionId":"q134","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":7767,"outputTokens":5,"latencyMs":1288.936875000014},{"questionId":"q134","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"75","actual":"70","isCorrect":false,"inputTokens":5205,"outputTokens":5,"latencyMs":1696.469999999972},{"questionId":"q134","format":"toon","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":6225,"outputTokens":5,"latencyMs":1488.3868329999968},{"questionId":"q134","format":"xml","model":"claude-haiku-4-5-20251001","expected":"75","actual":"100","isCorrect":false,"inputTokens":8772,"outputTokens":5,"latencyMs":1240.0713750000577},{"questionId":"q134","format":"yaml","model":"clau
de-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":6171,"outputTokens":5,"latencyMs":1378.1943749999627},{"questionId":"q135","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2384.74","isCorrect":false,"inputTokens":7768,"outputTokens":8,"latencyMs":1541.0560830000322},{"questionId":"q135","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2458.48","isCorrect":false,"inputTokens":5206,"outputTokens":8,"latencyMs":1120.7529169999762},{"questionId":"q135","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2405.09","isCorrect":false,"inputTokens":6226,"outputTokens":8,"latencyMs":1157.566583000007},{"questionId":"q135","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2445.96","isCorrect":false,"inputTokens":8773,"outputTokens":8,"latencyMs":1159.1351250000298},{"questionId":"q135","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2414.48","isCorrect":false,"inputTokens":6172,"outputTokens":8,"latencyMs":1258.330958000035},{"questionId":"q136","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"29","actual":"33","isCorrect":false,"inputTokens":7768,"outputTokens":5,"latencyMs":1139.8584170000395},{"questionId":"q136","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"29","actual":"32","isCorrect":false,"inputTokens":5206,"outputTokens":5,"latencyMs":1128.9782079999568},{"questionId":"q136","format":"toon","model":"claude-haiku-4-5-20251001","expected":"29","actual":"30","isCorrect":false,"inputTokens":6226,"outputTokens":5,"latencyMs":1076.437042000005},{"questionId":"q136","format":"xml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"31","isCorrect":false,"inputTokens":8773,"outputTokens":5,"latencyMs":1214.0489590000361},{"questionId":"q136","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2
9","actual":"26","isCorrect":false,"inputTokens":6172,"outputTokens":5,"latencyMs":1147.1827920000069},{"questionId":"q137","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":7768,"outputTokens":5,"latencyMs":1060.8192499999423},{"questionId":"q137","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":5206,"outputTokens":5,"latencyMs":1000.7439579999773},{"questionId":"q137","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":6226,"outputTokens":5,"latencyMs":1444.9082920000656},{"questionId":"q137","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":8773,"outputTokens":5,"latencyMs":1364.0987090000417},{"questionId":"q137","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":6172,"outputTokens":5,"latencyMs":1289.65149999992},{"questionId":"q138","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"29","actual":"24","isCorrect":false,"inputTokens":7768,"outputTokens":5,"latencyMs":1276.8952499999432},{"questionId":"q138","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"29","actual":"24","isCorrect":false,"inputTokens":5206,"outputTokens":5,"latencyMs":1270.1233340000035},{"questionId":"q138","format":"toon","model":"claude-haiku-4-5-20251001","expected":"29","actual":"24","isCorrect":false,"inputTokens":6226,"outputTokens":5,"latencyMs":1226.6909589999123},{"questionId":"q138","format":"xml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"25","isCorrect":false,"inputTokens":8773,"outputTokens":5,"latencyMs":1133.3242500000633},{"questionId":"q138","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"28","isCorrect":false,"inputTokens":6172,"outputTokens":5,"latencyMs":2
179.776416000095},{"questionId":"q139","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":7771,"outputTokens":5,"latencyMs":1117.800791000016},{"questionId":"q139","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":5209,"outputTokens":5,"latencyMs":1069.6763750000391},{"questionId":"q139","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":6229,"outputTokens":5,"latencyMs":2063.5446249999804},{"questionId":"q139","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"12","isCorrect":false,"inputTokens":8776,"outputTokens":5,"latencyMs":1202.8583749999525},{"questionId":"q139","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":6175,"outputTokens":5,"latencyMs":1061.2812919999706},{"questionId":"q140","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"18","actual":"15","isCorrect":false,"inputTokens":7771,"outputTokens":5,"latencyMs":1386.753832999966},{"questionId":"q140","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"18","actual":"13","isCorrect":false,"inputTokens":5209,"outputTokens":5,"latencyMs":1034.8489169999957},{"questionId":"q140","format":"toon","model":"claude-haiku-4-5-20251001","expected":"18","actual":"15","isCorrect":false,"inputTokens":6229,"outputTokens":5,"latencyMs":1134.6799170000013},{"questionId":"q140","format":"xml","model":"claude-haiku-4-5-20251001","expected":"18","actual":"20","isCorrect":false,"inputTokens":8776,"outputTokens":5,"latencyMs":1083.533999999985},{"questionId":"q140","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"18","actual":"15","isCorrect":false,"inputTokens":6175,"outputTokens":5,"latencyMs":1075.4867920000106},{"questionId":"q141","format":"json-pretty","model":"claude-haiku-4-
5-20251001","expected":"33","actual":"33","isCorrect":true,"inputTokens":7775,"outputTokens":5,"latencyMs":1596.7704580000136},{"questionId":"q141","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"33","actual":"33","isCorrect":true,"inputTokens":5213,"outputTokens":5,"latencyMs":1052.1449169999687},{"questionId":"q141","format":"toon","model":"claude-haiku-4-5-20251001","expected":"33","actual":"40","isCorrect":false,"inputTokens":6233,"outputTokens":5,"latencyMs":1162.8800829999382},{"questionId":"q141","format":"xml","model":"claude-haiku-4-5-20251001","expected":"33","actual":"37","isCorrect":false,"inputTokens":8780,"outputTokens":5,"latencyMs":1121.927708000061},{"questionId":"q141","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"33","actual":"35","isCorrect":false,"inputTokens":6179,"outputTokens":5,"latencyMs":1078.549040999962},{"questionId":"q142","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"42","actual":"27","isCorrect":false,"inputTokens":7772,"outputTokens":5,"latencyMs":1063.4963330000173},{"questionId":"q142","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"42","actual":"25","isCorrect":false,"inputTokens":5210,"outputTokens":5,"latencyMs":1157.562166999909},{"questionId":"q142","format":"toon","model":"claude-haiku-4-5-20251001","expected":"42","actual":"28","isCorrect":false,"inputTokens":6230,"outputTokens":5,"latencyMs":1122.3327499999432},{"questionId":"q142","format":"xml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"30","isCorrect":false,"inputTokens":8777,"outputTokens":5,"latencyMs":1094.998041999992},{"questionId":"q142","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"28","isCorrect":false,"inputTokens":6176,"outputTokens":5,"latencyMs":1123.9057500000345},{"questionId":"q143","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"25","actual":"24","isCorrect":false,"inputTokens":7769,"outputTo
kens":5,"latencyMs":1498.7087079999037},{"questionId":"q143","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"25","actual":"30","isCorrect":false,"inputTokens":5207,"outputTokens":5,"latencyMs":1361.3783330000006},{"questionId":"q143","format":"toon","model":"claude-haiku-4-5-20251001","expected":"25","actual":"38","isCorrect":false,"inputTokens":6227,"outputTokens":5,"latencyMs":1208.8371250000782},{"questionId":"q143","format":"xml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"39","isCorrect":false,"inputTokens":8774,"outputTokens":5,"latencyMs":1436.0557080000872},{"questionId":"q143","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"31","isCorrect":false,"inputTokens":6173,"outputTokens":5,"latencyMs":1439.8235000000568},{"questionId":"q144","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"29","actual":"30","isCorrect":false,"inputTokens":7774,"outputTokens":5,"latencyMs":1211.4848749999655},{"questionId":"q144","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"29","actual":"30","isCorrect":false,"inputTokens":5212,"outputTokens":5,"latencyMs":1158.1638749999693},{"questionId":"q144","format":"toon","model":"claude-haiku-4-5-20251001","expected":"29","actual":"43","isCorrect":false,"inputTokens":6232,"outputTokens":5,"latencyMs":1706.8831250000512},{"questionId":"q144","format":"xml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"38","isCorrect":false,"inputTokens":8779,"outputTokens":5,"latencyMs":1376.859208000009},{"questionId":"q144","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"31","isCorrect":false,"inputTokens":6178,"outputTokens":5,"latencyMs":1091.6000830000266},{"questionId":"q145","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"8","isCorrect":false,"inputTokens":7774,"outputTokens":5,"latencyMs":1214.2384999999776},{"questionId":"q145","format":"json-compact",
"model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":5212,"outputTokens":5,"latencyMs":1169.8784999999916},{"questionId":"q145","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4","actual":"3","isCorrect":false,"inputTokens":6232,"outputTokens":5,"latencyMs":1438.8369160000002},{"questionId":"q145","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"5","isCorrect":false,"inputTokens":8779,"outputTokens":5,"latencyMs":1235.8554580000928},{"questionId":"q145","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"5","isCorrect":false,"inputTokens":6178,"outputTokens":5,"latencyMs":1283.048208000022},{"questionId":"q146","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":7777,"outputTokens":5,"latencyMs":1316.5421670000069},{"questionId":"q146","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":5215,"outputTokens":5,"latencyMs":1155.4379169999156},{"questionId":"q146","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"8","isCorrect":false,"inputTokens":6235,"outputTokens":5,"latencyMs":973.9992499999935},{"questionId":"q146","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":8782,"outputTokens":5,"latencyMs":1181.0223749999423},{"questionId":"q146","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":6181,"outputTokens":5,"latencyMs":1189.6436249999097},{"questionId":"q147","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":7777,"outputTokens":5,"latencyMs":1120.4472499999683},{"questionId":"q147","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"4","isCorrect":false,"inputTokens":5215,"out
putTokens":5,"latencyMs":1058.7860420000507},{"questionId":"q147","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"5","isCorrect":false,"inputTokens":6235,"outputTokens":5,"latencyMs":1214.7113749999553},{"questionId":"q147","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"7","isCorrect":false,"inputTokens":8782,"outputTokens":5,"latencyMs":2272.7187089999206},{"questionId":"q147","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"3","isCorrect":false,"inputTokens":6181,"outputTokens":5,"latencyMs":1153.4284159999806},{"questionId":"q148","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":7777,"outputTokens":5,"latencyMs":1250.0174589999951},{"questionId":"q148","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":5215,"outputTokens":5,"latencyMs":1667.0216249999357},{"questionId":"q148","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":6235,"outputTokens":5,"latencyMs":1195.937874999945},{"questionId":"q148","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":8782,"outputTokens":5,"latencyMs":1373.8929169999901},{"questionId":"q148","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":6181,"outputTokens":5,"latencyMs":1219.8368749999208},{"questionId":"q149","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"7","isCorrect":false,"inputTokens":7776,"outputTokens":5,"latencyMs":1261.3687080000527},{"questionId":"q149","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4","actual":"6","isCorrect":false,"inputTokens":5214,"outputTokens":5,"latencyMs":1096.3175000000047},{"questionId":"q149","format":"toon","model":"claude-haiku
-4-5-20251001","expected":"4","actual":"7","isCorrect":false,"inputTokens":6234,"outputTokens":5,"latencyMs":1464.7897079999093},{"questionId":"q149","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"6","isCorrect":false,"inputTokens":8781,"outputTokens":5,"latencyMs":1314.154084000038},{"questionId":"q149","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"6","isCorrect":false,"inputTokens":6180,"outputTokens":5,"latencyMs":1648.5650410000235},{"questionId":"q150","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":7776,"outputTokens":5,"latencyMs":1204.6717499999795},{"questionId":"q150","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":5214,"outputTokens":5,"latencyMs":1045.0933749999385},{"questionId":"q150","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":6234,"outputTokens":5,"latencyMs":1071.8020000000251},{"questionId":"q150","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":8781,"outputTokens":5,"latencyMs":1080.7611669999314},{"questionId":"q150","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"8","isCorrect":false,"inputTokens":6180,"outputTokens":5,"latencyMs":1317.7093749999767},{"questionId":"q151","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":1244,"outputTokens":4,"latencyMs":929.0065000000177},{"questionId":"q151","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":785,"outputTokens":4,"latencyMs":1663.7153750000289},{"questionId":"q151","format":"toon","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isC
orrect":true,"inputTokens":879,"outputTokens":4,"latencyMs":731.0117499999469},{"questionId":"q151","format":"xml","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":1314,"outputTokens":4,"latencyMs":883.2615830000723},{"questionId":"q151","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":899,"outputTokens":4,"latencyMs":933.3807080000406},{"questionId":"q152","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1242,"outputTokens":8,"latencyMs":1309.4723340000492},{"questionId":"q152","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":783,"outputTokens":8,"latencyMs":1349.013917000033},{"questionId":"q152","format":"toon","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":877,"outputTokens":8,"latencyMs":2834.810291999951},{"questionId":"q152","format":"xml","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1312,"outputTokens":8,"latencyMs":1079.063041999936},{"questionId":"q152","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":897,"outputTokens":8,"latencyMs":2015.544249999919},{"questionId":"q153","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1242,"outputTokens":6,"latencyMs":1069.0238330001011},{"questionId":"q153","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":783,"outputTokens":6,"latencyMs":980.3079999999609},{"questionId":"q153","format":"toon","m
odel":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":877,"outputTokens":6,"latencyMs":1044.9361250000075},{"questionId":"q153","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1312,"outputTokens":6,"latencyMs":2413.873292000033},{"questionId":"q153","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":897,"outputTokens":6,"latencyMs":873.3523750000168},{"questionId":"q154","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":942.2453750000568},{"questionId":"q154","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":887.3804999999702},{"questionId":"q154","format":"toon","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":875.3322920000646},{"questionId":"q154","format":"xml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":980.2426670000423},{"questionId":"q154","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":1153.6150420000777},{"questionId":"q155","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1242,"outputTokens":6,"latencyMs":1241.1024170001037},{"questionId":"q155","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":783,"outputTokens":6,"latencyMs":1081.2751660000067},{"questionId":"q155","format":"toon","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect
":true,"inputTokens":877,"outputTokens":6,"latencyMs":771.7234170000302},{"questionId":"q155","format":"xml","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1312,"outputTokens":6,"latencyMs":825.690082999994},{"questionId":"q155","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":897,"outputTokens":6,"latencyMs":752.5629590000026},{"questionId":"q156","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":1425.01554199995},{"questionId":"q156","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":1021.5733329999493},{"questionId":"q156","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":888.623041999992},{"questionId":"q156","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":920.717166999937},{"questionId":"q156","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":799.5221250000177},{"questionId":"q157","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1244,"outputTokens":6,"latencyMs":955.9951249998994},{"questionId":"q157","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":785,"outputTokens":6,"latencyMs":794.2132920000004},{"questionId":"q157","format":"toon","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":879,"outputTokens":6,"latencyMs":981.5377080000471},{"questionId":"q15
7","format":"xml","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1314,"outputTokens":6,"latencyMs":1138.1192919999594},{"questionId":"q157","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":899,"outputTokens":6,"latencyMs":856.9616249999963},{"questionId":"q158","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":1242,"outputTokens":4,"latencyMs":838.4199159999844},{"questionId":"q158","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":783,"outputTokens":4,"latencyMs":1141.6517499999609},{"questionId":"q158","format":"toon","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":877,"outputTokens":4,"latencyMs":1043.2275830000872},{"questionId":"q158","format":"xml","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":1312,"outputTokens":4,"latencyMs":1111.2398749999702},{"questionId":"q158","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":897,"outputTokens":4,"latencyMs":863.2164999999804},{"questionId":"q159","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1243,"outputTokens":6,"latencyMs":880.7821669999976},{"questionId":"q159","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":784,"outputTokens":6,"latencyMs":895.2262499999488},{"questionId":"q159","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":878,"outputTokens":6,"latencyMs":1003.9393329999875},{"questionId":"q159","format":"xml","model":"claude-haiku-4-5-20251001","expe
cted":"3600","actual":"3600","isCorrect":true,"inputTokens":1313,"outputTokens":6,"latencyMs":907.4466250000987},{"questionId":"q159","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":898,"outputTokens":6,"latencyMs":1019.0216660000151},{"questionId":"q160","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1244,"outputTokens":9,"latencyMs":1012.7389589999802},{"questionId":"q160","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":785,"outputTokens":9,"latencyMs":1003.1793339999858},{"questionId":"q160","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":879,"outputTokens":9,"latencyMs":916.7758330000797},{"questionId":"q160","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1314,"outputTokens":9,"latencyMs":1208.6454590000212},{"questionId":"q160","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":899,"outputTokens":9,"latencyMs":923.5587499999674},{"questionId":"q161","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":826.9563749999506},{"questionId":"q161","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":753.3855420000618},{"questionId":"q161","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":1089.936457999982},{"questionId":"q161","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1314,"outputTokens
":5,"latencyMs":795.5758750000969},{"questionId":"q161","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":872.4575829999521},{"questionId":"q162","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":887.2722920000087},{"questionId":"q162","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":900.1268749999581},{"questionId":"q162","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":812.1885420000181},{"questionId":"q162","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":839.9153749999823},{"questionId":"q162","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":871.6134580000071},{"questionId":"q163","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1243,"outputTokens":5,"latencyMs":902.4298330000602},{"questionId":"q163","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":784,"outputTokens":5,"latencyMs":889.4039999999804},{"questionId":"q163","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":878,"outputTokens":5,"latencyMs":1126.9705829999875},{"questionId":"q163","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1313,"outputTokens":5,"latencyMs":958.2488329999615},{"questionId":"q163","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","act
ual":"2","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":1385.0525420000777},{"questionId":"q164","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1243,"outputTokens":5,"latencyMs":1185.5424160000402},{"questionId":"q164","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":784,"outputTokens":5,"latencyMs":883.8527500000782},{"questionId":"q164","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":878,"outputTokens":5,"latencyMs":1052.9344580001198},{"questionId":"q164","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1313,"outputTokens":5,"latencyMs":911.154957999941},{"questionId":"q164","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":1121.9202919998206},{"questionId":"q165","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":879.8078749999404},{"questionId":"q165","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":1288.6490829999093},{"questionId":"q165","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":786.6004580000881},{"questionId":"q165","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":974.0200000000186},{"questionId":"q165","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":1045.2869170000777},{"questionId":"q166","format":"
json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1247,"outputTokens":5,"latencyMs":824.5860000001267},{"questionId":"q166","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"0","isCorrect":false,"inputTokens":788,"outputTokens":5,"latencyMs":974.435499999905},{"questionId":"q166","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":882,"outputTokens":5,"latencyMs":804.1913339998573},{"questionId":"q166","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1317,"outputTokens":5,"latencyMs":1016.0534169999883},{"questionId":"q166","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":902,"outputTokens":5,"latencyMs":962.7041670000181},{"questionId":"q167","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1243,"outputTokens":5,"latencyMs":1005.984332999913},{"questionId":"q167","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":784,"outputTokens":5,"latencyMs":1055.2427499999758},{"questionId":"q167","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":878,"outputTokens":5,"latencyMs":945.2598330001347},{"questionId":"q167","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1313,"outputTokens":5,"latencyMs":1155.7412080001086},{"questionId":"q167","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":814.8985409999732},{"questionId":"q168","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":1245,"outputToken
s":5,"latencyMs":1001.1619170000777},{"questionId":"q168","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":786,"outputTokens":5,"latencyMs":973.6378339999355},{"questionId":"q168","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":880,"outputTokens":5,"latencyMs":906.6011659998912},{"questionId":"q168","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":1315,"outputTokens":5,"latencyMs":5450.461332999868},{"questionId":"q168","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":900,"outputTokens":5,"latencyMs":893.4252499998547},{"questionId":"q169","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":1247,"outputTokens":5,"latencyMs":913.8683329999913},{"questionId":"q169","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":788,"outputTokens":5,"latencyMs":1247.810541999992},{"questionId":"q169","format":"toon","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":882,"outputTokens":5,"latencyMs":927.667708999943},{"questionId":"q169","format":"xml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":1317,"outputTokens":5,"latencyMs":1024.6397919999436},{"questionId":"q169","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":902,"outputTokens":5,"latencyMs":808.8294999999925},{"questionId":"q170","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":1246,"outputTokens":5,"latencyMs":948.8401660001837},{"questionId":"q170","format":"json-compact","model":"claude-haiku-4-5-20251001","expect
ed":"5","actual":"6","isCorrect":false,"inputTokens":787,"outputTokens":5,"latencyMs":807.7621660002042},{"questionId":"q170","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":881,"outputTokens":5,"latencyMs":995.4957500000019},{"questionId":"q170","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":1316,"outputTokens":5,"latencyMs":948.4754999999423},{"questionId":"q170","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":901,"outputTokens":5,"latencyMs":915.8271250000689},{"questionId":"q171","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1248,"outputTokens":5,"latencyMs":923.2111250001471},{"questionId":"q171","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":789,"outputTokens":5,"latencyMs":780.6261670000385},{"questionId":"q171","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":883,"outputTokens":5,"latencyMs":923.018459000159},{"questionId":"q171","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1318,"outputTokens":5,"latencyMs":993.0462080000434},{"questionId":"q171","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":903,"outputTokens":5,"latencyMs":1245.815000000177},{"questionId":"q172","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1249,"outputTokens":5,"latencyMs":1095.3125},{"questionId":"q172","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":790,"outputTokens":5,"latencyMs":709.230874999892},{"questionId":"q172","format":
"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":884,"outputTokens":5,"latencyMs":1246.6792499998119},{"questionId":"q172","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1319,"outputTokens":5,"latencyMs":870.7104170001112},{"questionId":"q172","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":904,"outputTokens":5,"latencyMs":974.5198749999981},{"questionId":"q173","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1250,"outputTokens":5,"latencyMs":985.8480829999316},{"questionId":"q173","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":791,"outputTokens":5,"latencyMs":1002.6233339998871},{"questionId":"q173","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":885,"outputTokens":5,"latencyMs":1006.6666669999249},{"questionId":"q173","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1320,"outputTokens":5,"latencyMs":1024.3824579999782},{"questionId":"q173","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":905,"outputTokens":5,"latencyMs":2409.9704589999747},{"questionId":"q174","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1246,"outputTokens":5,"latencyMs":1009.0951249999925},{"questionId":"q174","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"2","isCorrect":false,"inputTokens":787,"outputTokens":5,"latencyMs":916.1399580000434},{"questionId":"q174","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":881,"outputTokens":
5,"latencyMs":1165.979916999815},{"questionId":"q174","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1316,"outputTokens":5,"latencyMs":864.8515409999527},{"questionId":"q174","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"2","isCorrect":false,"inputTokens":901,"outputTokens":5,"latencyMs":916.4436249998398},{"questionId":"q175","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1251,"outputTokens":5,"latencyMs":1029.9872919998597},{"questionId":"q175","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":792,"outputTokens":5,"latencyMs":1144.0288329999894},{"questionId":"q175","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":886,"outputTokens":5,"latencyMs":822.7032080001663},{"questionId":"q175","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1321,"outputTokens":5,"latencyMs":1031.0812089999672},{"questionId":"q175","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":906,"outputTokens":5,"latencyMs":937.500250000041},{"questionId":"q176","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":911.2006250000559},{"questionId":"q176","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":791.9767080000602},{"questionId":"q176","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":817.9461670001037},{"questionId":"q176","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actu
al":"1","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":800.3201659999322},{"questionId":"q176","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":1166.106958999997},{"questionId":"q177","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1254,"outputTokens":5,"latencyMs":1153.844291999936},{"questionId":"q177","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":795,"outputTokens":5,"latencyMs":772.196958999848},{"questionId":"q177","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":889,"outputTokens":5,"latencyMs":761.2225830000825},{"questionId":"q177","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1324,"outputTokens":5,"latencyMs":1030.3278750001919},{"questionId":"q177","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":909,"outputTokens":5,"latencyMs":956.0868329999503},{"questionId":"q178","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1249,"outputTokens":5,"latencyMs":1061.2030420000665},{"questionId":"q178","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":790,"outputTokens":5,"latencyMs":888.081167000113},{"questionId":"q178","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":884,"outputTokens":5,"latencyMs":1051.8333749999292},{"questionId":"q178","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1319,"outputTokens":5,"latencyMs":799.0097499999683},{"questionId":"q178","format":"yaml
","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":904,"outputTokens":5,"latencyMs":905.2197910000104},{"questionId":"q179","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1247,"outputTokens":5,"latencyMs":988.6873749999795},{"questionId":"q179","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":788,"outputTokens":5,"latencyMs":723.2364169999491},{"questionId":"q179","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":882,"outputTokens":5,"latencyMs":728.2359160000924},{"questionId":"q179","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1317,"outputTokens":5,"latencyMs":977.7162500000559},{"questionId":"q179","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":902,"outputTokens":5,"latencyMs":1027.893291000044},{"questionId":"q180","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":7935,"outputTokens":5,"latencyMs":1120.7492909999564},{"questionId":"q180","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":4826,"outputTokens":5,"latencyMs":1454.5087910001166},{"questionId":"q180","format":"toon","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":3075,"outputTokens":5,"latencyMs":1035.1915830001235},{"questionId":"q180","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":2921,"outputTokens":5,"latencyMs":931.953375000041},{"questionId":"q180","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9420,"ou
tputTokens":5,"latencyMs":941.3573339998256},{"questionId":"q180","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":5826,"outputTokens":5,"latencyMs":986.0300420001149},{"questionId":"q181","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7941,"outputTokens":20,"latencyMs":1113.5572500000708},{"questionId":"q181","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4832,"outputTokens":20,"latencyMs":993.531415999867},{"questionId":"q181","format":"toon","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":3081,"outputTokens":20,"latencyMs":949.0614160001278},{"questionId":"q181","format":"csv","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2927,"outputTokens":20,"latencyMs":1220.1684170002118},{"questionId":"q181","format":"xml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":9426,"outputTokens":20,"latencyMs":2677.9535000000615},{"questionId":"q181","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5832,"outputTokens":20,"latencyMs":1150.8704589998815
},{"questionId":"q182","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":7938,"outputTokens":4,"latencyMs":1143.8894159998745},{"questionId":"q182","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":4829,"outputTokens":4,"latencyMs":939.9587090001442},{"questionId":"q182","format":"toon","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":3078,"outputTokens":4,"latencyMs":1002.7564159999602},{"questionId":"q182","format":"csv","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":2924,"outputTokens":4,"latencyMs":1499.9509160001762},{"questionId":"q182","format":"xml","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":9423,"outputTokens":4,"latencyMs":1206.003415999934},{"questionId":"q182","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":5829,"outputTokens":4,"latencyMs":1156.9789579999633},{"questionId":"q183","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":7939,"outputTokens":4,"latencyMs":1133.3445830000564},{"questionId":"q183","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4830,"outputTokens":4,"latencyMs":1083.2715829999652},{"questionId":"q183","format":"toon","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":3079,"outputTokens":4,"latencyMs":1054.9281669999473},{"questionId":"q183","format":"csv","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2925,"outputTokens":4,"latencyMs":1052.3783750000875},{"questionId":"q183","format":"xml","model":"claude-haiku
-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":9424,"outputTokens":4,"latencyMs":1340.8406249999534},{"questionId":"q183","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5830,"outputTokens":4,"latencyMs":1075.3891660000663},{"questionId":"q184","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":7939,"outputTokens":12,"latencyMs":2124.266124999849},{"questionId":"q184","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":4830,"outputTokens":12,"latencyMs":1068.5028750000056},{"questionId":"q184","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":3079,"outputTokens":12,"latencyMs":1167.4424999998882},{"questionId":"q184","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":2925,"outputTokens":12,"latencyMs":1302.0248749998864},{"questionId":"q184","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":9424,"outputTokens":12,"latencyMs":1186.8695000000298},{"questionId":"q184","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. 
Sherri Ritchie","isCorrect":true,"inputTokens":5830,"outputTokens":12,"latencyMs":1270.5893329998944},{"questionId":"q185","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":7936,"outputTokens":5,"latencyMs":1134.64995799982},{"questionId":"q185","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":4827,"outputTokens":5,"latencyMs":1194.1974160000682},{"questionId":"q185","format":"toon","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":3076,"outputTokens":5,"latencyMs":1283.6654169999529},{"questionId":"q185","format":"csv","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":2922,"outputTokens":5,"latencyMs":1176.9219159998465},{"questionId":"q185","format":"xml","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":9421,"outputTokens":5,"latencyMs":1174.4477500000503},{"questionId":"q185","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"7","actual":"6","isCorrect":false,"inputTokens":5827,"outputTokens":5,"latencyMs":1267.2275419998914},{"questionId":"q186","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":13066,"outputTokens":5,"latencyMs":1317.5971669999417},{"questionId":"q186","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8027,"outputTokens":5,"latencyMs":1233.12991600018},{"questionId":"q186","format":"toon","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8344,"outputTokens":5,"latencyMs":1202.2103329999372},{"questionId":"q186","format":"xml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":14571,"outputTokens":5,"latencyMs":1269.4371249999385},{"qu
estionId":"q186","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":9469,"outputTokens":5,"latencyMs":1300.4245419998188},{"questionId":"q187","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":13075,"outputTokens":21,"latencyMs":1696.230416999897},{"questionId":"q187","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8036,"outputTokens":21,"latencyMs":1352.8969999998808},{"questionId":"q187","format":"toon","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8353,"outputTokens":21,"latencyMs":1110.8835419998504},{"questionId":"q187","format":"xml","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":14580,"outputTokens":21,"latencyMs":1534.4549169999082},{"questionId":"q187","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":9478,"outputTokens":21,"latencyMs":1164.3688749999274},{"questionId":"q188","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":13071,"outputTokens":5,"latencyMs":1336.527957999846},{"questionId":"q188","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4","actual
":"4","isCorrect":true,"inputTokens":8032,"outputTokens":5,"latencyMs":1103.1468749998603},{"questionId":"q188","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":8349,"outputTokens":5,"latencyMs":1129.3212500000373},{"questionId":"q188","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":14576,"outputTokens":5,"latencyMs":1354.4471249999478},{"questionId":"q188","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":9474,"outputTokens":5,"latencyMs":1144.939916000003},{"questionId":"q189","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":13075,"outputTokens":11,"latencyMs":1416.0433340000454},{"questionId":"q189","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8036,"outputTokens":11,"latencyMs":1131.6960839999374},{"questionId":"q189","format":"toon","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8353,"outputTokens":11,"latencyMs":1228.2068749999162},{"questionId":"q189","format":"xml","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":14580,"outputTokens":11,"latencyMs":1415.090707999887},{"questionId":"q189","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":9478,"outputTokens":11,"latencyMs":1161.2281250001397},{"questionId":"q190","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":13070,"outputTokens":4,"lat
encyMs":2074.3852499998175},{"questionId":"q190","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8031,"outputTokens":4,"latencyMs":1235.8749160000589},{"questionId":"q190","format":"toon","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8348,"outputTokens":4,"latencyMs":1108.9501659998205},{"questionId":"q190","format":"xml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14575,"outputTokens":4,"latencyMs":1624.1051250000019},{"questionId":"q190","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9473,"outputTokens":4,"latencyMs":1303.3145830000285},{"questionId":"q191","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":13076,"outputTokens":10,"latencyMs":1195.093499999959},{"questionId":"q191","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8037,"outputTokens":10,"latencyMs":1125.0091250000987},{"questionId":"q191","format":"toon","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8354,"outputTokens":10,"latencyMs":1002.9523330000229},{"questionId":"q191","format":"xml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":14581,"outputTokens":10,"latencyMs":1123.1422500000335},{"questionId":"q191","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":9479,"outputTokens":10,"latencyMs":1390.2729579999577},{"questionId":"q192","form
at":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":4142,"outputTokens":5,"latencyMs":1418.0205409999471},{"questionId":"q192","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":2453,"outputTokens":5,"latencyMs":1059.0376249998808},{"questionId":"q192","format":"toon","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1599,"outputTokens":5,"latencyMs":1186.7520419999491},{"questionId":"q192","format":"csv","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1507,"outputTokens":5,"latencyMs":1620.152291999897},{"questionId":"q192","format":"xml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":4844,"outputTokens":5,"latencyMs":1310.0510420000646},{"questionId":"q192","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":3173,"outputTokens":5,"latencyMs":1221.876249999972},{"questionId":"q193","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4147,"outputTokens":17,"latencyMs":1487.5997919999063},{"questionId":"q193","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2458,"outputTokens":17,"latencyMs":1157.4452500001062},{"questionId":"q193","format":"toon","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1604,"outputTokens":17,"latencyMs":1520.7116659998428},{"questionId":"q193","format"
:"csv","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1512,"outputTokens":17,"latencyMs":1203.7664580000564},{"questionId":"q193","format":"xml","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4849,"outputTokens":17,"latencyMs":1226.0437910000328},{"questionId":"q193","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3178,"outputTokens":17,"latencyMs":977.1910840000492},{"questionId":"q194","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":4145,"outputTokens":6,"latencyMs":1073.1372919999994},{"questionId":"q194","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":2456,"outputTokens":6,"latencyMs":1266.1767920001876},{"questionId":"q194","format":"toon","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1602,"outputTokens":4,"latencyMs":1342.251207999885},{"questionId":"q194","format":"csv","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":1510,"outputTokens":6,"latencyMs":1179.91620899993},{"questionId":"q194","format":"xml","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":4847,"outputTokens":6,"latencyMs":1468.4425409999676},{"questionId":"q194","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":3176,"outputTokens":6,"la
tencyMs":900.0507499999367},{"questionId":"q195","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4146,"outputTokens":10,"latencyMs":1087.1228330000304},{"questionId":"q195","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2457,"outputTokens":10,"latencyMs":1056.3863329999149},{"questionId":"q195","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1603,"outputTokens":10,"latencyMs":1167.88495899993},{"questionId":"q195","format":"csv","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1511,"outputTokens":10,"latencyMs":984.4359160000458},{"questionId":"q195","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4848,"outputTokens":10,"latencyMs":1138.7140420000069},{"questionId":"q195","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3177,"outputTokens":10,"latencyMs":1117.2731670001522},{"questionId":"q196","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":4142,"outputTokens":5,"latencyMs":1177.2991659999825},{"questionId":"q196","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":2453,"outputTokens":5,"latencyMs":965.145041000098},{"questionId":"q196","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":1599,"outputTokens":5,"latencyMs":1326.4698749999516},{"questionId":"q196","format":"csv","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":1507,"outputTokens":5,"latency
Ms":1118.3482919998933},{"questionId":"q196","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":4844,"outputTokens":5,"latencyMs":1536.1617910000496},{"questionId":"q196","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":3173,"outputTokens":5,"latencyMs":930.9039169999305},{"questionId":"q197","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":17473,"outputTokens":5,"latencyMs":1286.1768749998882},{"questionId":"q197","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":12608,"outputTokens":5,"latencyMs":1507.926083000144},{"questionId":"q197","format":"toon","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9371,"outputTokens":5,"latencyMs":1098.5005830000155},{"questionId":"q197","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9189,"outputTokens":5,"latencyMs":11181.550583999837},{"questionId":"q197","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":19863,"outputTokens":5,"latencyMs":1288.1963339999784},{"questionId":"q197","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":14548,"outputTokens":5,"latencyMs":1512.4048330001533},{"questionId":"q198","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17479,"outputTokens":35,"latencyMs":1934.8035840000957},{"questionId":"q198","format":"json-compact","model":"claude-haiku-4-5-20251001"
,"expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":12614,"outputTokens":35,"latencyMs":1396.143374999985},{"questionId":"q198","format":"toon","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":9377,"outputTokens":35,"latencyMs":1337.9257079998497},{"questionId":"q198","format":"csv","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":9195,"outputTokens":35,"latencyMs":1317.0077919999603},{"questionId":"q198","format":"xml","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":19869,"outputTokens":35,"latencyMs":2428.95404099999},{"questionId":"q198","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":14554,"outputTokens":35,"latencyMs":1332.3844579998404},{"questionId":"q199","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":17477,"outputTokens":6,"latencyMs":1435.5708749999758},{"questionId":"q199","format":"json-compact","model":"clau
de-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":12612,"outputTokens":6,"latencyMs":1323.1899170000106},{"questionId":"q199","format":"toon","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":9375,"outputTokens":6,"latencyMs":1109.1495000000577},{"questionId":"q199","format":"csv","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":9193,"outputTokens":6,"latencyMs":1622.9887500000186},{"questionId":"q199","format":"xml","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":19867,"outputTokens":6,"latencyMs":1395.366083000088},{"questionId":"q199","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":14552,"outputTokens":6,"latencyMs":1273.513874999946},{"questionId":"q200","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17477,"outputTokens":7,"latencyMs":1235.034042000072},{"questionId":"q200","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":12612,"outputTokens":7,"latencyMs":1452.7786670001224},{"questionId":"q200","format":"toon","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":9375,"outputTokens":7,"latencyMs":1009.8154169998597},{"questionId":"q200","format":"csv","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":9193,"outputTokens":7,"latencyMs":1215.5075840000063},{"questionId":"q200","format":"xml","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":19867,"outputTokens":7,"latencyMs":1331.6306249999907},{"questionId":"q
200","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":14552,"outputTokens":7,"latencyMs":1234.5180420000106},{"questionId":"q201","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":17474,"outputTokens":5,"latencyMs":1365.1134999999776},{"questionId":"q201","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":12609,"outputTokens":5,"latencyMs":1297.5984169999138},{"questionId":"q201","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":9372,"outputTokens":5,"latencyMs":1234.3033330000471},{"questionId":"q201","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":9190,"outputTokens":5,"latencyMs":1500.0707079998683},{"questionId":"q201","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"12","isCorrect":false,"inputTokens":19864,"outputTokens":5,"latencyMs":1370.718208000064},{"questionId":"q201","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"13","isCorrect":false,"inputTokens":14549,"outputTokens":5,"latencyMs":1321.9931669998914},{"questionId":"q202","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":7767,"outputTokens":5,"latencyMs":1315.0292080000509},{"questionId":"q202","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"75","actual":"70","isCorrect":false,"inputTokens":5205,"outputTokens":5,"latencyMs":918.2240830000956},{"questionId":"q202","format":"toon","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":6225,"outputTokens":5,"latencyMs":993.3855000000913},{"questionId":"q202","format":"xml","model":"claude-haiku-4-5-20251001","expected":"
75","actual":"100","isCorrect":false,"inputTokens":8772,"outputTokens":5,"latencyMs":998.8321670000441},{"questionId":"q202","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"75","actual":"74","isCorrect":false,"inputTokens":6171,"outputTokens":5,"latencyMs":1022.2879580000881},{"questionId":"q203","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7777,"outputTokens":18,"latencyMs":1319.1331670000218},{"questionId":"q203","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":5215,"outputTokens":26,"latencyMs":1044.1837079999968},{"questionId":"q203","format":"toon","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":6235,"outputTokens":26,"latencyMs":1229.3462499999441},{"questionId":"q203","format":"xml","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":8782,"outputTokens":26,"latencyMs":1153.167082999833},{"questionId":"q203","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":6181,"outputTokens":26,"latencyMs":1140.1483329997864},{"questionId":"q204","format":"json-pretty","model":"claude
-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":7771,"outputTokens":4,"latencyMs":1057.0545830000192},{"questionId":"q204","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":5209,"outputTokens":4,"latencyMs":1038.1022499999963},{"questionId":"q204","format":"toon","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6229,"outputTokens":4,"latencyMs":951.9464999998454},{"questionId":"q204","format":"xml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":8776,"outputTokens":4,"latencyMs":1045.921832999913},{"questionId":"q204","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6175,"outputTokens":4,"latencyMs":1206.6149999999907},{"questionId":"q205","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1685,"outputTokens":4,"latencyMs":781.0249159999657},{"questionId":"q205","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1057,"outputTokens":4,"latencyMs":865.4773750000168},{"questionId":"q205","format":"toon","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":745,"outputTokens":4,"latencyMs":847.1748330001719},{"questionId":"q205","format":"csv","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":688,"outputTokens":4,"latencyMs":1022.7747920001857},{"questionId":"q205","format":"xml","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":1967,"outputTokens":4,"latencyMs":788.3179999999702},{"questionId":"q205","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTo
kens":1257,"outputTokens":4,"latencyMs":746.1863339999691},{"questionId":"q206","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1459,"outputTokens":4,"latencyMs":1047.0943330000155},{"questionId":"q206","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":924,"outputTokens":4,"latencyMs":919.9484170000069},{"questionId":"q206","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":666,"outputTokens":4,"latencyMs":907.2270830001216},{"questionId":"q206","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":612,"outputTokens":4,"latencyMs":845.8464999999851},{"questionId":"q206","format":"xml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1696,"outputTokens":4,"latencyMs":946.0020830000285},{"questionId":"q206","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1094,"outputTokens":4,"latencyMs":920.9464169999119},{"questionId":"q207","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1925,"outputTokens":4,"latencyMs":1408.2781249999534},{"questionId":"q207","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1204,"outputTokens":4,"latencyMs":1325.2302080001682},{"questionId":"q207","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":838,"outputTokens":4,"latencyMs":939.0019169999287},{"questionId":"q207","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":778,"outputTokens":4,"latencyMs":822.6255419999361},{"questionId":"q207","format":"xml","m
odel":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":2255,"outputTokens":4,"latencyMs":857.5038749999367},{"questionId":"q207","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1434,"outputTokens":4,"latencyMs":850.0120409999508},{"questionId":"q208","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1676,"outputTokens":4,"latencyMs":913.5827909999061},{"questionId":"q208","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1052,"outputTokens":4,"latencyMs":1083.7319999998435},{"questionId":"q208","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1280,"outputTokens":4,"latencyMs":1061.7704580000136},{"questionId":"q208","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":685,"outputTokens":4,"latencyMs":736.6914170000236},{"questionId":"q208","format":"xml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1957,"outputTokens":4,"latencyMs":909.4639159999788},{"questionId":"q208","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1250,"outputTokens":4,"latencyMs":1399.6871670000255},{"questionId":"q209","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1619,"outputTokens":4,"latencyMs":856.7064160001464},{"questionId":"q209","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1007,"outputTokens":4,"latencyMs":794.2346660001203},{"questionId":"q209","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":123
3,"outputTokens":4,"latencyMs":1252.9911249999423},{"questionId":"q209","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":472,"outputTokens":4,"latencyMs":1058.458375000162},{"questionId":"q209","format":"xml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1897,"outputTokens":4,"latencyMs":972.7352500001434},{"questionId":"q209","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1203,"outputTokens":4,"latencyMs":910.6434579999186}] ================================================ FILE: benchmarks/results/accuracy/models/gemini-3-flash-preview ================================================ [{"questionId":"q1","format":"json-pretty","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":7954,"outputTokens":239,"latencyMs":3625.455249999999},{"questionId":"q1","format":"json-compact","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":4744,"outputTokens":202,"latencyMs":2635.1330420000004},{"questionId":"q1","format":"toon","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":3387,"outputTokens":259,"latencyMs":2909.5863329999993},{"questionId":"q1","format":"csv","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":3239,"outputTokens":295,"latencyMs":3192.4188330000006},{"questionId":"q1","format":"xml","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":9141,"outputTokens":250,"latencyMs":3300.681708},{"questionId":"q1","format":"yaml","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":5794,"outputTokens":189,"latencyMs":2525.3320000000003},{"questionId":"q2","format":"json-pretty","model":"gemini-3-fl
ash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7954,"outputTokens":192,"latencyMs":2552.432},{"questionId":"q2","format":"json-compact","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4744,"outputTokens":168,"latencyMs":4588.639333},{"questionId":"q2","format":"toon","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":3387,"outputTokens":357,"latencyMs":3214.033292},{"questionId":"q2","format":"csv","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":3239,"outputTokens":265,"latencyMs":3336.27775},{"questionId":"q2","format":"xml","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":9141,"outputTokens":271,"latencyMs":3296.883},{"questionId":"q2","format":"yaml","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5794,"outputTokens":186,"latencyMs":1830.6979580000007},{"questionId":"q3","format":"json-pretty","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":7954,"outputTokens":206,"latencyMs":1814.1679999999997},{"questionId":"q3","format":"json-compact","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":4744,"outputTokens":156,"latencyMs":1816.5712080000012},{"questionId":"q3","format":"toon","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":3387,"outputTokens":467,"latencyMs":3315.0727499999994},{"questionId":"q3","format":"csv","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":3239,"outputTok
ens":317,"latencyMs":2661.6989170000015},{"questionId":"q3","format":"xml","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":9141,"outputTokens":275,"latencyMs":3456.165792},{"questionId":"q3","format":"yaml","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":5794,"outputTokens":203,"latencyMs":1919.311416999999},{"questionId":"q4","format":"json-pretty","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":7955,"outputTokens":174,"latencyMs":2340.148166000001},{"questionId":"q4","format":"json-compact","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":4745,"outputTokens":118,"latencyMs":1578.8430000000008},{"questionId":"q4","format":"toon","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":3388,"outputTokens":245,"latencyMs":2972.7989159999997},{"questionId":"q4","format":"csv","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":3240,"outputTokens":220,"latencyMs":2409.4155420000006},{"questionId":"q4","format":"xml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":9142,"outputTokens":202,"latencyMs":2596.7662090000013},{"questionId":"q4","format":"yaml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":5795,"outputTokens":167,"latencyMs":1865.9715830000005},{"questionId":"q5","format":"json-pretty","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7952,"outputTokens":240,"latencyMs":2259.9251249999998},{"questionId":"q5","format":"json-compact","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4742,"outputTokens":162,"latencyMs":2379.21933300
0001},{"questionId":"q5","format":"toon","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":3385,"outputTokens":249,"latencyMs":3043.0672090000007},{"questionId":"q5","format":"csv","model":"gemini-3-flash-preview","expected":"yes","actual":"1","isCorrect":true,"inputTokens":3237,"outputTokens":274,"latencyMs":2661.173041999995},{"questionId":"q5","format":"xml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":9139,"outputTokens":202,"latencyMs":2767.9813749999885},{"questionId":"q5","format":"yaml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5792,"outputTokens":231,"latencyMs":2197.264167000001},{"questionId":"q6","format":"json-pretty","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":7954,"outputTokens":251,"latencyMs":2468.244166999997},{"questionId":"q6","format":"json-compact","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":4744,"outputTokens":127,"latencyMs":1833.99820799999},{"questionId":"q6","format":"toon","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":3387,"outputTokens":477,"latencyMs":3602.21712500001},{"questionId":"q6","format":"csv","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":3239,"outputTokens":346,"latencyMs":3165.0912090000056},{"questionId":"q6","format":"xml","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":9141,"outputTokens":297,"latencyMs":3294.3596249999973},{"questionId":"q6","format":"yaml","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":5794,"outputTokens":172,"latencyMs":1667.5077500000043},{"questionId":"q7","format":"json-pretty","model":"gemini-3-flash-preview","expecte
d":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7956,"outputTokens":268,"latencyMs":2391.997583000004},{"questionId":"q7","format":"json-compact","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4746,"outputTokens":217,"latencyMs":2120.4972500000003},{"questionId":"q7","format":"toon","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":3389,"outputTokens":585,"latencyMs":3673.572166999991},{"questionId":"q7","format":"csv","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":3241,"outputTokens":383,"latencyMs":3456.103833000001},{"questionId":"q7","format":"xml","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":9143,"outputTokens":229,"latencyMs":2590.3361659999937},{"questionId":"q7","format":"yaml","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5796,"outputTokens":166,"latencyMs":1763.4515410000022},{"questionId":"q8","format":"json-pretty","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":7955,"outputTokens":187,"latencyMs":1945.595417000004},{"questionId":"q8","format":"json-compact","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":4745,"outputTokens":151,"latencyMs":2032.5836249999993},{"questionId":"q8","format":"toon","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":3388,"outputTokens":405,"latencyMs":2813.043042000005},{"questionId":"q8","format":"csv","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":3240,"outputTokens":362
,"latencyMs":3427.3383750000066},{"questionId":"q8","format":"xml","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":9142,"outputTokens":235,"latencyMs":2655.8687500000087},{"questionId":"q8","format":"yaml","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":5795,"outputTokens":209,"latencyMs":1998.403999999995},{"questionId":"q9","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":7956,"outputTokens":231,"latencyMs":2156.807958000005},{"questionId":"q9","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":4746,"outputTokens":153,"latencyMs":2193.381708000001},{"questionId":"q9","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3389,"outputTokens":285,"latencyMs":2569.0447079999867},{"questionId":"q9","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3241,"outputTokens":184,"latencyMs":2225.6912079999893},{"questionId":"q9","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":9143,"outputTokens":258,"latencyMs":3088.273749999993},{"questionId":"q9","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":5796,"outputTokens":245,"latencyMs":2433.3254999999917},{"questionId":"q10","format":"json-pretty","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7952,"outputTokens":192,"latencyMs":1999.0586670000048},{"questionId":"q10","format":"json-compact","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4742,"outputTokens":172,"latencyMs":2230.6631659999985},{"questionId":"q10
","format":"toon","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":3385,"outputTokens":472,"latencyMs":3616.934208999999},{"questionId":"q10","format":"csv","model":"gemini-3-flash-preview","expected":"yes","actual":"1","isCorrect":true,"inputTokens":3237,"outputTokens":5802,"latencyMs":32870.668957999995},{"questionId":"q10","format":"xml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":9139,"outputTokens":251,"latencyMs":2970.3544590000092},{"questionId":"q10","format":"yaml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5792,"outputTokens":223,"latencyMs":2275.9059169999964},{"questionId":"q11","format":"json-pretty","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":7952,"outputTokens":208,"latencyMs":2240.154291999992},{"questionId":"q11","format":"json-compact","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":4742,"outputTokens":220,"latencyMs":2019.8260420000006},{"questionId":"q11","format":"toon","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":3385,"outputTokens":279,"latencyMs":2217.485499999995},{"questionId":"q11","format":"csv","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":3237,"outputTokens":233,"latencyMs":2372.3970419999823},{"questionId":"q11","format":"xml","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":9139,"outputTokens":286,"latencyMs":3144.522540999984},{"questionId":"q11","format":"yaml","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":5792,"outputTokens":182,"latencyMs":1776.9458330000052},{"questionId":"q12","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Operations","actu
al":"Operations","isCorrect":true,"inputTokens":7954,"outputTokens":199,"latencyMs":2068.243167000008},{"questionId":"q12","format":"json-compact","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4744,"outputTokens":249,"latencyMs":2556.309041000015},{"questionId":"q12","format":"toon","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":3387,"outputTokens":197,"latencyMs":2067.33312499999},{"questionId":"q12","format":"csv","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":3239,"outputTokens":330,"latencyMs":2840.658457999991},{"questionId":"q12","format":"xml","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":9141,"outputTokens":202,"latencyMs":2776.487707999986},{"questionId":"q12","format":"yaml","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5794,"outputTokens":141,"latencyMs":2206.415458000003},{"questionId":"q13","format":"json-pretty","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":7951,"outputTokens":2213,"latencyMs":11963.009374999994},{"questionId":"q13","format":"json-compact","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":4741,"outputTokens":3743,"latencyMs":19159.714540999994},{"questionId":"q13","format":"toon","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3384,"outputTokens":2734,"latencyMs":14160.47195799998},{"questionId":"q13","format":"csv","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3236,"outputTokens":2330,"latencyMs":12950.263708000013},{"questionId":"q13","format":"xml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputToken
s":9138,"outputTokens":3771,"latencyMs":24574.40391699999},{"questionId":"q13","format":"yaml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":5791,"outputTokens":1533,"latencyMs":7880.103917},{"questionId":"q14","format":"json-pretty","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":7951,"outputTokens":1852,"latencyMs":9905.873375000025},{"questionId":"q14","format":"json-compact","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":4741,"outputTokens":1573,"latencyMs":9463.203749999986},{"questionId":"q14","format":"toon","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3384,"outputTokens":3383,"latencyMs":25641.172249999974},{"questionId":"q14","format":"csv","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3236,"outputTokens":6869,"latencyMs":34513.120375},{"questionId":"q14","format":"xml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":9138,"outputTokens":4196,"latencyMs":26234.407166999998},{"questionId":"q14","format":"yaml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":5791,"outputTokens":2150,"latencyMs":12435.973457999993},{"questionId":"q15","format":"json-pretty","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":7951,"outputTokens":2007,"latencyMs":10856.205459000019},{"questionId":"q15","format":"json-compact","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":4741,"outputTokens":830,"latencyMs":5377.5375420000055},{"questionId":"q15","format":"toon","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3384,"outputTokens":2670,"latencyMs":20326.423833000008},{"questionId":"q15","format":"csv","model":"gemini-3-flash-
preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3236,"outputTokens":1251,"latencyMs":7192.888832999975},{"questionId":"q15","format":"xml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":9138,"outputTokens":1183,"latencyMs":8130.2351250000065},{"questionId":"q15","format":"yaml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":5791,"outputTokens":1565,"latencyMs":8948.246167000005},{"questionId":"q16","format":"json-pretty","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":7959,"outputTokens":11526,"latencyMs":54927.654708999995},{"questionId":"q16","format":"json-compact","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":4749,"outputTokens":14129,"latencyMs":69444.23004200001},{"questionId":"q16","format":"toon","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":3392,"outputTokens":17813,"latencyMs":85544.84624999997},{"questionId":"q16","format":"csv","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":3244,"outputTokens":10237,"latencyMs":48699.181625},{"questionId":"q16","format":"xml","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":9146,"outputTokens":15144,"latencyMs":88944.13062499999},{"questionId":"q16","format":"yaml","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":5799,"outputTokens":11901,"latencyMs":58236.14600000004},{"questionId":"q17","format":"json-pretty","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":7959,"outputTokens":9828,"latencyMs":46873.048624999996},{"questionId":"q17","format":"json-compact","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":4749,"outputTokens":5737,"latencyMs":28
472.17749999999},{"questionId":"q17","format":"toon","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":3392,"outputTokens":13246,"latencyMs":64685.196417},{"questionId":"q17","format":"csv","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":3244,"outputTokens":8302,"latencyMs":39618.234958999994},{"questionId":"q17","format":"xml","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":9146,"outputTokens":5772,"latencyMs":34000.402166000014},{"questionId":"q17","format":"yaml","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":5799,"outputTokens":11352,"latencyMs":54563.97954099998},{"questionId":"q18","format":"json-pretty","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":7960,"outputTokens":7910,"latencyMs":38451.564625},{"questionId":"q18","format":"json-compact","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":4750,"outputTokens":4783,"latencyMs":23734.33158399997},{"questionId":"q18","format":"toon","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":3393,"outputTokens":7694,"latencyMs":38817.53254200003},{"questionId":"q18","format":"csv","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":3245,"outputTokens":8402,"latencyMs":40587.71158299997},{"questionId":"q18","format":"xml","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":9147,"outputTokens":9972,"latencyMs":61279.913833},{"questionId":"q18","format":"yaml","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":5800,"outputTokens":10034,"latencyMs":48386.947958000004},{"questionId":"q19","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":t
rue,"inputTokens":7952,"outputTokens":233,"latencyMs":2216.1940419999883},{"questionId":"q19","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":4742,"outputTokens":187,"latencyMs":2281.4948330000043},{"questionId":"q19","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3385,"outputTokens":421,"latencyMs":3185.9499589999905},{"questionId":"q19","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3237,"outputTokens":299,"latencyMs":2735.214833999984},{"questionId":"q19","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":9139,"outputTokens":249,"latencyMs":2865.5949580000015},{"questionId":"q19","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":5792,"outputTokens":575,"latencyMs":4232.22866600001},{"questionId":"q20","format":"json-pretty","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":7953,"outputTokens":16645,"latencyMs":78783.37699999998},{"questionId":"q20","format":"json-compact","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":4743,"outputTokens":20007,"latencyMs":96595.14683399996},{"questionId":"q20","format":"toon","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":3386,"outputTokens":16125,"latencyMs":76973.349583},{"questionId":"q20","format":"csv","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":3238,"outputTokens":12697,"latencyMs":61811.191666},{"questionId":"q20","format":"xml","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":9140,"outputTokens":14755,"latencyMs":86098.75858299999},
{"questionId":"q20","format":"yaml","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":5793,"outputTokens":25603,"latencyMs":120761.245833},{"questionId":"q21","format":"json-pretty","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":7950,"outputTokens":6329,"latencyMs":30479.19987499999},{"questionId":"q21","format":"json-compact","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":4740,"outputTokens":4927,"latencyMs":24148.572749999992},{"questionId":"q21","format":"toon","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":3383,"outputTokens":5128,"latencyMs":24522.518415999948},{"questionId":"q21","format":"csv","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":3235,"outputTokens":11060,"latencyMs":54258.82816600002},{"questionId":"q21","format":"xml","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":9137,"outputTokens":7237,"latencyMs":44614.00195800001},{"questionId":"q21","format":"yaml","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":5790,"outputTokens":5987,"latencyMs":30460.19395799999},{"questionId":"q22","format":"json-pretty","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":7950,"outputTokens":1978,"latencyMs":10332.646833000006},{"questionId":"q22","format":"json-compact","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":4740,"outputTokens":1626,"latencyMs":8706.571708999982},{"questionId":"q22","format":"toon","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":3383,"outputTokens":8494,"latencyMs":40063.24837499997},{"questionId":"q22","format":"csv","model":"gemini-3-flash-preview","expected":"21","actual":"21","isC
orrect":true,"inputTokens":3235,"outputTokens":11763,"latencyMs":56610.87650000001},{"questionId":"q22","format":"xml","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":9137,"outputTokens":2207,"latencyMs":14080.183291999972},{"questionId":"q22","format":"yaml","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":5790,"outputTokens":8848,"latencyMs":43156.040875000006},{"questionId":"q23","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7961,"outputTokens":1568,"latencyMs":8185.19537500001},{"questionId":"q23","format":"json-compact","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4751,"outputTokens":5473,"latencyMs":25986.331999999995},{"questionId":"q23","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3394,"outputTokens":6014,"latencyMs":29227.068333000003},{"questionId":"q23","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3246,"outputTokens":4512,"latencyMs":22080.70795900002},{"questionId":"q23","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9148,"outputTokens":1228,"latencyMs":8106.303375000018},{"questionId":"q23","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5801,"outputTokens":4065,"latencyMs":21301.958958000003},{"questionId":"q24","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":7961,"outputTokens":2448,"latencyMs":12367.767333000025},{"questionId":"q24","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":4751,"outputTokens":3160,"latencyMs":15658.321832999995},{"questionId":"q24","format
":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":3394,"outputTokens":7522,"latencyMs":35942.96525000001},{"questionId":"q24","format":"csv","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":3246,"outputTokens":1981,"latencyMs":10421.97308299999},{"questionId":"q24","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":9148,"outputTokens":9091,"latencyMs":57416.60583299998},{"questionId":"q24","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":5801,"outputTokens":4041,"latencyMs":20619.26762499998},{"questionId":"q25","format":"json-pretty","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":7961,"outputTokens":1164,"latencyMs":6491.6565829999745},{"questionId":"q25","format":"json-compact","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":4751,"outputTokens":1045,"latencyMs":5745.743167000008},{"questionId":"q25","format":"toon","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":3394,"outputTokens":1595,"latencyMs":8288.561583999952},{"questionId":"q25","format":"csv","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":3246,"outputTokens":2395,"latencyMs":12089.373750000028},{"questionId":"q25","format":"xml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":9148,"outputTokens":1616,"latencyMs":10385.165582999995},{"questionId":"q25","format":"yaml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":5801,"outputTokens":5484,"latencyMs":26458.59583299997},{"questionId":"q26","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7961,"outputTokens":2459
,"latencyMs":12580.541666999983},{"questionId":"q26","format":"json-compact","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4751,"outputTokens":1588,"latencyMs":8502.766125000024},{"questionId":"q26","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3394,"outputTokens":6893,"latencyMs":33344.51425000001},{"questionId":"q26","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3246,"outputTokens":2091,"latencyMs":11260.58600000001},{"questionId":"q26","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9148,"outputTokens":2587,"latencyMs":16338.016667000018},{"questionId":"q26","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5801,"outputTokens":2567,"latencyMs":13451.73641699995},{"questionId":"q27","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":7961,"outputTokens":2407,"latencyMs":12215.841707999993},{"questionId":"q27","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":4751,"outputTokens":2037,"latencyMs":10651.695333999989},{"questionId":"q27","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":3394,"outputTokens":5106,"latencyMs":34497.624583999976},{"questionId":"q27","format":"csv","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":3246,"outputTokens":4975,"latencyMs":24697.602375000017},{"questionId":"q27","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9148,"outputTokens":4362,"latencyMs":27752.959124999994},{"questionId":"q27","format":"yaml","model":"gemini-3-flash-preview","expected":"1
0","actual":"10","isCorrect":true,"inputTokens":5801,"outputTokens":3987,"latencyMs":19767.733332999982},{"questionId":"q28","format":"json-pretty","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":7957,"outputTokens":16748,"latencyMs":79600.15991699998},{"questionId":"q28","format":"json-compact","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":4747,"outputTokens":14052,"latencyMs":66176.12862500001},{"questionId":"q28","format":"toon","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3390,"outputTokens":13157,"latencyMs":61450.89712499996},{"questionId":"q28","format":"csv","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3242,"outputTokens":15232,"latencyMs":71720.73720800004},{"questionId":"q28","format":"xml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":9144,"outputTokens":15125,"latencyMs":89182.87950000004},{"questionId":"q28","format":"yaml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":5797,"outputTokens":8460,"latencyMs":40369.170458999986},{"questionId":"q29","format":"json-pretty","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":7958,"outputTokens":16386,"latencyMs":77503.73624999996},{"questionId":"q29","format":"json-compact","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":4748,"outputTokens":11887,"latencyMs":56601.65512500005},{"questionId":"q29","format":"toon","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":3391,"outputTokens":16959,"latencyMs":78704.543458},{"questionId":"q29","format":"csv","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":3243,"outputTokens":23609,"latencyMs":112825.481584},{"quest
ionId":"q29","format":"xml","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":9145,"outputTokens":20310,"latencyMs":120850.20487499994},{"questionId":"q29","format":"yaml","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":5798,"outputTokens":14498,"latencyMs":68155.17616600002},{"questionId":"q30","format":"json-pretty","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":7958,"outputTokens":10569,"latencyMs":49480.454290999915},{"questionId":"q30","format":"json-compact","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":4748,"outputTokens":15329,"latencyMs":71282.15554099996},{"questionId":"q30","format":"toon","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":3391,"outputTokens":15211,"latencyMs":71150.65308300004},{"questionId":"q30","format":"csv","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":3243,"outputTokens":19407,"latencyMs":92346.22900000005},{"questionId":"q30","format":"xml","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":9145,"outputTokens":13208,"latencyMs":80364.869083},{"questionId":"q30","format":"yaml","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":5798,"outputTokens":16422,"latencyMs":77469.96666700009},{"questionId":"q31","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7959,"outputTokens":1455,"latencyMs":7871.520207999973},{"questionId":"q31","format":"json-compact","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4749,"outputTokens":3337,"latencyMs":16940.903041999904},{"questionId":"q31","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":tru
e,"inputTokens":3392,"outputTokens":7698,"latencyMs":37917.3464579999},{"questionId":"q31","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3244,"outputTokens":2777,"latencyMs":14377.056125000003},{"questionId":"q31","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9146,"outputTokens":4019,"latencyMs":26194.21516699996},{"questionId":"q31","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5799,"outputTokens":4811,"latencyMs":24804.60845900001},{"questionId":"q32","format":"json-pretty","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":7959,"outputTokens":1267,"latencyMs":6935.964540999965},{"questionId":"q32","format":"json-compact","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":4749,"outputTokens":2935,"latencyMs":14397.04920800007},{"questionId":"q32","format":"toon","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3392,"outputTokens":1792,"latencyMs":9424.540042000008},{"questionId":"q32","format":"csv","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3244,"outputTokens":7315,"latencyMs":36771.32283399999},{"questionId":"q32","format":"xml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":9146,"outputTokens":4903,"latencyMs":30213.814125000034},{"questionId":"q32","format":"yaml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":5799,"outputTokens":2639,"latencyMs":14096.540874999948},{"questionId":"q33","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7959,"outputTokens":3301,"latencyMs":16739.186916000093},{"questionId":"q33","format":"json-compact","mo
del":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4749,"outputTokens":1826,"latencyMs":9530.344500000007},{"questionId":"q33","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3392,"outputTokens":8386,"latencyMs":40940.272916999995},{"questionId":"q33","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3244,"outputTokens":5507,"latencyMs":27193.604374999995},{"questionId":"q33","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9146,"outputTokens":1722,"latencyMs":10933.165042000008},{"questionId":"q33","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5799,"outputTokens":4603,"latencyMs":23554.350582999992},{"questionId":"q34","format":"json-pretty","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":7952,"outputTokens":3164,"latencyMs":16076.625875000027},{"questionId":"q34","format":"json-compact","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":4742,"outputTokens":2649,"latencyMs":13420.916791999945},{"questionId":"q34","format":"toon","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3385,"outputTokens":13444,"latencyMs":65856.00358400005},{"questionId":"q34","format":"csv","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3237,"outputTokens":10681,"latencyMs":52920.23595900007},{"questionId":"q34","format":"xml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":9139,"outputTokens":4409,"latencyMs":27524.992582999985},{"questionId":"q34","format":"yaml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":5792,"outputTokens":8537,"lat
encyMs":42930.85120799998},{"questionId":"q35","format":"json-pretty","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":7952,"outputTokens":2509,"latencyMs":12793.079041999998},{"questionId":"q35","format":"json-compact","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":4742,"outputTokens":9923,"latencyMs":46836.51587500004},{"questionId":"q35","format":"toon","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3385,"outputTokens":11447,"latencyMs":55717.83683299995},{"questionId":"q35","format":"csv","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3237,"outputTokens":6299,"latencyMs":31261.50887499994},{"questionId":"q35","format":"xml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":9139,"outputTokens":8239,"latencyMs":48178.13370800007},{"questionId":"q35","format":"yaml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":5792,"outputTokens":10789,"latencyMs":48658.011833},{"questionId":"q36","format":"json-pretty","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":14520,"outputTokens":239,"latencyMs":4084.1892080000835},{"questionId":"q36","format":"json-compact","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":8893,"outputTokens":213,"latencyMs":2221.47795900004},{"questionId":"q36","format":"toon","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":9426,"outputTokens":330,"latencyMs":2917.066125000012},{"questionId":"q36","format":"xml","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":16023,"outputTokens":405,"latencyMs":3279.3449170000385},{"questionId":"q36","format":"yaml","model":"gemini-3-fla
sh-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":10588,"outputTokens":186,"latencyMs":2375.2371660000645},{"questionId":"q37","format":"json-pretty","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":14520,"outputTokens":269,"latencyMs":4300.670500000007},{"questionId":"q37","format":"json-compact","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8893,"outputTokens":120,"latencyMs":2138.3337090000277},{"questionId":"q37","format":"toon","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":9426,"outputTokens":379,"latencyMs":4250.551541999914},{"questionId":"q37","format":"xml","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":16023,"outputTokens":348,"latencyMs":3051.399666000041},{"questionId":"q37","format":"yaml","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":10588,"outputTokens":365,"latencyMs":2959.8823329999577},{"questionId":"q38","format":"json-pretty","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":14520,"outputTokens":401,"latencyMs":3398.0299589999486},{"questionId":"q38","format":"json-compact","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":8893,"outputTokens":262,"latencyMs":2614.450458000065},{"questionId":"q38","format":"toon","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":9426,"outputTokens":1033,"latencyMs":7634.493875000044},{"questionId":"q38","format":"xml","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":16023,"outputTokens":257,"latencyMs":2810.2027499999385},{"questionId":"q38","format":"yaml","model":"gemini-3-flash-preview","expecte
d":"970.81","actual":"970.81","isCorrect":true,"inputTokens":10588,"outputTokens":271,"latencyMs":3648.3497920000227},{"questionId":"q39","format":"json-pretty","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":14520,"outputTokens":332,"latencyMs":4216.776208000025},{"questionId":"q39","format":"json-compact","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8893,"outputTokens":323,"latencyMs":2974.3484999999637},{"questionId":"q39","format":"toon","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":9426,"outputTokens":347,"latencyMs":4259.04241600004},{"questionId":"q39","format":"xml","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":16023,"outputTokens":310,"latencyMs":2637.323708000011},{"questionId":"q39","format":"yaml","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":10588,"outputTokens":385,"latencyMs":3122.3812909999397},{"questionId":"q40","format":"json-pretty","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":14520,"outputTokens":114,"latencyMs":1757.4957500000019},{"questionId":"q40","format":"json-compact","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":8893,"outputTokens":376,"latencyMs":3298.5533750000177},{"questionId":"q40","format":"toon","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":9426,"outputTokens":220,"latencyMs":2433.519541000016},{"questionId":"q40","format":"xml","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":16023,"outputTokens":519,"latencyMs":4017.63049999997},{"questionId":"q40","format":"yaml","model":"gemini-3-flash-preview","
expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":10588,"outputTokens":356,"latencyMs":4236.816583999898},{"questionId":"q41","format":"json-pretty","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":14520,"outputTokens":337,"latencyMs":3656.2743330000667},{"questionId":"q41","format":"json-compact","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8893,"outputTokens":299,"latencyMs":3071.092665999895},{"questionId":"q41","format":"toon","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":9426,"outputTokens":312,"latencyMs":3423.856124999933},{"questionId":"q41","format":"xml","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":16023,"outputTokens":384,"latencyMs":3164.3665000000037},{"questionId":"q41","format":"yaml","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":10588,"outputTokens":258,"latencyMs":2742.6081249999115},{"questionId":"q42","format":"json-pretty","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":14520,"outputTokens":370,"latencyMs":6332.384084000019},{"questionId":"q42","format":"json-compact","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":8893,"outputTokens":247,"latencyMs":2596.109124999959},{"questionId":"q42","format":"toon","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":9426,"outputTokens":287,"latencyMs":3813.7137079999084},{"questionId":"q42","format":"xml","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":16023,"outputTokens":517,"latencyMs":4750.313916999963},{"questionId":"q42","format":"yaml","model":"gemini-3-flash-preview","expected":"257.3","actual":"25
7.3","isCorrect":true,"inputTokens":10588,"outputTokens":368,"latencyMs":3190.7336670000805},{"questionId":"q43","format":"json-pretty","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14520,"outputTokens":208,"latencyMs":3772.5327089999337},{"questionId":"q43","format":"json-compact","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8893,"outputTokens":383,"latencyMs":3755.0355419999687},{"questionId":"q43","format":"toon","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9426,"outputTokens":356,"latencyMs":3920.7263749999693},{"questionId":"q43","format":"xml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":16023,"outputTokens":524,"latencyMs":4781.3879169999855},{"questionId":"q43","format":"yaml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10588,"outputTokens":213,"latencyMs":2596.978000000003},{"questionId":"q44","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":14521,"outputTokens":219,"latencyMs":3901.433541999897},{"questionId":"q44","format":"json-compact","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":8894,"outputTokens":153,"latencyMs":2440.811333000078},{"questionId":"q44","format":"toon","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":9427,"outputTokens":225,"latencyMs":3439.9409160000505},{"questionId":"q44","format":"xml","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. 
Lafayette Schumm","isCorrect":true,"inputTokens":16024,"outputTokens":206,"latencyMs":2855.7029999999795},{"questionId":"q44","format":"yaml","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":10589,"outputTokens":213,"latencyMs":2617.3762919999426},{"questionId":"q45","format":"json-pretty","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":14521,"outputTokens":131,"latencyMs":1981.949041999993},{"questionId":"q45","format":"json-compact","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":8894,"outputTokens":223,"latencyMs":2614.310708999983},{"questionId":"q45","format":"toon","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":9427,"outputTokens":481,"latencyMs":5567.526500000036},{"questionId":"q45","format":"xml","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":16024,"outputTokens":255,"latencyMs":2444.7313330000034},{"questionId":"q45","format":"yaml","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":10589,"outputTokens":201,"latencyMs":2859.697374999989},{"questionId":"q46","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":14521,"outputTokens":438,"latencyMs":6194.675749999937},{"questionId":"q46","format":"json-compact","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":8894,"outputTokens":419,"latencyMs":3519.895957999979},{"questionId":"q46","format":"toon","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02
-25","isCorrect":true,"inputTokens":9427,"outputTokens":370,"latencyMs":4239.617625000072},{"questionId":"q46","format":"xml","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":16024,"outputTokens":441,"latencyMs":3473.413541999995},{"questionId":"q46","format":"yaml","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":10589,"outputTokens":218,"latencyMs":2793.329832999967},{"questionId":"q47","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":14520,"outputTokens":517,"latencyMs":6171.0984580001095},{"questionId":"q47","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":8893,"outputTokens":493,"latencyMs":4513.148999999976},{"questionId":"q47","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":9426,"outputTokens":357,"latencyMs":3426.631167000043},{"questionId":"q47","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":16023,"outputTokens":715,"latencyMs":5319.806500000064},{"questionId":"q47","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":10588,"outputTokens":753,"latencyMs":5722.641249999986},{"questionId":"q48","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":14521,"outputTokens":214,"latencyMs":2704.503792000003},{"questionId":"q48","format":"json-compact","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":8894,"outputTokens":226,"latencyMs":3192.3600840000436},{"questionId":"q48","format":"toon","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. 
Bertha Hagenes","isCorrect":true,"inputTokens":9427,"outputTokens":337,"latencyMs":3822.5321669999976},{"questionId":"q48","format":"xml","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":16024,"outputTokens":253,"latencyMs":4071.4312500000233},{"questionId":"q48","format":"yaml","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":10589,"outputTokens":202,"latencyMs":2787.2034579999745},{"questionId":"q49","format":"json-pretty","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":14521,"outputTokens":172,"latencyMs":2478.8660420000087},{"questionId":"q49","format":"json-compact","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":8894,"outputTokens":266,"latencyMs":3291.44908400008},{"questionId":"q49","format":"toon","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":9427,"outputTokens":303,"latencyMs":3379.1073340000585},{"questionId":"q49","format":"xml","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":16024,"outputTokens":215,"latencyMs":3803.1052909999853},{"questionId":"q49","format":"yaml","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":10589,"outputTokens":242,"latencyMs":3215.7367089999607},{"questionId":"q50","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":14521,"outputTokens":534,"latencyMs":5678.362208000035},{"questionId":"q50","format":"json-compact","model":"gemini-3-flas
h-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":8894,"outputTokens":351,"latencyMs":3124.625166999991},{"questionId":"q50","format":"toon","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":9427,"outputTokens":383,"latencyMs":3374.228000000003},{"questionId":"q50","format":"xml","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":16024,"outputTokens":513,"latencyMs":4168.5925419999985},{"questionId":"q50","format":"yaml","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":10589,"outputTokens":221,"latencyMs":2439.0822499999776},{"questionId":"q51","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":14520,"outputTokens":1894,"latencyMs":14970.280374999973},{"questionId":"q51","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":8893,"outputTokens":282,"latencyMs":2762.680416999967},{"questionId":"q51","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":9426,"outputTokens":510,"latencyMs":5080.336041999981},{"questionId":"q51","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":16023,"outputTokens":737,"latencyMs":5118.358458999894},{"questionId":"q51","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":10588,"outputTokens":547,"latencyMs":4121.631540999981},{"questionId":"q52","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14515,"outputTokens":1586,"latencyMs":10084.839665999985},{"questionId":"q52","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":tru
e,"inputTokens":8888,"outputTokens":939,"latencyMs":6117.736290999921},{"questionId":"q52","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9421,"outputTokens":3970,"latencyMs":22298.110333000077},{"questionId":"q52","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16018,"outputTokens":1824,"latencyMs":10803.940333000035},{"questionId":"q52","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10583,"outputTokens":1312,"latencyMs":8204.72112500004},{"questionId":"q53","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14515,"outputTokens":1293,"latencyMs":8477.8419590001},{"questionId":"q53","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":8888,"outputTokens":897,"latencyMs":6555.321166999987},{"questionId":"q53","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9421,"outputTokens":1737,"latencyMs":11308.854500000016},{"questionId":"q53","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16018,"outputTokens":1534,"latencyMs":9507.72520799993},{"questionId":"q53","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10583,"outputTokens":1579,"latencyMs":9510.461666999967},{"questionId":"q54","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14516,"outputTokens":1855,"latencyMs":11689.714666999993},{"questionId":"q54","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":8889,"outputTokens":1595,"latencyMs":10057.804125000024},{"questionId":"q54","format":"toon"
,"model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9422,"outputTokens":919,"latencyMs":6539.259709000005},{"questionId":"q54","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16019,"outputTokens":1549,"latencyMs":9686.657083999948},{"questionId":"q54","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10584,"outputTokens":1009,"latencyMs":6737.831374999951},{"questionId":"q55","format":"json-pretty","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":14516,"outputTokens":22046,"latencyMs":119814.9747919999},{"questionId":"q55","format":"json-compact","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":8889,"outputTokens":17747,"latencyMs":94282.35662500001},{"questionId":"q55","format":"toon","model":"gemini-3-flash-preview","expected":"38069.93","actual":"Answer: 
38069.93","isCorrect":true,"inputTokens":9422,"outputTokens":26876,"latencyMs":144182.03554200009},{"questionId":"q55","format":"xml","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":16019,"outputTokens":15952,"latencyMs":83522.74987499998},{"questionId":"q55","format":"yaml","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":10584,"outputTokens":15993,"latencyMs":83854.517291},{"questionId":"q56","format":"json-pretty","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":14514,"outputTokens":17232,"latencyMs":93816.99558300001},{"questionId":"q56","format":"json-compact","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":8887,"outputTokens":10439,"latencyMs":55391.57041699998},{"questionId":"q56","format":"toon","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":9420,"outputTokens":26949,"latencyMs":147609.17754199996},{"questionId":"q56","format":"xml","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":16017,"outputTokens":14824,"latencyMs":78400.49437500001},{"questionId":"q56","format":"yaml","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":10582,"outputTokens":21639,"latencyMs":115665.42524999997},{"questionId":"q57","format":"json-pretty","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":14515,"outputTokens":279,"latencyMs":2675.1986250000773},{"questionId":"q57","format":"json-compact","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":8888,"outputTokens":716,"latencyMs":5042.823250000016},{"questionId":"q57","format":"toon","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,
"inputTokens":9421,"outputTokens":424,"latencyMs":3740.129125000094},{"questionId":"q57","format":"xml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":16018,"outputTokens":776,"latencyMs":5229.144583000103},{"questionId":"q57","format":"yaml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":10583,"outputTokens":620,"latencyMs":4741.052417000057},{"questionId":"q58","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":14514,"outputTokens":8780,"latencyMs":50699.300667},{"questionId":"q58","format":"json-compact","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":8887,"outputTokens":1805,"latencyMs":10487.411750000087},{"questionId":"q58","format":"toon","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":9420,"outputTokens":5850,"latencyMs":33373.52466699993},{"questionId":"q58","format":"xml","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":16017,"outputTokens":8972,"latencyMs":49852.997083999915},{"questionId":"q58","format":"yaml","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":10582,"outputTokens":8969,"latencyMs":48664.47791699995},{"questionId":"q59","format":"json-pretty","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":14520,"outputTokens":4700,"latencyMs":27796.357416999992},{"questionId":"q59","format":"json-compact","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":8893,"outputTokens":15868,"latencyMs":83459.89466699993},{"questionId":"q59","format":"toon","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":9426,"outputTokens":1723,"latencyMs":10277.792
625000002},{"questionId":"q59","format":"xml","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":16023,"outputTokens":4585,"latencyMs":24014.985666999943},{"questionId":"q59","format":"yaml","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":10588,"outputTokens":6603,"latencyMs":34684.088292},{"questionId":"q60","format":"json-pretty","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":14520,"outputTokens":9913,"latencyMs":55103.00254100002},{"questionId":"q60","format":"json-compact","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":8893,"outputTokens":8949,"latencyMs":46694.73845800001},{"questionId":"q60","format":"toon","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":9426,"outputTokens":12668,"latencyMs":67138.90345800004},{"questionId":"q60","format":"xml","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":16023,"outputTokens":13794,"latencyMs":71987.96762500005},{"questionId":"q60","format":"yaml","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":10588,"outputTokens":4639,"latencyMs":24705.85929199995},{"questionId":"q61","format":"json-pretty","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":14520,"outputTokens":6175,"latencyMs":33881.228292000014},{"questionId":"q61","format":"json-compact","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":8893,"outputTokens":2873,"latencyMs":15629.773790999898},{"questionId":"q61","format":"toon","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":9426,"outputTokens":2590,"latencyMs":15352.111291000037},{"questionId":"q61","format":"xml","model":"gemini-3-flash-preview","expected":"29","actual":"
29","isCorrect":true,"inputTokens":16023,"outputTokens":3560,"latencyMs":19004.292000000016},{"questionId":"q61","format":"yaml","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":10588,"outputTokens":3757,"latencyMs":20216.709333000006},{"questionId":"q62","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14524,"outputTokens":2701,"latencyMs":16061.990833000047},{"questionId":"q62","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":8897,"outputTokens":3692,"latencyMs":20302.256417000084},{"questionId":"q62","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9430,"outputTokens":3430,"latencyMs":19275.306500000064},{"questionId":"q62","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16027,"outputTokens":3723,"latencyMs":20844.68087499996},{"questionId":"q62","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10592,"outputTokens":2730,"latencyMs":15592.86608399998},{"questionId":"q63","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":14524,"outputTokens":1698,"latencyMs":10112.519083000021},{"questionId":"q63","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":8897,"outputTokens":2376,"latencyMs":13232.432625000016},{"questionId":"q63","format":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":9430,"outputTokens":5256,"latencyMs":29218.475041999947},{"questionId":"q63","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":16027,"outputTokens":4564,"latencyMs":24987.84912499995},{"questionId":
"q63","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":10592,"outputTokens":2961,"latencyMs":16846.877332999953},{"questionId":"q64","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":14525,"outputTokens":2356,"latencyMs":13198.135666999966},{"questionId":"q64","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":8898,"outputTokens":4747,"latencyMs":26265.633375000092},{"questionId":"q64","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":9431,"outputTokens":2256,"latencyMs":13048.221834000084},{"questionId":"q64","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":16028,"outputTokens":1678,"latencyMs":9641.471332999994},{"questionId":"q64","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":10593,"outputTokens":1215,"latencyMs":7445.436791999964},{"questionId":"q65","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":14524,"outputTokens":1490,"latencyMs":8881.846499999985},{"questionId":"q65","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":8897,"outputTokens":2284,"latencyMs":13075.758625000017},{"questionId":"q65","format":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":9430,"outputTokens":2322,"latencyMs":13604.233208999969},{"questionId":"q65","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":16027,"outputTokens":2710,"latencyMs":14792.38137499988},{"questionId":"q65","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":10592,
"outputTokens":4272,"latencyMs":23129.41341700009},{"questionId":"q66","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":14522,"outputTokens":2084,"latencyMs":11989.847166999942},{"questionId":"q66","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8895,"outputTokens":1664,"latencyMs":10901.901291999966},{"questionId":"q66","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9428,"outputTokens":1219,"latencyMs":7919.164124999894},{"questionId":"q66","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":16025,"outputTokens":6934,"latencyMs":38142.17258400004},{"questionId":"q66","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":10590,"outputTokens":1495,"latencyMs":10440.66483299993},{"questionId":"q67","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":14522,"outputTokens":1507,"latencyMs":9200.867458999855},{"questionId":"q67","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":8895,"outputTokens":2148,"latencyMs":12279.845625000075},{"questionId":"q67","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":9428,"outputTokens":1610,"latencyMs":9950.602542000124},{"questionId":"q67","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":16025,"outputTokens":6010,"latencyMs":32675.77766599995},{"questionId":"q67","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":10590,"outputTokens":1839,"latencyMs":10577.224958000006},{"questionId":"q68","format":"json-pretty","model":"gemini-3-flash-prev
iew","expected":"4","actual":"4","isCorrect":true,"inputTokens":14523,"outputTokens":1878,"latencyMs":11117.630457999883},{"questionId":"q68","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8896,"outputTokens":10461,"latencyMs":56053.61800000002},{"questionId":"q68","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9429,"outputTokens":4414,"latencyMs":25176.159292000113},{"questionId":"q68","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":16026,"outputTokens":1915,"latencyMs":11221.173333999934},{"questionId":"q68","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":10591,"outputTokens":1413,"latencyMs":8690.182666999986},{"questionId":"q69","format":"json-pretty","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":14526,"outputTokens":3831,"latencyMs":21001.882874999894},{"questionId":"q69","format":"json-compact","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":8899,"outputTokens":4165,"latencyMs":22770.965709000127},{"questionId":"q69","format":"toon","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":9432,"outputTokens":18550,"latencyMs":100414.03024999984},{"questionId":"q69","format":"xml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":16029,"outputTokens":17310,"latencyMs":91830.33833399997},{"questionId":"q69","format":"yaml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":10594,"outputTokens":13016,"latencyMs":68778.63179200003},{"questionId":"q70","format":"json-pretty","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":14526,"outputTokens":5235,"latencyMs":28
447.320374999894},{"questionId":"q70","format":"json-compact","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":8899,"outputTokens":15466,"latencyMs":82281.10062499996},{"questionId":"q70","format":"toon","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":9432,"outputTokens":18011,"latencyMs":98886.45154100005},{"questionId":"q70","format":"xml","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":16029,"outputTokens":13188,"latencyMs":69947.36058399989},{"questionId":"q70","format":"yaml","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":10594,"outputTokens":11280,"latencyMs":59114.20470799995},{"questionId":"q71","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":4850,"outputTokens":119,"latencyMs":1829.5722499999683},{"questionId":"q71","format":"json-compact","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3098,"outputTokens":169,"latencyMs":2557.803041000152},{"questionId":"q71","format":"toon","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":2361,"outputTokens":245,"latencyMs":2140.7205409999005},{"questionId":"q71","format":"csv","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":2275,"outputTokens":214,"latencyMs":1957.9020410000812},{"questionId":"q71","format":"xml","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":5493,"outputTokens":233,"latencyMs":2409.1820829999633},{"questionId":"q71","format":"yaml","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3879,"outputTokens":280,"latencyMs":2552.6536250000354},{"questionId":"q72","format":"json-pretty","model":"gemini-3-flash-previe
w","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":4850,"outputTokens":223,"latencyMs":2256.7499589999206},{"questionId":"q72","format":"json-compact","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3098,"outputTokens":299,"latencyMs":2469.9167500000913},{"questionId":"q72","format":"toon","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":2361,"outputTokens":487,"latencyMs":3384.175290999934},{"questionId":"q72","format":"csv","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":2275,"outputTokens":306,"latencyMs":2464.7807089998387},{"questionId":"q72","format":"xml","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":5493,"outputTokens":248,"latencyMs":2633.3309579999186},{"questionId":"q72","format":"yaml","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3879,"outputTokens":301,"latencyMs":2412.069792000111},{"questionId":"q73","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":4851,"outputTokens":184,"latencyMs":2531.2841250000056},{"questionId":"q73","format":"json-compact","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3099,"outputTokens":338,"latencyMs":2906.40533400001},{"questionId":"q73","format":"toon","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":2362,"outputTokens":336,"latencyMs":3120.241541000083},{"questionId":"q73","format":"csv","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":2276,"outputTokens":221,"latencyMs":2187.43429200002},{"questionId":"q73","format":"xml","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":tru
e,"inputTokens":5494,"outputTokens":269,"latencyMs":2550.5684999998193},{"questionId":"q73","format":"yaml","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3880,"outputTokens":564,"latencyMs":3829.89783300017},{"questionId":"q74","format":"json-pretty","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":4851,"outputTokens":212,"latencyMs":2191.561499999836},{"questionId":"q74","format":"json-compact","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":3099,"outputTokens":161,"latencyMs":2047.781084000133},{"questionId":"q74","format":"toon","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":2362,"outputTokens":416,"latencyMs":3263.763874999946},{"questionId":"q74","format":"csv","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":2276,"outputTokens":259,"latencyMs":2059.760958999861},{"questionId":"q74","format":"xml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":5494,"outputTokens":249,"latencyMs":2466.5045409998856},{"questionId":"q74","format":"yaml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":3880,"outputTokens":189,"latencyMs":1927.4447920001112},{"questionId":"q75","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":4850,"outputTokens":168,"latencyMs":1590.1414999999106},{"questionId":"q75","format":"json-compact","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3098,"outputTokens":233,"latencyMs":2454.150749999797},{"questionId":"q75","format":"toon","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":2361,"outputTokens":294,"latencyMs":2953.3149170000106},{"questionId":"q75","format":"c
sv","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":2275,"outputTokens":190,"latencyMs":1742.4772909998428},{"questionId":"q75","format":"xml","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":5493,"outputTokens":230,"latencyMs":2560.6022919998504},{"questionId":"q75","format":"yaml","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3879,"outputTokens":225,"latencyMs":2538.289208999835},{"questionId":"q76","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":4850,"outputTokens":276,"latencyMs":2100.7069999999367},{"questionId":"q76","format":"json-compact","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":3098,"outputTokens":171,"latencyMs":2313.077791000018},{"questionId":"q76","format":"toon","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":2361,"outputTokens":1265,"latencyMs":7686.007291999878},{"questionId":"q76","format":"csv","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":2275,"outputTokens":316,"latencyMs":2579.2038340000436},{"questionId":"q76","format":"xml","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":5493,"outputTokens":205,"latencyMs":2630.3139589999337},{"questionId":"q76","format":"yaml","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":3879,"outputTokens":334,"latencyMs":3260.627499999944},{"questionId":"q77","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":4851,"outputTokens":210,"latencyMs":2122.3630420002155},{"questionId":"q77","format":"json-compact","model":"gemini-3-flash-preview","ex
pected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3099,"outputTokens":362,"latencyMs":2923.6872910000384},{"questionId":"q77","format":"toon","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":2362,"outputTokens":231,"latencyMs":2338.994457999943},{"questionId":"q77","format":"csv","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":2276,"outputTokens":195,"latencyMs":2056.686915999977},{"questionId":"q77","format":"xml","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":5494,"outputTokens":185,"latencyMs":2833.9936659999657},{"questionId":"q77","format":"yaml","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3880,"outputTokens":279,"latencyMs":2919.6189160000067},{"questionId":"q78","format":"json-pretty","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":4851,"outputTokens":176,"latencyMs":1762.5695419998374},{"questionId":"q78","format":"json-compact","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":3099,"outputTokens":344,"latencyMs":2663.3037089998834},{"questionId":"q78","format":"toon","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":2362,"outputTokens":492,"latencyMs":3573.057499999879},{"questionId":"q78","format":"csv","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":2276,"outputTokens":353,"latencyMs":2462.9716250000056},{"questionId":"q78","format":"xml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":5494,"outputTokens":209,"latencyMs":2461.564041999867},{"questionId":"q78","format":"yaml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":3880,"outputTokens":173,"latencyMs":2031.1732079999056}
,{"questionId":"q79","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":4850,"outputTokens":179,"latencyMs":1812.0257910001092},{"questionId":"q79","format":"json-compact","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3098,"outputTokens":165,"latencyMs":1910.7827920001},{"questionId":"q79","format":"toon","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":2361,"outputTokens":478,"latencyMs":4064.633916999912},{"questionId":"q79","format":"csv","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":2275,"outputTokens":363,"latencyMs":2539.4810419999994},{"questionId":"q79","format":"xml","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":5493,"outputTokens":235,"latencyMs":2455.422833000077},{"questionId":"q79","format":"yaml","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3879,"outputTokens":150,"latencyMs":2093.886832999997},{"questionId":"q80","format":"json-pretty","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":4843,"outputTokens":840,"latencyMs":4959.74383299984},{"questionId":"q80","format":"json-compact","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3091,"outputTokens":1262,"latencyMs":10007.206290999893},{"questionId":"q80","format":"toon","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2354,"outputTokens":1385,"latencyMs":7940.294125000015},{"questionId":"q80","format":"csv","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2268,"outputTokens":1884,"latencyMs":9444.756959000137},{"questionId":"q80","format":"xml","model":"gemini-3-flash-preview","expected":"60","actual":"6
0","isCorrect":true,"inputTokens":5486,"outputTokens":1489,"latencyMs":9292.004332999932},{"questionId":"q80","format":"yaml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3872,"outputTokens":1235,"latencyMs":7389.324040999869},{"questionId":"q81","format":"json-pretty","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":4844,"outputTokens":9907,"latencyMs":44819.90987500013},{"questionId":"q81","format":"json-compact","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":3092,"outputTokens":6790,"latencyMs":33174.798957999796},{"questionId":"q81","format":"toon","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":2355,"outputTokens":16999,"latencyMs":83272.93574999995},{"questionId":"q81","format":"csv","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":2269,"outputTokens":18298,"latencyMs":83436.81708399998},{"questionId":"q81","format":"xml","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":5487,"outputTokens":16309,"latencyMs":79165.26470799977},{"questionId":"q81","format":"yaml","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":3873,"outputTokens":5834,"latencyMs":29618.839625000022},{"questionId":"q82","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":4844,"outputTokens":10427,"latencyMs":47558.13629200007},{"questionId":"q82","format":"json-compact","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":3092,"outputTokens":10931,"latencyMs":52990.450666000135},{"questionId":"q82","format":"toon","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":2355,"outp
utTokens":5785,"latencyMs":28527.226582999807},{"questionId":"q82","format":"csv","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":2269,"outputTokens":12442,"latencyMs":56605.5457919999},{"questionId":"q82","format":"xml","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":5487,"outputTokens":8100,"latencyMs":40539.29049999989},{"questionId":"q82","format":"yaml","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":3873,"outputTokens":8493,"latencyMs":42170.64012500015},{"questionId":"q83","format":"json-pretty","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":4842,"outputTokens":26424,"latencyMs":120804.7117920001},{"questionId":"q83","format":"json-compact","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":3090,"outputTokens":26383,"latencyMs":126429.28150000004},{"questionId":"q83","format":"toon","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":2353,"outputTokens":17386,"latencyMs":84437.76829200005},{"questionId":"q83","format":"csv","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":2267,"outputTokens":18943,"latencyMs":85890.99399999995},{"questionId":"q83","format":"xml","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":5485,"outputTokens":17626,"latencyMs":85216.78054199996},{"questionId":"q83","format":"yaml","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":3871,"outputTokens":17143,"latencyMs":83208.28020799998},{"questionId":"q84","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":4840,"outp
utTokens":23834,"latencyMs":111591.6606660001},{"questionId":"q84","format":"json-compact","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":3088,"outputTokens":21038,"latencyMs":103479.88100000005},{"questionId":"q84","format":"toon","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":2351,"outputTokens":35851,"latencyMs":174881.03937500017},{"questionId":"q84","format":"csv","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":2265,"outputTokens":16542,"latencyMs":74305.61962499982},{"questionId":"q84","format":"xml","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":5483,"outputTokens":29239,"latencyMs":143886.16129099997},{"questionId":"q84","format":"yaml","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":3869,"outputTokens":22128,"latencyMs":108770.82458300004},{"questionId":"q85","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":4846,"outputTokens":5096,"latencyMs":23482.037666999968},{"questionId":"q85","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3094,"outputTokens":2796,"latencyMs":14461.010040999856},{"questionId":"q85","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2357,"outputTokens":5121,"latencyMs":26287.597333999816},{"questionId":"q85","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2271,"outputTokens":2990,"latencyMs":14215.18995800009},{"questionId":"q85","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":5489,"outputTokens":2621,"la
tencyMs":13538.900291000027},{"questionId":"q85","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3875,"outputTokens":9585,"latencyMs":47650.92799999984},{"questionId":"q86","format":"json-pretty","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":4844,"outputTokens":8114,"latencyMs":37666.09054100001},{"questionId":"q86","format":"json-compact","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":3092,"outputTokens":4856,"latencyMs":23527.82224999997},{"questionId":"q86","format":"toon","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":2355,"outputTokens":12702,"latencyMs":60716.7359999998},{"questionId":"q86","format":"csv","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":2269,"outputTokens":3749,"latencyMs":17356.627792000072},{"questionId":"q86","format":"xml","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":5487,"outputTokens":9591,"latencyMs":46486.24741699989},{"questionId":"q86","format":"yaml","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":3873,"outputTokens":3137,"latencyMs":16645.47083300003},{"questionId":"q87","format":"json-pretty","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":4853,"outputTokens":4768,"latencyMs":22222.0095840001},{"questionId":"q87","format":"json-compact","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3101,"outputTokens":10763,"latencyMs":52359.5263749999},{"questionId":"q87","format":"toon","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":2364,"outputTokens":8655,"latencyMs":43057.93550000014},{"questionId":"q87","format":"csv","model":"gemini-3-flash-preview","expected":"23"
,"actual":"23","isCorrect":true,"inputTokens":2278,"outputTokens":8486,"latencyMs":40806.75987499999},{"questionId":"q87","format":"xml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":5496,"outputTokens":6805,"latencyMs":32607.476540999953},{"questionId":"q87","format":"yaml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3882,"outputTokens":14007,"latencyMs":69418.0892080001},{"questionId":"q88","format":"json-pretty","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":4853,"outputTokens":3753,"latencyMs":17887.077249999857},{"questionId":"q88","format":"json-compact","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3101,"outputTokens":4513,"latencyMs":21889.44045799994},{"questionId":"q88","format":"toon","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":2364,"outputTokens":12240,"latencyMs":58832.42224999983},{"questionId":"q88","format":"csv","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":2278,"outputTokens":5194,"latencyMs":26775.547833000077},{"questionId":"q88","format":"xml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":5496,"outputTokens":4922,"latencyMs":25243.463334000204},{"questionId":"q88","format":"yaml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3882,"outputTokens":5731,"latencyMs":29080.502792000072},{"questionId":"q89","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":4854,"outputTokens":5849,"latencyMs":26516.438708999893},{"questionId":"q89","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3102,"outputTokens":16173,"latencyMs":77878.715875},{"questi
onId":"q89","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2365,"outputTokens":12760,"latencyMs":61007.31995799998},{"questionId":"q89","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2279,"outputTokens":3727,"latencyMs":18021.589042000007},{"questionId":"q89","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":5497,"outputTokens":13472,"latencyMs":64319.46916700015},{"questionId":"q89","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3883,"outputTokens":10570,"latencyMs":50606.915917000035},{"questionId":"q90","format":"json-pretty","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":4854,"outputTokens":9662,"latencyMs":43878.04604199994},{"questionId":"q90","format":"json-compact","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":3102,"outputTokens":11573,"latencyMs":56116.81316699996},{"questionId":"q90","format":"toon","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":2365,"outputTokens":8494,"latencyMs":41614.19895899994},{"questionId":"q90","format":"csv","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":2279,"outputTokens":13149,"latencyMs":61570.56495800009},{"questionId":"q90","format":"xml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":5497,"outputTokens":12169,"latencyMs":57587.416958000045},{"questionId":"q90","format":"yaml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":3883,"outputTokens":8472,"latencyMs":41254.44212500006},{"questionId":"q91","format":"json-pretty","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inpu
tTokens":4854,"outputTokens":8273,"latencyMs":37649.36825000006},{"questionId":"q91","format":"json-compact","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3102,"outputTokens":13135,"latencyMs":62559.60337499995},{"questionId":"q91","format":"toon","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":2365,"outputTokens":12544,"latencyMs":60352.34270899999},{"questionId":"q91","format":"csv","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":2279,"outputTokens":8737,"latencyMs":40803.06487499992},{"questionId":"q91","format":"xml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":5497,"outputTokens":10767,"latencyMs":52170.04133299994},{"questionId":"q91","format":"yaml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3883,"outputTokens":16281,"latencyMs":76947.36479100003},{"questionId":"q92","format":"json-pretty","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":4852,"outputTokens":6738,"latencyMs":30578.224417000078},{"questionId":"q92","format":"json-compact","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":3100,"outputTokens":8966,"latencyMs":42782.286208000034},{"questionId":"q92","format":"toon","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":2363,"outputTokens":11119,"latencyMs":53578.49137499998},{"questionId":"q92","format":"csv","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":2277,"outputTokens":8610,"latencyMs":41204.4491669999},{"questionId":"q92","format":"xml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":5495,"outputTokens":12674,"latencyMs":59980.40558399982},{"questionId":"q92","format":"yaml","model":"gemi
ni-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":3881,"outputTokens":5423,"latencyMs":27335.205374999903},{"questionId":"q93","format":"json-pretty","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":4852,"outputTokens":3484,"latencyMs":16504.15283300006},{"questionId":"q93","format":"json-compact","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":3100,"outputTokens":3330,"latencyMs":16408.33737500012},{"questionId":"q93","format":"toon","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":2363,"outputTokens":6409,"latencyMs":31941.1357499999},{"questionId":"q93","format":"csv","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":2277,"outputTokens":2998,"latencyMs":15110.609875000082},{"questionId":"q93","format":"xml","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":5495,"outputTokens":4221,"latencyMs":21624.425083999988},{"questionId":"q93","format":"yaml","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":3881,"outputTokens":5008,"latencyMs":25177.73974999995},{"questionId":"q94","format":"json-pretty","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":4854,"outputTokens":12810,"latencyMs":58066.875459},{"questionId":"q94","format":"json-compact","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":3102,"outputTokens":17452,"latencyMs":83096.03012500005},{"questionId":"q94","format":"toon","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":2365,"outputTokens":11515,"latencyMs":55915.19383300003},{"questionId":"q94","format":"csv","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":2279,"outputTokens":9584,"latencyMs":45677.5
1520899986},{"questionId":"q94","format":"xml","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":5497,"outputTokens":12458,"latencyMs":59882.34912499995},{"questionId":"q94","format":"yaml","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":3883,"outputTokens":10155,"latencyMs":49421.32391600008},{"questionId":"q95","format":"json-pretty","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":4854,"outputTokens":19415,"latencyMs":88079.928541},{"questionId":"q95","format":"json-compact","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":3102,"outputTokens":12142,"latencyMs":59118.61595800007},{"questionId":"q95","format":"toon","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":2365,"outputTokens":18314,"latencyMs":87911.21258299984},{"questionId":"q95","format":"csv","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":2279,"outputTokens":5962,"latencyMs":29477.809167},{"questionId":"q95","format":"xml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":5497,"outputTokens":10896,"latencyMs":53725.84250000003},{"questionId":"q95","format":"yaml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":3883,"outputTokens":9581,"latencyMs":47041.31004200014},{"questionId":"q96","format":"json-pretty","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":20061,"outputTokens":368,"latencyMs":3275.9754170000087},{"questionId":"q96","format":"json-compact","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":15099,"outputTokens":339,"latencyMs":2886.0949589998927},{"questionId":"q96","format":"toon","model":"gemini-3-flash-preview","expected":"430886
","actual":"430886","isCorrect":true,"inputTokens":12431,"outputTokens":587,"latencyMs":4363.1266250000335},{"questionId":"q96","format":"csv","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":12278,"outputTokens":287,"latencyMs":3419.5997910001315},{"questionId":"q96","format":"xml","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":21947,"outputTokens":364,"latencyMs":3803.6397500000894},{"questionId":"q96","format":"yaml","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":17145,"outputTokens":128,"latencyMs":2026.3678749999963},{"questionId":"q97","format":"json-pretty","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":20064,"outputTokens":210,"latencyMs":2619.5515419999138},{"questionId":"q97","format":"json-compact","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":15102,"outputTokens":331,"latencyMs":2843.4305420001037},{"questionId":"q97","format":"toon","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":12434,"outputTokens":833,"latencyMs":7584.43200000003},{"questionId":"q97","format":"csv","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":12281,"outputTokens":590,"latencyMs":7030.619375000009},{"questionId":"q97","format":"xml","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":21950,"outputTokens":377,"latencyMs":3602.929834000068},{"questionId":"q97","format":"yaml","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17148,"outputTokens":297,"latencyMs":3195.7054999999236},{"questionId":"q98","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":2
0058,"outputTokens":331,"latencyMs":3507.5251670000143},{"questionId":"q98","format":"json-compact","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":15096,"outputTokens":279,"latencyMs":2730.177417000057},{"questionId":"q98","format":"toon","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":12428,"outputTokens":487,"latencyMs":3778.1009579999372},{"questionId":"q98","format":"csv","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":12275,"outputTokens":559,"latencyMs":7012.587832999881},{"questionId":"q98","format":"xml","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":21944,"outputTokens":362,"latencyMs":3486.3610419998877},{"questionId":"q98","format":"yaml","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17142,"outputTokens":308,"latencyMs":3335.2515829999465},{"questionId":"q99","format":"json-pretty","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":20065,"outputTokens":279,"latencyMs":3803.778750000056},{"questionId":"q99","format":"json-compact","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":15103,"outputTokens":269,"latencyMs":2556.105082999915},{"questionId":"q99","format":"toon","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":12435,"outputTokens":530,"latencyMs":4303.937750000041},{"questionId":"q99","format":"csv","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":12282,"outputTokens":489,"latencyMs":5632.182875000173},{"questionId":"q99","format":"xml","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":21951,"outputTokens":437,"latencyMs":4391.142166000092},{"
questionId":"q99","format":"yaml","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":17149,"outputTokens":305,"latencyMs":2996.1447920000646},{"questionId":"q100","format":"json-pretty","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":20057,"outputTokens":295,"latencyMs":3074.485832999926},{"questionId":"q100","format":"json-compact","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":15095,"outputTokens":251,"latencyMs":2522.9678330000024},{"questionId":"q100","format":"toon","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":12427,"outputTokens":458,"latencyMs":4044.0953329999465},{"questionId":"q100","format":"csv","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":12274,"outputTokens":449,"latencyMs":4740.3539999998175},{"questionId":"q100","format":"xml","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":21943,"outputTokens":347,"latencyMs":5268.900499999989},{"questionId":"q100","format":"yaml","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17141,"outputTokens":302,"latencyMs":3375.2723749999423},{"questionId":"q101","format":"json-pretty","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":20063,"outputTokens":382,"latencyMs":3648.6807500000577},{"questionId":"q101","format":"json-compact","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":15101,"outputTokens":372,"latencyMs":3123.4450840000063},{"questionId":"q101","format":"toon","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":12433,"outputTokens":549,"latencyMs":3993.329832999967},{"questionId":"q101","fo
rmat":"csv","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":12280,"outputTokens":553,"latencyMs":6161.550374999875},{"questionId":"q101","format":"xml","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":21949,"outputTokens":378,"latencyMs":3448.810958000133},{"questionId":"q101","format":"yaml","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17147,"outputTokens":312,"latencyMs":3196.9216660000384},{"questionId":"q102","format":"json-pretty","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":20063,"outputTokens":304,"latencyMs":2753.285584000172},{"questionId":"q102","format":"json-compact","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":15101,"outputTokens":278,"latencyMs":3056.6362079998944},{"questionId":"q102","format":"toon","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":12433,"outputTokens":836,"latencyMs":5912.967791999923},{"questionId":"q102","format":"csv","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":12280,"outputTokens":742,"latencyMs":6940.559041000204},{"questionId":"q102","format":"xml","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":21949,"outputTokens":357,"latencyMs":3420.518582999939},{"questionId":"q102","format":"yaml","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":17147,"outputTokens":286,"latencyMs":3228.524791999953},{"questionId":"q103","format":"json-pretty","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":20059,"outputTokens":151,"latencyMs":2331.234625000041},{"questionId":"q103","format":"json-compact","model":"gemini-3-flash-preview","expected":"main","act
ual":"main","isCorrect":true,"inputTokens":15097,"outputTokens":310,"latencyMs":2850.6913749999367},{"questionId":"q103","format":"toon","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":12429,"outputTokens":399,"latencyMs":3272.5516659999266},{"questionId":"q103","format":"csv","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":12276,"outputTokens":477,"latencyMs":5130.917875000043},{"questionId":"q103","format":"xml","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":21945,"outputTokens":339,"latencyMs":3651.871207999997},{"questionId":"q103","format":"yaml","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":17143,"outputTokens":113,"latencyMs":4187.869750000071},{"questionId":"q104","format":"json-pretty","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":20066,"outputTokens":414,"latencyMs":3960.3870000001043},{"questionId":"q104","format":"json-compact","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":15104,"outputTokens":327,"latencyMs":3003.0286250000354},{"questionId":"q104","format":"toon","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":12436,"outputTokens":1157,"latencyMs":8071.643667000113},{"questionId":"q104","format":"csv","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":12283,"outputTokens":497,"latencyMs":5497.376291999826},{"questionId":"q104","format":"xml","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":21952,"outputTokens":389,"latencyMs":4018.265083000064},{"questionId":"q104","format":"yaml","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17150,"
outputTokens":304,"latencyMs":3488.251042000018},{"questionId":"q105","format":"json-pretty","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":20061,"outputTokens":307,"latencyMs":3335.819958999986},{"questionId":"q105","format":"json-compact","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":15099,"outputTokens":279,"latencyMs":2880.8569579999894},{"questionId":"q105","format":"toon","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":12431,"outputTokens":663,"latencyMs":5555.758832999971},{"questionId":"q105","format":"csv","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":12278,"outputTokens":526,"latencyMs":5690.138708000071},{"questionId":"q105","format":"xml","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":21947,"outputTokens":166,"latencyMs":2661.0854580001906},{"questionId":"q105","format":"yaml","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17145,"outputTokens":280,"latencyMs":3149.7653339998797},{"questionId":"q106","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":20064,"outputTokens":355,"latencyMs":3686.7114580001216},{"questionId":"q106","format":"json-compact","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":15102,"outputTokens":375,"latencyMs":3059.123250000179},{"questionId":"q106","format":"toon","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":12434,"outputTokens":1062,"latencyMs":6691.106749999803},{"questionId":"q106","format":"csv","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":12281,"outputTokens":459,"latencyMs":5283.27887499984
35},{"questionId":"q106","format":"xml","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":21950,"outputTokens":344,"latencyMs":3704.6703340001404},{"questionId":"q106","format":"yaml","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17148,"outputTokens":363,"latencyMs":3662.9946250000503},{"questionId":"q107","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":20056,"outputTokens":1089,"latencyMs":6864.773415999953},{"questionId":"q107","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":15094,"outputTokens":1526,"latencyMs":9207.031332999934},{"questionId":"q107","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12426,"outputTokens":1359,"latencyMs":9062.027125000022},{"questionId":"q107","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12273,"outputTokens":1806,"latencyMs":14685.348665999947},{"questionId":"q107","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":21942,"outputTokens":1976,"latencyMs":13114.445250000106},{"questionId":"q107","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":17140,"outputTokens":1208,"latencyMs":8724.333208000055},{"questionId":"q108","format":"json-pretty","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":20059,"outputTokens":5726,"latencyMs":31383.031709000003},{"questionId":"q108","format":"json-compact","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":15097,"outputTokens":15927,"latencyMs":83352.50908400002},{"questionId":"q108","format":"toon","mod
el":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":12429,"outputTokens":14642,"latencyMs":76793.0145419999},{"questionId":"q108","format":"csv","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":12276,"outputTokens":15582,"latencyMs":119955.48891700013},{"questionId":"q108","format":"xml","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":21945,"outputTokens":13882,"latencyMs":82885.28808300011},{"questionId":"q108","format":"yaml","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":17143,"outputTokens":9705,"latencyMs":58469.82349999994},{"questionId":"q109","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":20059,"outputTokens":13535,"latencyMs":71802.53425000003},{"questionId":"q109","format":"json-compact","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":15097,"outputTokens":15321,"latencyMs":79592.80083299987},{"questionId":"q109","format":"toon","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":12429,"outputTokens":26445,"latencyMs":138743.61295900005},{"questionId":"q109","format":"csv","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":12276,"outputTokens":13615,"latencyMs":100573.19325000001},{"questionId":"q109","format":"xml","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":21945,"outputTokens":15448,"latencyMs":92402.44233300001},{"questionId":"q109","format":"yaml","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":17143,"outputTokens":14660,"latencyMs":91427.13770799991},{"questionId":"q110","format":"
json-pretty","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":20058,"outputTokens":19797,"latencyMs":106845.49187499983},{"questionId":"q110","format":"json-compact","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":15096,"outputTokens":7409,"latencyMs":39714.11279199994},{"questionId":"q110","format":"toon","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":12428,"outputTokens":14679,"latencyMs":76982.10566700017},{"questionId":"q110","format":"csv","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":12275,"outputTokens":23414,"latencyMs":174875.65070800018},{"questionId":"q110","format":"xml","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":21944,"outputTokens":25851,"latencyMs":159788.21325000003},{"questionId":"q110","format":"yaml","model":"gemini-3-flash-preview","expected":"154136","actual":"154185.63","isCorrect":false,"inputTokens":17142,"outputTokens":10394,"latencyMs":62433.57787499996},{"questionId":"q111","format":"json-pretty","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":20060,"outputTokens":3877,"latencyMs":21225.32150000008},{"questionId":"q111","format":"json-compact","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":15098,"outputTokens":8925,"latencyMs":48984.84008300002},{"questionId":"q111","format":"toon","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":12430,"outputTokens":8459,"latencyMs":45477.47308299993},{"questionId":"q111","format":"csv","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":12277,"outputTokens":8758,"latencyMs":63605.94079099991},{"questionId":"q111","format":"xml","model":
"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":21946,"outputTokens":10094,"latencyMs":62024.501332999906},{"questionId":"q111","format":"yaml","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":17144,"outputTokens":5849,"latencyMs":35764.14533299999},{"questionId":"q112","format":"json-pretty","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":20060,"outputTokens":4375,"latencyMs":25708.430333000142},{"questionId":"q112","format":"json-compact","model":"gemini-3-flash-preview","expected":"53","actual":"55","isCorrect":false,"inputTokens":15098,"outputTokens":7221,"latencyMs":38571.22424999997},{"questionId":"q112","format":"toon","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":12430,"outputTokens":16826,"latencyMs":87263.43662499986},{"questionId":"q112","format":"csv","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":12277,"outputTokens":10953,"latencyMs":81493.85495900013},{"questionId":"q112","format":"xml","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":21946,"outputTokens":7502,"latencyMs":46489.75708299992},{"questionId":"q112","format":"yaml","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":17144,"outputTokens":22521,"latencyMs":138532.84033400007},{"questionId":"q113","format":"json-pretty","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":20063,"outputTokens":3380,"latencyMs":19474.907166999998},{"questionId":"q113","format":"json-compact","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":15101,"outputTokens":9797,"latencyMs":51337.08504200005},{"questionId":"q113","format":"toon","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputToken
s":12433,"outputTokens":5676,"latencyMs":30308.65554100019},{"questionId":"q113","format":"csv","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":12280,"outputTokens":8687,"latencyMs":64769.25004099985},{"questionId":"q113","format":"xml","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":21949,"outputTokens":10329,"latencyMs":64240.5879579999},{"questionId":"q113","format":"yaml","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":17147,"outputTokens":6667,"latencyMs":41567.80608300003},{"questionId":"q114","format":"json-pretty","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":20063,"outputTokens":1183,"latencyMs":8509.001875000307},{"questionId":"q114","format":"json-compact","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":15101,"outputTokens":1906,"latencyMs":11160.206208999734},{"questionId":"q114","format":"toon","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":12433,"outputTokens":3053,"latencyMs":16863.59800000023},{"questionId":"q114","format":"csv","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":12280,"outputTokens":4704,"latencyMs":37698.24920800002},{"questionId":"q114","format":"xml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":21949,"outputTokens":2804,"latencyMs":17786.60029199999},{"questionId":"q114","format":"yaml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":17147,"outputTokens":1307,"latencyMs":9145.139749999624},{"questionId":"q115","format":"json-pretty","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":20063,"outputTokens":1112,"latencyMs":7486.016334000044},{"questionId":"q115","format":"json-compa
ct","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":15101,"outputTokens":1227,"latencyMs":8173.635000000242},{"questionId":"q115","format":"toon","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":12433,"outputTokens":1472,"latencyMs":11321.796833000146},{"questionId":"q115","format":"csv","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":12280,"outputTokens":1027,"latencyMs":8629.404999999795},{"questionId":"q115","format":"xml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":21949,"outputTokens":1227,"latencyMs":8906.687082999852},{"questionId":"q115","format":"yaml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":17147,"outputTokens":2133,"latencyMs":14624.23195900023},{"questionId":"q116","format":"json-pretty","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":20062,"outputTokens":18791,"latencyMs":96396.51483300002},{"questionId":"q116","format":"json-compact","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":15100,"outputTokens":22416,"latencyMs":115972.16949999984},{"questionId":"q116","format":"toon","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":12432,"outputTokens":16355,"latencyMs":84317.18258300005},{"questionId":"q116","format":"csv","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":12279,"outputTokens":16434,"latencyMs":121070.73441699985},{"questionId":"q116","format":"xml","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":21948,"outputTokens":10321,"latencyMs":62978.07237499999},{"questionId":"q116","format":"yaml","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":171
46,"outputTokens":12161,"latencyMs":74617.54583299998},{"questionId":"q117","format":"json-pretty","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":20062,"outputTokens":4951,"latencyMs":26968.749374999665},{"questionId":"q117","format":"json-compact","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":15100,"outputTokens":6042,"latencyMs":31304.52966699982},{"questionId":"q117","format":"toon","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":12432,"outputTokens":15115,"latencyMs":81497.56237499975},{"questionId":"q117","format":"csv","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":12279,"outputTokens":14830,"latencyMs":114598.21562500019},{"questionId":"q117","format":"xml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":21948,"outputTokens":5084,"latencyMs":31102.013125000056},{"questionId":"q117","format":"yaml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":17146,"outputTokens":5562,"latencyMs":35271.17933299998},{"questionId":"q118","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":20061,"outputTokens":2330,"latencyMs":15030.119249999989},{"questionId":"q118","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":15099,"outputTokens":2889,"latencyMs":16500.675624999683},{"questionId":"q118","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":12431,"outputTokens":3077,"latencyMs":16814.760708000045},{"questionId":"q118","format":"csv","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":12278,"outputTokens":2834,"latencyMs":23582.076708000153},{"questionId":"q118","format":"xml","
model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":21947,"outputTokens":3361,"latencyMs":21489.94058299996},{"questionId":"q118","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":17145,"outputTokens":2525,"latencyMs":16160.734375},{"questionId":"q119","format":"json-pretty","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":20072,"outputTokens":10374,"latencyMs":54647.002125000115},{"questionId":"q119","format":"json-compact","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":15110,"outputTokens":14983,"latencyMs":78034.23604199989},{"questionId":"q119","format":"toon","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":12442,"outputTokens":16898,"latencyMs":88867.03208300006},{"questionId":"q119","format":"csv","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":12289,"outputTokens":19038,"latencyMs":146344.30866600014},{"questionId":"q119","format":"xml","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":21958,"outputTokens":12176,"latencyMs":72528.92975000013},{"questionId":"q119","format":"yaml","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":17156,"outputTokens":15805,"latencyMs":98779.64145899983},{"questionId":"q120","format":"json-pretty","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":20073,"outputTokens":15742,"latencyMs":83371.43599999975},{"questionId":"q120","format":"json-compact","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":15111,"outputTokens":19185,"latencyMs":100630.21837500017},{"questionId":"q120","format":"toon","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputToken
s":12443,"outputTokens":19468,"latencyMs":101790.76791699976},{"questionId":"q120","format":"csv","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":12290,"outputTokens":20826,"latencyMs":158435.00495800003},{"questionId":"q120","format":"xml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":21959,"outputTokens":9706,"latencyMs":58976.20620799996},{"questionId":"q120","format":"yaml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":17157,"outputTokens":13271,"latencyMs":79840.96987499995},{"questionId":"q121","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":20073,"outputTokens":10340,"latencyMs":54298.25204200018},{"questionId":"q121","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":15111,"outputTokens":6370,"latencyMs":34149.41604199959},{"questionId":"q121","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":12443,"outputTokens":13374,"latencyMs":70759.14558300003},{"questionId":"q121","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":12290,"outputTokens":10680,"latencyMs":79050.34208299965},{"questionId":"q121","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":21959,"outputTokens":8704,"latencyMs":54347.43133299984},{"questionId":"q121","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":17157,"outputTokens":3836,"latencyMs":24404.290165999904},{"questionId":"q122","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":20072,"outputTokens":8597,"latencyMs":45838.55229200004},{"questionId":"q122","format":"json
-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":15110,"outputTokens":4847,"latencyMs":26016.524833000265},{"questionId":"q122","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":12442,"outputTokens":5150,"latencyMs":27610.3652499998},{"questionId":"q122","format":"csv","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":12289,"outputTokens":21062,"latencyMs":154955.62645799993},{"questionId":"q122","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":21958,"outputTokens":6566,"latencyMs":40338.911374999676},{"questionId":"q122","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":17156,"outputTokens":4538,"latencyMs":27895.8099580002},{"questionId":"q123","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":20072,"outputTokens":4105,"latencyMs":22319.978999999817},{"questionId":"q123","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":15110,"outputTokens":10182,"latencyMs":52993.30112500023},{"questionId":"q123","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":12442,"outputTokens":18289,"latencyMs":96740.66679200018},{"questionId":"q123","format":"csv","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":12289,"outputTokens":4838,"latencyMs":35836.71829200024},{"questionId":"q123","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":21958,"outputTokens":2869,"latencyMs":18574.773374999873},{"questionId":"q123","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":17156,"outputTokens":
1579,"latencyMs":10453.930792000145},{"questionId":"q124","format":"json-pretty","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":8608,"outputTokens":267,"latencyMs":2424.0212500002235},{"questionId":"q124","format":"json-compact","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":5935,"outputTokens":383,"latencyMs":2769.5240000002086},{"questionId":"q124","format":"toon","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":7062,"outputTokens":285,"latencyMs":3217.1704579996876},{"questionId":"q124","format":"xml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":9431,"outputTokens":328,"latencyMs":2869.171916999854},{"questionId":"q124","format":"yaml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":6993,"outputTokens":289,"latencyMs":2557.126457999926},{"questionId":"q125","format":"json-pretty","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":8608,"outputTokens":318,"latencyMs":3223.1263749999925},{"questionId":"q125","format":"json-compact","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":5935,"outputTokens":429,"latencyMs":3635.5987909999676},{"questionId":"q125","format":"toon","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":7062,"outputTokens":477,"latencyMs":4300.350459000096},{"questionId":"q125","format":"xml","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":9431,"outputTokens":339,"latencyMs":3100.7087920000777},{"questionId":"q125","format":"yaml","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":tru
e,"inputTokens":6993,"outputTokens":383,"latencyMs":2880.1571249999106},{"questionId":"q126","format":"json-pretty","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":8609,"outputTokens":295,"latencyMs":2598.6074999999255},{"questionId":"q126","format":"json-compact","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":5936,"outputTokens":705,"latencyMs":4438.443750000093},{"questionId":"q126","format":"toon","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":7063,"outputTokens":242,"latencyMs":2333.717665999662},{"questionId":"q126","format":"xml","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":9432,"outputTokens":252,"latencyMs":2550.3124170000665},{"questionId":"q126","format":"yaml","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":6994,"outputTokens":281,"latencyMs":2307.0850829998963},{"questionId":"q127","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":8609,"outputTokens":231,"latencyMs":2298.9086250001565},{"questionId":"q127","format":"json-compact","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":5936,"outputTokens":690,"latencyMs":3840.421374999918},{"questionId":"q127","format":"toon","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":7063,"outputTokens":410,"latencyMs":3249.4291249997914},{"questionId":"q127","format":"xml","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":9432,"outputTokens":268,"latencyMs":2453.096334000118},{"questionId":"q127","format":"yaml","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":6994,"outputTokens":242,"latencyMs":2304.7999580004252},{"qu
estionId":"q128","format":"json-pretty","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":8608,"outputTokens":349,"latencyMs":3285.871041000355},{"questionId":"q128","format":"json-compact","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":5935,"outputTokens":249,"latencyMs":2135.6634579999372},{"questionId":"q128","format":"toon","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":7062,"outputTokens":232,"latencyMs":2489.8922919998877},{"questionId":"q128","format":"xml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":9431,"outputTokens":263,"latencyMs":2909.1205829996616},{"questionId":"q128","format":"yaml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":6993,"outputTokens":353,"latencyMs":2890.1726660002023},{"questionId":"q129","format":"json-pretty","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":8608,"outputTokens":238,"latencyMs":2798.9996670000255},{"questionId":"q129","format":"json-compact","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":5935,"outputTokens":338,"latencyMs":2851.1660830001347},{"questionId":"q129","format":"toon","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7062,"outputTokens":224,"latencyMs":2713.321124999784},{"questionId":"q129","format":"xml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":9431,"outputTokens":167,"latencyMs":2212.3508750000037},{"questionId":"q129","format":"yaml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6993,"outputTokens":307,"latencyMs":2859.359375},{"questionId":"
q130","format":"json-pretty","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":8609,"outputTokens":289,"latencyMs":3026.115749999881},{"questionId":"q130","format":"json-compact","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":5936,"outputTokens":183,"latencyMs":1973.9259999999776},{"questionId":"q130","format":"toon","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":7063,"outputTokens":394,"latencyMs":3659.440957999788},{"questionId":"q130","format":"xml","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":9432,"outputTokens":256,"latencyMs":3382.314666000195},{"questionId":"q130","format":"yaml","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":6994,"outputTokens":259,"latencyMs":2465.3808339997195},{"questionId":"q131","format":"json-pretty","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":8609,"outputTokens":286,"latencyMs":3041.4478339999914},{"questionId":"q131","format":"json-compact","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":5936,"outputTokens":325,"latencyMs":2796.949624999892},{"questionId":"q131","format":"toon","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":7063,"outputTokens":282,"latencyMs":2852.4671669998206},{"questionId":"q131","format":"xml","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":9432,"outputTokens":333,"latencyMs":3065.3007499999367},{"questionId":"q131","format":"yaml","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":6994,"outputTokens":297,"latencyMs":2544.2407920002006},{"questionId":"q132","format":"json-pretty","model":"gemini-3-flash-preview","expected":"info","actua
l":"info","isCorrect":true,"inputTokens":8608,"outputTokens":180,"latencyMs":2260.6082500000484},{"questionId":"q132","format":"json-compact","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":5935,"outputTokens":168,"latencyMs":1874.333333000075},{"questionId":"q132","format":"toon","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":7062,"outputTokens":188,"latencyMs":2354.53370800009},{"questionId":"q132","format":"xml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":9431,"outputTokens":247,"latencyMs":2749.997084000148},{"questionId":"q132","format":"yaml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":6993,"outputTokens":255,"latencyMs":2349.754457999952},{"questionId":"q133","format":"json-pretty","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":8608,"outputTokens":356,"latencyMs":3148.9514159997925},{"questionId":"q133","format":"json-compact","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":5935,"outputTokens":281,"latencyMs":2589.4035420003347},{"questionId":"q133","format":"toon","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7062,"outputTokens":281,"latencyMs":2554.130750000011},{"questionId":"q133","format":"xml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":9431,"outputTokens":258,"latencyMs":2656.17191700032},{"questionId":"q133","format":"yaml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6993,"outputTokens":283,"latencyMs":2506.751500000246},{"questionId":"q134","format":"json-pretty","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":tru
e,"inputTokens":8583,"outputTokens":5157,"latencyMs":28712.846333000343},{"questionId":"q134","format":"json-compact","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":5910,"outputTokens":7487,"latencyMs":36580.23633399978},{"questionId":"q134","format":"toon","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":7037,"outputTokens":2525,"latencyMs":13479.526708999649},{"questionId":"q134","format":"xml","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":9406,"outputTokens":6391,"latencyMs":33424.45512499986},{"questionId":"q134","format":"yaml","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":6968,"outputTokens":10872,"latencyMs":48255.78658300033},{"questionId":"q135","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":8584,"outputTokens":11831,"latencyMs":64820.602624999825},{"questionId":"q135","format":"json-compact","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":5911,"outputTokens":10114,"latencyMs":49019.72883299971},{"questionId":"q135","format":"toon","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":7038,"outputTokens":43656,"latencyMs":199324.32404199988},{"questionId":"q135","format":"xml","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":9407,"outputTokens":10414,"latencyMs":54203.37504100008},{"questionId":"q135","format":"yaml","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":6969,"outputTokens":12477,"latencyMs":55254.101749999914},{"questionId":"q136","format":"json-pretty","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":8583,"outputTokens":18508,"latencyMs":10
0329.93887499999},{"questionId":"q136","format":"json-compact","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":5910,"outputTokens":14007,"latencyMs":67178.57879200019},{"questionId":"q136","format":"toon","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":7037,"outputTokens":14524,"latencyMs":69145.21383399982},{"questionId":"q136","format":"xml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":9406,"outputTokens":15462,"latencyMs":82450.48729200009},{"questionId":"q136","format":"yaml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":6968,"outputTokens":10848,"latencyMs":48664.41645799996},{"questionId":"q137","format":"json-pretty","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":8583,"outputTokens":5620,"latencyMs":30888.40512500005},{"questionId":"q137","format":"json-compact","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":5910,"outputTokens":7693,"latencyMs":36660.243540999945},{"questionId":"q137","format":"toon","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":7037,"outputTokens":2920,"latencyMs":15143.715332999825},{"questionId":"q137","format":"xml","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":9406,"outputTokens":8984,"latencyMs":46835.20883300016},{"questionId":"q137","format":"yaml","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":6968,"outputTokens":6174,"latencyMs":28497.19212500006},{"questionId":"q138","format":"json-pretty","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":8583,"outputTokens":9725,"latencyMs":54943.2827920001},{"questionId":"q138","format":"json-compact","model":"gemini-3-flash-preview","
expected":"19","actual":"19","isCorrect":true,"inputTokens":5910,"outputTokens":8986,"latencyMs":43876.19408400031},{"questionId":"q138","format":"toon","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":7037,"outputTokens":6542,"latencyMs":31148.786208000034},{"questionId":"q138","format":"xml","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":9406,"outputTokens":16071,"latencyMs":82436.98754200013},{"questionId":"q138","format":"yaml","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":6968,"outputTokens":7150,"latencyMs":32330.752042000182},{"questionId":"q139","format":"json-pretty","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":8586,"outputTokens":5601,"latencyMs":32229.26049999986},{"questionId":"q139","format":"json-compact","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":5913,"outputTokens":11803,"latencyMs":57398.489041999914},{"questionId":"q139","format":"toon","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":7040,"outputTokens":4700,"latencyMs":23756.146958000027},{"questionId":"q139","format":"xml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":9409,"outputTokens":6010,"latencyMs":31691.516667000018},{"questionId":"q139","format":"yaml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":6971,"outputTokens":4894,"latencyMs":22449.997041999828},{"questionId":"q140","format":"json-pretty","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":8586,"outputTokens":3607,"latencyMs":21293.889042000286},{"questionId":"q140","format":"json-compact","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":5913,"outputTokens":7229,"latency
Ms":35180.18087500008},{"questionId":"q140","format":"toon","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":7040,"outputTokens":4773,"latencyMs":24345.19016700005},{"questionId":"q140","format":"xml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":9409,"outputTokens":5434,"latencyMs":28506.110791000072},{"questionId":"q140","format":"yaml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":6971,"outputTokens":8644,"latencyMs":39661.89812499983},{"questionId":"q141","format":"json-pretty","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":8592,"outputTokens":14362,"latencyMs":81133.86466600001},{"questionId":"q141","format":"json-compact","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":5919,"outputTokens":14987,"latencyMs":104340.38850000035},{"questionId":"q141","format":"toon","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":7046,"outputTokens":14072,"latencyMs":69169.39199999999},{"questionId":"q141","format":"xml","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":9415,"outputTokens":15517,"latencyMs":80654.56216700003},{"questionId":"q141","format":"yaml","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":6977,"outputTokens":8356,"latencyMs":38407.33191600023},{"questionId":"q142","format":"json-pretty","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":8592,"outputTokens":7512,"latencyMs":42338.74445799971},{"questionId":"q142","format":"json-compact","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":5919,"outputTokens":14699,"latencyMs":102866.6781250001},{"questionId":"q142","format":"toon","model":"gemini-3-flash-preview","e
xpected":"42","actual":"42","isCorrect":true,"inputTokens":7046,"outputTokens":10024,"latencyMs":48049.216624999885},{"questionId":"q142","format":"xml","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":9415,"outputTokens":4110,"latencyMs":22297.574332999997},{"questionId":"q142","format":"yaml","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":6977,"outputTokens":8176,"latencyMs":37879.22820900008},{"questionId":"q143","format":"json-pretty","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":8584,"outputTokens":8433,"latencyMs":48164.083540999796},{"questionId":"q143","format":"json-compact","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":5911,"outputTokens":9618,"latencyMs":67983.92641700013},{"questionId":"q143","format":"toon","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":7038,"outputTokens":16426,"latencyMs":76317.16970800003},{"questionId":"q143","format":"xml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":9407,"outputTokens":8567,"latencyMs":44877.11583300028},{"questionId":"q143","format":"yaml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":6969,"outputTokens":11861,"latencyMs":53283.976916999556},{"questionId":"q144","format":"json-pretty","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":8592,"outputTokens":12164,"latencyMs":68852.63249999983},{"questionId":"q144","format":"json-compact","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":5919,"outputTokens":11643,"latencyMs":56538.2484579999},{"questionId":"q144","format":"toon","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":7046,"outputTokens":5223,"latencyMs
":27300.982749999966},{"questionId":"q144","format":"xml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":9415,"outputTokens":10864,"latencyMs":56605.348084000405},{"questionId":"q144","format":"yaml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":6977,"outputTokens":12708,"latencyMs":57055.8494160003},{"questionId":"q145","format":"json-pretty","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":8592,"outputTokens":5474,"latencyMs":31819.555292000063},{"questionId":"q145","format":"json-compact","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":5919,"outputTokens":6151,"latencyMs":30285.51412500022},{"questionId":"q145","format":"toon","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":7046,"outputTokens":5350,"latencyMs":25504.35120799998},{"questionId":"q145","format":"xml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":9415,"outputTokens":6781,"latencyMs":34964.004333000164},{"questionId":"q145","format":"yaml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":6977,"outputTokens":5966,"latencyMs":27963.87108300021},{"questionId":"q146","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":8595,"outputTokens":6499,"latencyMs":39847.565542000346},{"questionId":"q146","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":5922,"outputTokens":8538,"latencyMs":41626.66154199978},{"questionId":"q146","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":7049,"outputTokens":8894,"latencyMs":41383.13070899993},{"questionId":"q146","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual
":"1","isCorrect":true,"inputTokens":9418,"outputTokens":15584,"latencyMs":83139.89533400023},{"questionId":"q146","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":6980,"outputTokens":11573,"latencyMs":53524.50162500003},{"questionId":"q147","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":8595,"outputTokens":5956,"latencyMs":34166.95324999979},{"questionId":"q147","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":5922,"outputTokens":2397,"latencyMs":12619.9861659999},{"questionId":"q147","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":7049,"outputTokens":9155,"latencyMs":43449.04179200018},{"questionId":"q147","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":9418,"outputTokens":5531,"latencyMs":29418.45379099995},{"questionId":"q147","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":6980,"outputTokens":6409,"latencyMs":29086.21683300007},{"questionId":"q148","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8595,"outputTokens":7705,"latencyMs":43872.01258300012},{"questionId":"q148","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":5922,"outputTokens":7170,"latencyMs":35578.25637499988},{"questionId":"q148","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":7049,"outputTokens":9908,"latencyMs":46831.90787500003},{"questionId":"q148","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9418,"outputTokens":19837,"latencyMs":103368.64858300006},{"questionId":"q148","form
at":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":6980,"outputTokens":15097,"latencyMs":68301.71233400004},{"questionId":"q149","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":8591,"outputTokens":9884,"latencyMs":56434.583709000144},{"questionId":"q149","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":5918,"outputTokens":12809,"latencyMs":63979.966874999925},{"questionId":"q149","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"Answer: 2","isCorrect":true,"inputTokens":7045,"outputTokens":8870,"latencyMs":40109.1437090002},{"questionId":"q149","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":9414,"outputTokens":9366,"latencyMs":49063.199750000145},{"questionId":"q149","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":6976,"outputTokens":8560,"latencyMs":38944.46016700007},{"questionId":"q150","format":"json-pretty","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":8591,"outputTokens":10687,"latencyMs":60147.765959000215},{"questionId":"q150","format":"json-compact","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":5918,"outputTokens":21709,"latencyMs":106119.25466600014},{"questionId":"q150","format":"toon","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":7045,"outputTokens":12294,"latencyMs":59010.86845900025},{"questionId":"q150","format":"xml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":9414,"outputTokens":5382,"latencyMs":28824.669832999818},{"questionId":"q150","format":"yaml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":6
976,"outputTokens":17566,"latencyMs":79004.90779099986},{"questionId":"q151","format":"json-pretty","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":1237,"outputTokens":94,"latencyMs":1356.483208999969},{"questionId":"q151","format":"json-compact","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":722,"outputTokens":178,"latencyMs":1744.4732080004178},{"questionId":"q151","format":"toon","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":863,"outputTokens":69,"latencyMs":1523.4334580004215},{"questionId":"q151","format":"xml","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":1296,"outputTokens":115,"latencyMs":1761.98837500019},{"questionId":"q151","format":"yaml","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":886,"outputTokens":159,"latencyMs":1813.217042000033},{"questionId":"q152","format":"json-pretty","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1235,"outputTokens":165,"latencyMs":1766.4899579999037},{"questionId":"q152","format":"json-compact","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":720,"outputTokens":161,"latencyMs":2303.5573749998584},{"questionId":"q152","format":"toon","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":861,"outputTokens":199,"latencyMs":1999.6343749999069},{"questionId":"q152","format":"xml","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1294,"outputTokens":344,"l
atencyMs":2810.8215000000782},{"questionId":"q152","format":"yaml","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":884,"outputTokens":177,"latencyMs":4135.895332999993},{"questionId":"q153","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1235,"outputTokens":104,"latencyMs":1963.6168749998324},{"questionId":"q153","format":"json-compact","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":720,"outputTokens":447,"latencyMs":2891.421833000146},{"questionId":"q153","format":"toon","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":861,"outputTokens":225,"latencyMs":3080.291000000201},{"questionId":"q153","format":"xml","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1294,"outputTokens":333,"latencyMs":3431.1725840000436},{"questionId":"q153","format":"yaml","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":884,"outputTokens":299,"latencyMs":4925.9569580000825},{"questionId":"q154","format":"json-pretty","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":1237,"outputTokens":313,"latencyMs":2861.0692920000292},{"questionId":"q154","format":"json-compact","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":722,"outputTokens":265,"latencyMs":3006.8672500001267},{"questionId":"q154","format":"toon","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":863,"outputTokens":255,"latencyMs":3160.707832999993},{"questionId":"q154","format":"xml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":1296,"outputTokens":243,"latencyMs":2785.681791000068},{"questionId":"q1
54","format":"yaml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":886,"outputTokens":145,"latencyMs":1815.165542000439},{"questionId":"q155","format":"json-pretty","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1235,"outputTokens":105,"latencyMs":3050.7756249997765},{"questionId":"q155","format":"json-compact","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":720,"outputTokens":232,"latencyMs":2096.2955419998616},{"questionId":"q155","format":"toon","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":861,"outputTokens":201,"latencyMs":2643.581917000003},{"questionId":"q155","format":"xml","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1294,"outputTokens":180,"latencyMs":1802.0862079998478},{"questionId":"q155","format":"yaml","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":884,"outputTokens":190,"latencyMs":3095.989082999993},{"questionId":"q156","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1237,"outputTokens":252,"latencyMs":2563.272583000362},{"questionId":"q156","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":722,"outputTokens":362,"latencyMs":3544.412500000093},{"questionId":"q156","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":863,"outputTokens":191,"latencyMs":1886.8387090000324},{"questionId":"q156","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1296,"outputTokens":166,"latencyMs":2480.9494579997845},{"questionId":"q156","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect
":true,"inputTokens":886,"outputTokens":195,"latencyMs":3016.8389580002986},{"questionId":"q157","format":"json-pretty","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1237,"outputTokens":260,"latencyMs":3002.749042000156},{"questionId":"q157","format":"json-compact","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":722,"outputTokens":184,"latencyMs":2796.544875000138},{"questionId":"q157","format":"toon","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":863,"outputTokens":211,"latencyMs":2436.493083000183},{"questionId":"q157","format":"xml","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1296,"outputTokens":264,"latencyMs":2561.323708000127},{"questionId":"q157","format":"yaml","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":886,"outputTokens":216,"latencyMs":1830.2423749999143},{"questionId":"q158","format":"json-pretty","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1235,"outputTokens":130,"latencyMs":2081.5682910000905},{"questionId":"q158","format":"json-compact","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":720,"outputTokens":296,"latencyMs":3103.874292000197},{"questionId":"q158","format":"toon","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":861,"outputTokens":293,"latencyMs":2729.4198330002837},{"questionId":"q158","format":"xml","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1294,"outputTokens":152,"latencyMs":2323.6319999997504},{"questionId":"q158","format":"yaml","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":884,"outputT
okens":235,"latencyMs":2060.8165839998983},{"questionId":"q159","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1236,"outputTokens":227,"latencyMs":2162.90774999978},{"questionId":"q159","format":"json-compact","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":721,"outputTokens":179,"latencyMs":2358.5181660000235},{"questionId":"q159","format":"toon","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":862,"outputTokens":195,"latencyMs":2127.304125000257},{"questionId":"q159","format":"xml","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1295,"outputTokens":225,"latencyMs":2038.2347920001484},{"questionId":"q159","format":"yaml","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":885,"outputTokens":219,"latencyMs":2214.910457999911},{"questionId":"q160","format":"json-pretty","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1237,"outputTokens":91,"latencyMs":1939.7651250003837},{"questionId":"q160","format":"json-compact","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":722,"outputTokens":167,"latencyMs":2750.511458000168},{"questionId":"q160","format":"toon","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":863,"outputTokens":167,"latencyMs":1936.0097090001218},{"questionId":"q160","format":"xml","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1296,"outputTokens":246,"latencyMs":2371.70804100018},{"questionId":"q160","format":"yaml","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":886,"outputTokens":446,"latencyMs":3801.999749999959},{"questionId":
"q161","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1237,"outputTokens":285,"latencyMs":3205.5163340000436},{"questionId":"q161","format":"json-compact","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":722,"outputTokens":187,"latencyMs":3857.28429099964},{"questionId":"q161","format":"toon","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":863,"outputTokens":226,"latencyMs":2249.7077500000596},{"questionId":"q161","format":"xml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1296,"outputTokens":228,"latencyMs":2719.9549580002204},{"questionId":"q161","format":"yaml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":886,"outputTokens":220,"latencyMs":2244.919209000189},{"questionId":"q162","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1237,"outputTokens":143,"latencyMs":2071.9482090002857},{"questionId":"q162","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":722,"outputTokens":249,"latencyMs":1951.5478330003098},{"questionId":"q162","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":863,"outputTokens":218,"latencyMs":3030.7930410001427},{"questionId":"q162","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1296,"outputTokens":179,"latencyMs":1702.7638750001788},{"questionId":"q162","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":886,"outputTokens":112,"latencyMs":2511.13204200007},{"questionId":"q163","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1236,"outpu
tTokens":182,"latencyMs":2075.898541999981},{"questionId":"q163","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":721,"outputTokens":429,"latencyMs":3667.821416999679},{"questionId":"q163","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":862,"outputTokens":231,"latencyMs":2555.316041999962},{"questionId":"q163","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1295,"outputTokens":209,"latencyMs":2018.4738340000622},{"questionId":"q163","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":885,"outputTokens":186,"latencyMs":1863.105624999851},{"questionId":"q164","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1236,"outputTokens":105,"latencyMs":1381.3430409999564},{"questionId":"q164","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":721,"outputTokens":197,"latencyMs":2837.34924999997},{"questionId":"q164","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":862,"outputTokens":170,"latencyMs":1930.47120800009},{"questionId":"q164","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1295,"outputTokens":377,"latencyMs":3279.6485000001267},{"questionId":"q164","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":885,"outputTokens":143,"latencyMs":1744.5412499997765},{"questionId":"q165","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1236,"outputTokens":216,"latencyMs":2265.6218750001863},{"questionId":"q165","format":"json-compact","model":"gemini-3-flash-preview","expected":"3
","actual":"3","isCorrect":true,"inputTokens":721,"outputTokens":287,"latencyMs":2761.6886659995653},{"questionId":"q165","format":"toon","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":862,"outputTokens":247,"latencyMs":3142.6592079997063},{"questionId":"q165","format":"xml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1295,"outputTokens":436,"latencyMs":3082.7662080000155},{"questionId":"q165","format":"yaml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":885,"outputTokens":233,"latencyMs":2283.4882499999367},{"questionId":"q166","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1240,"outputTokens":249,"latencyMs":2116.1106660002843},{"questionId":"q166","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":725,"outputTokens":377,"latencyMs":2812.1302919997834},{"questionId":"q166","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":866,"outputTokens":450,"latencyMs":3136.8454169998877},{"questionId":"q166","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1299,"outputTokens":441,"latencyMs":2868.9416249999776},{"questionId":"q166","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":889,"outputTokens":382,"latencyMs":3159.447166000027},{"questionId":"q167","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1236,"outputTokens":280,"latencyMs":2177.99850000022},{"questionId":"q167","format":"json-compact","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":721,"outputTokens":208,"latencyMs":3065.321750000119},{"questionId":"q167","format":"
toon","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":862,"outputTokens":377,"latencyMs":3650.124165999703},{"questionId":"q167","format":"xml","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1295,"outputTokens":223,"latencyMs":2429.2223749998957},{"questionId":"q167","format":"yaml","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":885,"outputTokens":229,"latencyMs":3948.2972499998286},{"questionId":"q168","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1238,"outputTokens":307,"latencyMs":2680.3013749998063},{"questionId":"q168","format":"json-compact","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":723,"outputTokens":304,"latencyMs":3679.1967079997994},{"questionId":"q168","format":"toon","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":864,"outputTokens":204,"latencyMs":3266.1833329997025},{"questionId":"q168","format":"xml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1297,"outputTokens":204,"latencyMs":3318.4111250001006},{"questionId":"q168","format":"yaml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":887,"outputTokens":238,"latencyMs":2364.597916000057},{"questionId":"q169","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":1240,"outputTokens":372,"latencyMs":3096.718915999867},{"questionId":"q169","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":725,"outputTokens":281,"latencyMs":2706.015875000041},{"questionId":"q169","format":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":866,"outputTokens":2807,"latencyMs
":18313.180917000398},{"questionId":"q169","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":1299,"outputTokens":1733,"latencyMs":10828.579791999888},{"questionId":"q169","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":889,"outputTokens":707,"latencyMs":5018.436459000222},{"questionId":"q170","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1239,"outputTokens":392,"latencyMs":3230.3105409997515},{"questionId":"q170","format":"json-compact","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":724,"outputTokens":385,"latencyMs":4350.139124999754},{"questionId":"q170","format":"toon","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":865,"outputTokens":1904,"latencyMs":11660.972834000364},{"questionId":"q170","format":"xml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1298,"outputTokens":460,"latencyMs":3677.423874999862},{"questionId":"q170","format":"yaml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":888,"outputTokens":289,"latencyMs":2875.7883750000037},{"questionId":"q171","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1241,"outputTokens":365,"latencyMs":3135.994249999989},{"questionId":"q171","format":"json-compact","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":726,"outputTokens":207,"latencyMs":2582.6173749999143},{"questionId":"q171","format":"toon","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":867,"outputTokens":300,"latencyMs":2855.8550419998355},{"questionId":"q171","format":"xml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect"
:true,"inputTokens":1300,"outputTokens":605,"latencyMs":5532.035957999993},{"questionId":"q171","format":"yaml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":890,"outputTokens":167,"latencyMs":1623.3711669999175},{"questionId":"q172","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1241,"outputTokens":256,"latencyMs":3167.6805420001037},{"questionId":"q172","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":726,"outputTokens":317,"latencyMs":3493.365625000093},{"questionId":"q172","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":867,"outputTokens":347,"latencyMs":3740.0589590002783},{"questionId":"q172","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1300,"outputTokens":356,"latencyMs":2963.488791999873},{"questionId":"q172","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":890,"outputTokens":316,"latencyMs":3080.344166999683},{"questionId":"q173","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1243,"outputTokens":192,"latencyMs":1708.4010409996845},{"questionId":"q173","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":728,"outputTokens":210,"latencyMs":2411.453290999867},{"questionId":"q173","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":869,"outputTokens":307,"latencyMs":2766.541458000429},{"questionId":"q173","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1302,"outputTokens":228,"latencyMs":3558.0790419997647},{"questionId":"q173","format":"yaml","model":"gemini-3-fla
sh-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":892,"outputTokens":233,"latencyMs":4017.5527500002645},{"questionId":"q174","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1239,"outputTokens":273,"latencyMs":2889.037250000052},{"questionId":"q174","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":724,"outputTokens":358,"latencyMs":2522.6696670004167},{"questionId":"q174","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":865,"outputTokens":283,"latencyMs":2806.383666999638},{"questionId":"q174","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1298,"outputTokens":325,"latencyMs":3174.8865000000224},{"questionId":"q174","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":888,"outputTokens":330,"latencyMs":4017.2965839998797},{"questionId":"q175","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1244,"outputTokens":370,"latencyMs":3330.693208000157},{"questionId":"q175","format":"json-compact","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":729,"outputTokens":475,"latencyMs":3981.611374999862},{"questionId":"q175","format":"toon","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":870,"outputTokens":12121,"latencyMs":64797.92758300016},{"questionId":"q175","format":"xml","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1303,"outputTokens":7249,"latencyMs":40270.52766599972},{"questionId":"q175","format":"yaml","model":"gemini-3-flash-preview","expected":"0","actual":"Answer: 
0","isCorrect":true,"inputTokens":893,"outputTokens":12428,"latencyMs":71397.56554099964},{"questionId":"q176","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1237,"outputTokens":255,"latencyMs":3975.222832999658},{"questionId":"q176","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":722,"outputTokens":322,"latencyMs":3168.289000000339},{"questionId":"q176","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":863,"outputTokens":470,"latencyMs":3961.8239999995567},{"questionId":"q176","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1296,"outputTokens":363,"latencyMs":2776.368209000211},{"questionId":"q176","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":886,"outputTokens":183,"latencyMs":2042.706125000026},{"questionId":"q177","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1247,"outputTokens":398,"latencyMs":3183.3096249997616},{"questionId":"q177","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":732,"outputTokens":601,"latencyMs":4003.3801659997553},{"questionId":"q177","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":873,"outputTokens":460,"latencyMs":3824.3355840002187},{"questionId":"q177","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1306,"outputTokens":335,"latencyMs":2670.33212500019},{"questionId":"q177","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":896,"outputTokens":509,"latencyMs":3931.6993750003166},{"questionId":"q178","format":"json-pretty",
"model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1240,"outputTokens":202,"latencyMs":2379.3624999998137},{"questionId":"q178","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":725,"outputTokens":343,"latencyMs":2364.520125000272},{"questionId":"q178","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":866,"outputTokens":360,"latencyMs":3147.515875000041},{"questionId":"q178","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1299,"outputTokens":560,"latencyMs":3410.046916999854},{"questionId":"q178","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":889,"outputTokens":251,"latencyMs":2061.3066670000553},{"questionId":"q179","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1239,"outputTokens":264,"latencyMs":2578.9504999998026},{"questionId":"q179","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":724,"outputTokens":411,"latencyMs":3026.957417000085},{"questionId":"q179","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":865,"outputTokens":384,"latencyMs":3062.3282920001075},{"questionId":"q179","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1298,"outputTokens":1253,"latencyMs":8271.817874999717},{"questionId":"q179","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":888,"outputTokens":1088,"latencyMs":6691.79045900004},{"questionId":"q180","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":7952,"outputTokens":315,"latencyMs":3
943.8354580001906},{"questionId":"q180","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":4742,"outputTokens":322,"latencyMs":3352.6187920002267},{"questionId":"q180","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3385,"outputTokens":423,"latencyMs":3825.752249999903},{"questionId":"q180","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3237,"outputTokens":348,"latencyMs":3906.314874999691},{"questionId":"q180","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":9139,"outputTokens":292,"latencyMs":3257.018541999627},{"questionId":"q180","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":5792,"outputTokens":201,"latencyMs":2769.807792000007},{"questionId":"q181","format":"json-pretty","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7958,"outputTokens":197,"latencyMs":2067.750457999762},{"questionId":"q181","format":"json-compact","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4748,"outputTokens":298,"latencyMs":3136.904000000097},{"questionId":"q181","format":"toon","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":3391,"outputTokens":162,"latencyMs":2419.802917000372},{"questionId":"q181","format":"csv","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id
,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":3243,"outputTokens":238,"latencyMs":2899.084875000175},{"questionId":"q181","format":"xml","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":9145,"outputTokens":338,"latencyMs":3334.7015829999},{"questionId":"q181","format":"yaml","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5798,"outputTokens":204,"latencyMs":2331.2722080000676},{"questionId":"q182","format":"json-pretty","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":7955,"outputTokens":186,"latencyMs":1860.572040999774},{"questionId":"q182","format":"json-compact","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":4745,"outputTokens":188,"latencyMs":2353.6782500003465},{"questionId":"q182","format":"toon","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":3388,"outputTokens":210,"latencyMs":2166.1137500000186},{"questionId":"q182","format":"csv","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":3240,"outputTokens":156,"latencyMs":2828.5353749999776},{"questionId":"q182","format":"xml","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":9142,"outputTokens":309,"latencyMs":3038.5298750000075},{"questionId":"q182","format":"yaml","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":5795,"outputTokens":191,"latencyMs":2307.4157910002396},{"questionId":"q183","format":"json-pretty","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCo
rrect":true,"inputTokens":7956,"outputTokens":255,"latencyMs":2322.740791999735},{"questionId":"q183","format":"json-compact","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4746,"outputTokens":322,"latencyMs":3037.849916000385},{"questionId":"q183","format":"toon","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":3389,"outputTokens":447,"latencyMs":3962.5759999998845},{"questionId":"q183","format":"csv","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":3241,"outputTokens":242,"latencyMs":3107.0507089998573},{"questionId":"q183","format":"xml","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":9143,"outputTokens":345,"latencyMs":3499.3677920000628},{"questionId":"q183","format":"yaml","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5796,"outputTokens":275,"latencyMs":2694.6957910000347},{"questionId":"q184","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":7956,"outputTokens":225,"latencyMs":2593.3629159997217},{"questionId":"q184","format":"json-compact","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":4746,"outputTokens":249,"latencyMs":3521.3388749998994},{"questionId":"q184","format":"toon","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":3389,"outputTokens":290,"latencyMs":3582.416000000201},{"questionId":"q184","format":"csv","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":3241,"outputTokens":303,"latencyMs":3177.1879170001484},{"questionId":"q184","format":"xml","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares 
Skiles","isCorrect":true,"inputTokens":9143,"outputTokens":278,"latencyMs":3262.093291999772},{"questionId":"q184","format":"yaml","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":5796,"outputTokens":201,"latencyMs":2818.5692920000292},{"questionId":"q185","format":"json-pretty","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":7953,"outputTokens":557,"latencyMs":3882.9571249997243},{"questionId":"q185","format":"json-compact","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":4743,"outputTokens":285,"latencyMs":3122.202416999731},{"questionId":"q185","format":"toon","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":3386,"outputTokens":244,"latencyMs":2954.8477909998037},{"questionId":"q185","format":"csv","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":3238,"outputTokens":405,"latencyMs":3678.0608749999665},{"questionId":"q185","format":"xml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":9140,"outputTokens":307,"latencyMs":3485.0552500002086},{"questionId":"q185","format":"yaml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":5793,"outputTokens":251,"latencyMs":2684.3431250001304},{"questionId":"q186","format":"json-pretty","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":14515,"outputTokens":384,"latencyMs":3508.5958750001155},{"questionId":"q186","format":"json-compact","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":8888,"outputTokens":372,"latencyMs":3151.7762500001118},{"questionId":"q186","format":"toon","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":9421,"outputTokens":249,"latencyMs":2743.18208299996
33},{"questionId":"q186","format":"xml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":16018,"outputTokens":748,"latencyMs":4888.485166999977},{"questionId":"q186","format":"yaml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":10583,"outputTokens":710,"latencyMs":4790.436832999811},{"questionId":"q187","format":"json-pretty","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":14524,"outputTokens":258,"latencyMs":2594.7981249997392},{"questionId":"q187","format":"json-compact","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8897,"outputTokens":310,"latencyMs":3308.189792000223},{"questionId":"q187","format":"toon","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":9430,"outputTokens":631,"latencyMs":4832.389708000235},{"questionId":"q187","format":"xml","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":16027,"outputTokens":4325,"latencyMs":25736.56224999996},{"questionId":"q187","format":"yaml","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":10592,"outputTokens":1492,"latencyMs":9646.41858400032},{"questionId":"q188","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrec
t":true,"inputTokens":14520,"outputTokens":398,"latencyMs":3531.046916999854},{"questionId":"q188","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8893,"outputTokens":368,"latencyMs":3435.5410830001347},{"questionId":"q188","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9426,"outputTokens":764,"latencyMs":5684.054332999978},{"questionId":"q188","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":16023,"outputTokens":700,"latencyMs":5889.028833999764},{"questionId":"q188","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":10588,"outputTokens":365,"latencyMs":3188.390374999959},{"questionId":"q189","format":"json-pretty","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":14524,"outputTokens":235,"latencyMs":2662.8358329995535},{"questionId":"q189","format":"json-compact","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8897,"outputTokens":467,"latencyMs":4048.8038749997504},{"questionId":"q189","format":"toon","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":9430,"outputTokens":332,"latencyMs":3812.723042000085},{"questionId":"q189","format":"xml","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":16027,"outputTokens":1380,"latencyMs":9444.909583},{"questionId":"q189","format":"yaml","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":10592,"outputTokens":327,"latencyMs":3314.23037499981},{"questionId":"q190","f
ormat":"json-pretty","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14519,"outputTokens":408,"latencyMs":3413.136832999997},{"questionId":"q190","format":"json-compact","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8892,"outputTokens":285,"latencyMs":2905.8485830002464},{"questionId":"q190","format":"toon","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9425,"outputTokens":584,"latencyMs":5310.31254200032},{"questionId":"q190","format":"xml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":16022,"outputTokens":403,"latencyMs":3149.7518750000745},{"questionId":"q190","format":"yaml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10587,"outputTokens":400,"latencyMs":3436.1740419999696},{"questionId":"q191","format":"json-pretty","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":14525,"outputTokens":470,"latencyMs":4506.274041000288},{"questionId":"q191","format":"json-compact","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8898,"outputTokens":442,"latencyMs":4879.77358300006},{"questionId":"q191","format":"toon","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":9431,"outputTokens":238,"latencyMs":2592.435749999713},{"questionId":"q191","format":"xml","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":16028,"outputTokens":317,"latencyMs":2828.340749999974},{"questionId":"q191","format":"yaml","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actua
l":"id,name,email,phone","isCorrect":true,"inputTokens":10593,"outputTokens":539,"latencyMs":4051.061791999731},{"questionId":"q192","format":"json-pretty","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":4842,"outputTokens":492,"latencyMs":3239.620291000232},{"questionId":"q192","format":"json-compact","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3090,"outputTokens":1226,"latencyMs":7195.048208000138},{"questionId":"q192","format":"toon","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2353,"outputTokens":1234,"latencyMs":7177.8317080000415},{"questionId":"q192","format":"csv","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2267,"outputTokens":1411,"latencyMs":7650.357165999711},{"questionId":"q192","format":"xml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":5485,"outputTokens":1266,"latencyMs":7596.692000000272},{"questionId":"q192","format":"yaml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3871,"outputTokens":1206,"latencyMs":6596.45620799996},{"questionId":"q193","format":"json-pretty","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4847,"outputTokens":209,"latencyMs":1896.434499999974},{"questionId":"q193","format":"json-compact","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3095,"outputTokens":258,"latencyMs":2658.780958000105},{"questionId":"q193","format":"toon","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bou
nceRate","isCorrect":true,"inputTokens":2358,"outputTokens":197,"latencyMs":2764.855624999851},{"questionId":"q193","format":"csv","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2272,"outputTokens":454,"latencyMs":3435.9187500001863},{"questionId":"q193","format":"xml","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":5490,"outputTokens":209,"latencyMs":3226.861624999903},{"questionId":"q193","format":"yaml","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3876,"outputTokens":190,"latencyMs":2388.0288749998435},{"questionId":"q194","format":"json-pretty","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":4845,"outputTokens":166,"latencyMs":1974.8230409999378},{"questionId":"q194","format":"json-compact","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3093,"outputTokens":270,"latencyMs":2943.581833000295},{"questionId":"q194","format":"toon","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2356,"outputTokens":193,"latencyMs":2672.7335839997977},{"questionId":"q194","format":"csv","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2270,"outputTokens":253,"latencyMs":2323.008790999651},{"questionId":"q194","format":"xml","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":5488,"outputTokens":270,"latencyMs":3329.949667000212},{"questionId":"q194","format":"yaml","model":"gemini-3-flash-preview","expec
ted":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3874,"outputTokens":258,"latencyMs":2539.2681660000235},{"questionId":"q195","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4846,"outputTokens":256,"latencyMs":1998.8394589996897},{"questionId":"q195","format":"json-compact","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3094,"outputTokens":364,"latencyMs":3745.7140000001527},{"questionId":"q195","format":"toon","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2357,"outputTokens":548,"latencyMs":3837.162624999881},{"questionId":"q195","format":"csv","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2271,"outputTokens":427,"latencyMs":3214.532333999872},{"questionId":"q195","format":"xml","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":5489,"outputTokens":210,"latencyMs":2239.8655829997733},{"questionId":"q195","format":"yaml","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3875,"outputTokens":298,"latencyMs":2552.7652079998516},{"questionId":"q196","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":4842,"outputTokens":191,"latencyMs":1745.297875000164},{"questionId":"q196","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":3090,"outputTokens":289,"latencyMs":3169.089292000048},{"questionId":"q196","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":2353,"outputTokens":245,"latencyMs":2550.201374999713},{"questionId":"q196","format":"csv","model":"gemini-3-flash-preview","expected":"6"
,"actual":"6","isCorrect":true,"inputTokens":2267,"outputTokens":324,"latencyMs":2538.415124999825},{"questionId":"q196","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":5485,"outputTokens":241,"latencyMs":2524.748583999928},{"questionId":"q196","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":3871,"outputTokens":439,"latencyMs":3012.655792000238},{"questionId":"q197","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":20056,"outputTokens":1118,"latencyMs":8078.058124999981},{"questionId":"q197","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":15094,"outputTokens":2900,"latencyMs":16296.94633299997},{"questionId":"q197","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12426,"outputTokens":5207,"latencyMs":29083.650500000454},{"questionId":"q197","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12273,"outputTokens":3685,"latencyMs":27621.482290999964},{"questionId":"q197","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":21942,"outputTokens":1658,"latencyMs":12432.322666999884},{"questionId":"q197","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":17140,"outputTokens":6480,"latencyMs":39716.9951249999},{"questionId":"q198","format":"json-pretty","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":20062,"outputTokens":493,"latencyMs":6124.747957999818},{"questionI
d":"q198","format":"json-compact","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":15100,"outputTokens":428,"latencyMs":3494.3070420003496},{"questionId":"q198","format":"toon","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":12432,"outputTokens":341,"latencyMs":3434.7930000000633},{"questionId":"q198","format":"csv","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":12279,"outputTokens":310,"latencyMs":3622.276000000071},{"questionId":"q198","format":"xml","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":21948,"outputTokens":518,"latencyMs":5547.129416999873},{"questionId":"q198","format":"yaml","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17146,"outputTokens":362,"latencyMs":3922.9449999998324},{"questionId":"q199","format":"json-pretty","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":20060,"outputTokens":429,"latencyMs":5872.227500000037
},{"questionId":"q199","format":"json-compact","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":15098,"outputTokens":401,"latencyMs":3150.725666999817},{"questionId":"q199","format":"toon","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":12430,"outputTokens":274,"latencyMs":2704.2370000001974},{"questionId":"q199","format":"csv","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":12277,"outputTokens":232,"latencyMs":3072.834290999919},{"questionId":"q199","format":"xml","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":21946,"outputTokens":376,"latencyMs":3661.298082999885},{"questionId":"q199","format":"yaml","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":17144,"outputTokens":398,"latencyMs":3777.305541999638},{"questionId":"q200","format":"json-pretty","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":20060,"outputTokens":341,"latencyMs":3306.3240839997306},{"questionId":"q200","format":"json-compact","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":15098,"outputTokens":245,"latencyMs":2386.4694590000436},{"questionId":"q200","format":"toon","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":12430,"outputTokens":538,"latencyMs":4471.466667000204},{"questionId":"q200","format":"csv","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":12277,"outputTokens":626,"latencyMs":6235.981041999999},{"questionId":"q200","format":"xml","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":21946,"outputTokens":3
79,"latencyMs":4212.068833000027},{"questionId":"q200","format":"yaml","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17144,"outputTokens":408,"latencyMs":4292.555374999996},{"questionId":"q201","format":"json-pretty","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":20057,"outputTokens":526,"latencyMs":4570.304167000111},{"questionId":"q201","format":"json-compact","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":15095,"outputTokens":469,"latencyMs":4114.027958999854},{"questionId":"q201","format":"toon","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":12427,"outputTokens":483,"latencyMs":4718.540916999802},{"questionId":"q201","format":"csv","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":12274,"outputTokens":265,"latencyMs":3433.0660839998163},{"questionId":"q201","format":"xml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":21943,"outputTokens":395,"latencyMs":3935.7870419998653},{"questionId":"q201","format":"yaml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":17141,"outputTokens":569,"latencyMs":5149.430582999717},{"questionId":"q202","format":"json-pretty","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":8583,"outputTokens":3184,"latencyMs":18665.33362499997},{"questionId":"q202","format":"json-compact","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":5910,"outputTokens":4136,"latencyMs":20826.376708999742},{"questionId":"q202","format":"toon","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":7037,"outputTokens":4510,"latencyMs":21621.044499999844},{"questionId":"q202","format":"xml","model":"g
emini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":9406,"outputTokens":6379,"latencyMs":34324.00137499999},{"questionId":"q202","format":"yaml","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":6968,"outputTokens":11381,"latencyMs":50759.27483400004},{"questionId":"q203","format":"json-pretty","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":8593,"outputTokens":15868,"latencyMs":102603.057792},{"questionId":"q203","format":"json-compact","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":5920,"outputTokens":10808,"latencyMs":60124.26979199983},{"questionId":"q203","format":"toon","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":7047,"outputTokens":15092,"latencyMs":80030.22683299985},{"questionId":"q203","format":"xml","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":9416,"outputTokens":9526,"latencyMs":58497.83049999969},{"questionId":"q203","format":"yaml","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6978,"outputTokens":11767,"latencyMs":61055.251166999806},{"questionId":"q204","format":"json-pretty","model":"gemi
ni-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":8587,"outputTokens":207,"latencyMs":2769.122083000373},{"questionId":"q204","format":"json-compact","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":5914,"outputTokens":291,"latencyMs":2656.535166000016},{"questionId":"q204","format":"toon","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":7041,"outputTokens":382,"latencyMs":2987.1058339998126},{"questionId":"q204","format":"xml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":9410,"outputTokens":529,"latencyMs":4111.459499999881},{"questionId":"q204","format":"yaml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":6972,"outputTokens":356,"latencyMs":2771.627915999852},{"questionId":"q205","format":"json-pretty","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1679,"outputTokens":364,"latencyMs":3299.07416700013},{"questionId":"q205","format":"json-compact","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1027,"outputTokens":383,"latencyMs":4504.669333000202},{"questionId":"q205","format":"toon","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":791,"outputTokens":5093,"latencyMs":26681.41149999993},{"questionId":"q205","format":"csv","model":"gemini-3-flash-preview","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":740,"outputTokens":1047,"latencyMs":5415.330792000052},{"questionId":"q205","format":"xml","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1904,"outputTokens":17764,"latencyMs":100895.7315410003},{"questionId":"q205","format":"yaml","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1234,
"outputTokens":200,"latencyMs":2292.195166000165},{"questionId":"q206","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1439,"outputTokens":351,"latencyMs":3390.177709000185},{"questionId":"q206","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":883,"outputTokens":1294,"latencyMs":7823.7045840001665},{"questionId":"q206","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":689,"outputTokens":1630,"latencyMs":8854.233624999877},{"questionId":"q206","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":641,"outputTokens":1091,"latencyMs":6341.708249999676},{"questionId":"q206","format":"xml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1628,"outputTokens":2333,"latencyMs":14825.134624999948},{"questionId":"q206","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1059,"outputTokens":502,"latencyMs":4344.449874999933},{"questionId":"q207","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1906,"outputTokens":10728,"latencyMs":60138.674999999814},{"questionId":"q207","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1158,"outputTokens":11149,"latencyMs":60800.22116599977},{"questionId":"q207","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":880,"outputTokens":14565,"latencyMs":73282.9954169998},{"questionId":"q207","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":826,"outputTokens":872,"latencyMs":5295.445292000193},{"questionId":"q207","format":"xml","model":
"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":2167,"outputTokens":2846,"latencyMs":18577.68866700027},{"questionId":"q207","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1396,"outputTokens":1407,"latencyMs":9823.643208999652},{"questionId":"q208","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1667,"outputTokens":747,"latencyMs":5737.518333999906},{"questionId":"q208","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1019,"outputTokens":2318,"latencyMs":12557.172875000164},{"questionId":"q208","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1252,"outputTokens":6575,"latencyMs":37929.264667000156},{"questionId":"q208","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":734,"outputTokens":1047,"latencyMs":5862.587791000027},{"questionId":"q208","format":"xml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1891,"outputTokens":398,"latencyMs":3451.928125000093},{"questionId":"q208","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1224,"outputTokens":586,"latencyMs":4121.9415830001235},{"questionId":"q209","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1615,"outputTokens":370,"latencyMs":2777.636208000127},{"questionId":"q209","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":979,"outputTokens":1354,"latencyMs":7486.286458000075},{"questionId":"q209","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1210,"outpu
tTokens":20177,"latencyMs":113554.60433300026},{"questionId":"q209","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":536,"outputTokens":1066,"latencyMs":5578.249165999703},{"questionId":"q209","format":"xml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1836,"outputTokens":16221,"latencyMs":89791.01670799963},{"questionId":"q209","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1182,"outputTokens":191,"latencyMs":1986.013666999992}] ================================================ FILE: benchmarks/results/accuracy/models/gpt-5-nano ================================================ [{"questionId":"q1","format":"json-pretty","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":6453,"outputTokens":72,"latencyMs":2972.5673339999994},{"questionId":"q1","format":"json-compact","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":4046,"outputTokens":136,"latencyMs":3134.6921249999996},{"questionId":"q1","format":"toon","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2610,"outputTokens":136,"latencyMs":3895.96425},{"questionId":"q1","format":"csv","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2444,"outputTokens":136,"latencyMs":3544.769292},{"questionId":"q1","format":"xml","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":7415,"outputTokens":264,"latencyMs":7031.435208000001},{"questionId":"q1","format":"yaml","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":5073,"outputTokens":136,"latencyMs":3521.167166},{"questionId":"q2","format":"json-pretty","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":6453,"outputTokens":135,"latencyMs":3347
.5196659999997},{"questionId":"q2","format":"json-compact","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4046,"outputTokens":263,"latencyMs":5199.591958},{"questionId":"q2","format":"toon","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2610,"outputTokens":199,"latencyMs":3885.8805},{"questionId":"q2","format":"csv","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2444,"outputTokens":135,"latencyMs":3269.682833},{"questionId":"q2","format":"xml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7415,"outputTokens":199,"latencyMs":6971.545625000001},{"questionId":"q2","format":"yaml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5073,"outputTokens":135,"latencyMs":5154.9494159999995},{"questionId":"q3","format":"json-pretty","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":6455,"outputTokens":204,"latencyMs":3733.00875},{"questionId":"q3","format":"json-compact","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":4048,"outputTokens":268,"latencyMs":3525.1048329999994},{"questionId":"q3","format":"toon","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2612,"outputTokens":268,"latencyMs":8655.205000000002},{"questionId":"q3","format":"csv","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2446,"outputTokens":204,"latencyMs":6536.7831670000005},{"questionId":"q3","format":"xml","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":7417,"outputTokens":268,"latencyMs":5954.473
},{"questionId":"q3","format":"yaml","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":5075,"outputTokens":268,"latencyMs":3160.4033339999996},{"questionId":"q4","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":6455,"outputTokens":199,"latencyMs":3313.826750000001},{"questionId":"q4","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4048,"outputTokens":199,"latencyMs":3131.2316250000003},{"questionId":"q4","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2612,"outputTokens":199,"latencyMs":5442.334000000001},{"questionId":"q4","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2446,"outputTokens":199,"latencyMs":3414.6267500000013},{"questionId":"q4","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":7417,"outputTokens":327,"latencyMs":8402.244708},{"questionId":"q4","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":5075,"outputTokens":199,"latencyMs":3475.437167},{"questionId":"q5","format":"json-pretty","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":6451,"outputTokens":263,"latencyMs":4907.695667},{"questionId":"q5","format":"json-compact","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":4044,"outputTokens":327,"latencyMs":4696.913291999999},{"questionId":"q5","format":"toon","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":2608,"outputTokens":391,"latencyMs":3964.1052500000005},{"questionId":"q5","format":"csv","model":"gpt-5-nano","expected":"no","actual":"0","isCorrect":true,"inputTokens":2442,"outputTokens":391,"latencyMs":4546.052125},{"questionId":"q5","format":"xml","model":"gpt-5-
nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":7413,"outputTokens":263,"latencyMs":12447.142749999999},{"questionId":"q5","format":"yaml","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":5071,"outputTokens":327,"latencyMs":5445.827582999998},{"questionId":"q6","format":"json-pretty","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":6452,"outputTokens":200,"latencyMs":2964.3259579999994},{"questionId":"q6","format":"json-compact","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":4045,"outputTokens":136,"latencyMs":2476.4536669999998},{"questionId":"q6","format":"toon","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2609,"outputTokens":264,"latencyMs":10081.144249999998},{"questionId":"q6","format":"csv","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2443,"outputTokens":200,"latencyMs":3566.9233750000003},{"questionId":"q6","format":"xml","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":7414,"outputTokens":200,"latencyMs":3328.505791999998},{"questionId":"q6","format":"yaml","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":5072,"outputTokens":136,"latencyMs":2627.9952919999996},{"questionId":"q7","format":"json-pretty","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":6453,"outputTokens":199,"latencyMs":6494.151333},{"questionId":"q7","format":"json-compact","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4046,"outputTokens":327,"latencyMs":7962.695958},{"questionId":"q7","format":"toon","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2610,"outputTokens":135,"latencyMs":3294.491083000001},{"questionId":"q7","format":
"csv","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2444,"outputTokens":71,"latencyMs":2967.147291999998},{"questionId":"q7","format":"xml","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7415,"outputTokens":199,"latencyMs":5902.4375},{"questionId":"q7","format":"yaml","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5073,"outputTokens":135,"latencyMs":2749.074292000001},{"questionId":"q8","format":"json-pretty","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":6454,"outputTokens":397,"latencyMs":6029.30125},{"questionId":"q8","format":"json-compact","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":4047,"outputTokens":525,"latencyMs":9383.724667000002},{"questionId":"q8","format":"toon","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2611,"outputTokens":205,"latencyMs":6669.472833},{"questionId":"q8","format":"csv","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2445,"outputTokens":269,"latencyMs":4525.544707999998},{"questionId":"q8","format":"xml","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":7416,"outputTokens":205,"latencyMs":3729.2334999999985},{"questionId":"q8","format":"yaml","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":5074,"outputTokens":205,"latencyMs":5299.043415999997},{"questionId":"q9","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6456,"outputTokens":199,"latencyMs":8089.861666000001},{"quest
ionId":"q9","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":4049,"outputTokens":135,"latencyMs":3481.7531249999993},{"questionId":"q9","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":2613,"outputTokens":519,"latencyMs":6562.223791000004},{"questionId":"q9","format":"csv","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":2447,"outputTokens":199,"latencyMs":6497.464292000004},{"questionId":"q9","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7418,"outputTokens":263,"latencyMs":4308.770542000006},{"questionId":"q9","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5076,"outputTokens":455,"latencyMs":7165.029666999995},{"questionId":"q10","format":"json-pretty","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":6453,"outputTokens":199,"latencyMs":7847.269749999992},{"questionId":"q10","format":"json-compact","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4046,"outputTokens":263,"latencyMs":5162.878791000003},{"questionId":"q10","format":"toon","model":"gpt-5-nano","expected":"yes","actual":"false","isCorrect":false,"inputTokens":2610,"outputTokens":3015,"latencyMs":44119.86958299999},{"questionId":"q10","format":"csv","model":"gpt-5-nano","expected":"yes","actual":"1","isCorrect":true,"inputTokens":2444,"outputTokens":711,"latencyMs":10732.124499999998},{"questionId":"q10","format":"xml","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7415,"outputTokens":455,"latencyMs":7670.041000000005},{"questionId":"q10","format":"yaml","model":"gpt-5-nano","expected":"yes","actual":"false","isCorrect":false,"inputTokens":5073,"outputTokens":1671,"latencyMs":32276.611915999994},{"questionId":"q11","format":"json-pretty","model":"gpt-5-nano","expected":"
109064","actual":"109064","isCorrect":true,"inputTokens":6453,"outputTokens":200,"latencyMs":5021.2276249999995},{"questionId":"q11","format":"json-compact","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":4046,"outputTokens":136,"latencyMs":4285.859167000002},{"questionId":"q11","format":"toon","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2610,"outputTokens":136,"latencyMs":2937.344834000003},{"questionId":"q11","format":"csv","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2444,"outputTokens":136,"latencyMs":2420.909792000006},{"questionId":"q11","format":"xml","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":7415,"outputTokens":136,"latencyMs":2657.114041000008},{"questionId":"q11","format":"yaml","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":5073,"outputTokens":136,"latencyMs":4141.019750000007},{"questionId":"q12","format":"json-pretty","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":6451,"outputTokens":327,"latencyMs":8326.443249999997},{"questionId":"q12","format":"json-compact","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4044,"outputTokens":71,"latencyMs":1961.1611669999984},{"questionId":"q12","format":"toon","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2608,"outputTokens":455,"latencyMs":9416.313375000012},{"questionId":"q12","format":"csv","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2442,"outputTokens":199,"latencyMs":3679.7195409999986},{"questionId":"q12","format":"xml","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":7413,"outputTokens":391,"latencyMs":4654.398666000008},{"questionId":"q1
2","format":"yaml","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5071,"outputTokens":263,"latencyMs":5227.314999999988},{"questionId":"q13","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6450,"outputTokens":583,"latencyMs":11891.486790999988},{"questionId":"q13","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4043,"outputTokens":903,"latencyMs":11808.044291999991},{"questionId":"q13","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2607,"outputTokens":1031,"latencyMs":13869.627583000009},{"questionId":"q13","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2441,"outputTokens":1095,"latencyMs":11982.031124999994},{"questionId":"q13","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7412,"outputTokens":1159,"latencyMs":12268.117834000004},{"questionId":"q13","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5070,"outputTokens":903,"latencyMs":14245.349500000011},{"questionId":"q14","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6450,"outputTokens":583,"latencyMs":10854.242750000005},{"questionId":"q14","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4043,"outputTokens":1543,"latencyMs":14848.513416999995},{"questionId":"q14","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2607,"outputTokens":1351,"latencyMs":18436.125499999995},{"questionId":"q14","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2441,"outputTokens":583,"latencyMs":7873.872290999992},{"questionId":"q14","format":"xml","model":"gpt-5-nano","
expected":"17","actual":"17","isCorrect":true,"inputTokens":7412,"outputTokens":1159,"latencyMs":16891.326750000007},{"questionId":"q14","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5070,"outputTokens":775,"latencyMs":11653.730375},{"questionId":"q15","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6450,"outputTokens":711,"latencyMs":10938.143958},{"questionId":"q15","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4043,"outputTokens":839,"latencyMs":9845.249500000005},{"questionId":"q15","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2607,"outputTokens":1351,"latencyMs":17360.869999999995},{"questionId":"q15","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2441,"outputTokens":583,"latencyMs":6245.138082999998},{"questionId":"q15","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7412,"outputTokens":583,"latencyMs":6926.366333000013},{"questionId":"q15","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5070,"outputTokens":1159,"latencyMs":10682.860499999995},{"questionId":"q16","format":"json-pretty","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":6455,"outputTokens":2887,"latencyMs":26044.378417},{"questionId":"q16","format":"json-compact","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":4048,"outputTokens":2311,"latencyMs":20886.991708999994},{"questionId":"q16","format":"toon","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":2612,"outputTokens":3207,"latencyMs":38211.708791},{"questionId":"q16","format":"csv","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":2446,"outputTokens":
3591,"latencyMs":34722.982124999995},{"questionId":"q16","format":"xml","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":7417,"outputTokens":2759,"latencyMs":27677.160040999996},{"questionId":"q16","format":"yaml","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":5075,"outputTokens":2183,"latencyMs":21999.34112499999},{"questionId":"q17","format":"json-pretty","model":"gpt-5-nano","expected":"67","actual":"68","isCorrect":false,"inputTokens":6455,"outputTokens":2375,"latencyMs":24885.276625},{"questionId":"q17","format":"json-compact","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":4048,"outputTokens":2567,"latencyMs":23865.78125},{"questionId":"q17","format":"toon","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":2612,"outputTokens":3271,"latencyMs":33953.05562499999},{"questionId":"q17","format":"csv","model":"gpt-5-nano","expected":"67","actual":"66","isCorrect":false,"inputTokens":2446,"outputTokens":2695,"latencyMs":28272.904916999993},{"questionId":"q17","format":"xml","model":"gpt-5-nano","expected":"67","actual":"66","isCorrect":false,"inputTokens":7417,"outputTokens":2183,"latencyMs":20128.4455},{"questionId":"q17","format":"yaml","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":5075,"outputTokens":2887,"latencyMs":27213.535542000012},{"questionId":"q18","format":"json-pretty","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":6455,"outputTokens":2119,"latencyMs":23143.38708300001},{"questionId":"q18","format":"json-compact","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":4048,"outputTokens":1735,"latencyMs":19879.81758399999},{"questionId":"q18","format":"toon","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":2612,"outputTokens":1159,"latencyMs":11657.12725000002},{"questionId":"q18","format":"csv"
,"model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":2446,"outputTokens":1607,"latencyMs":17132.650707999986},{"questionId":"q18","format":"xml","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":7417,"outputTokens":2119,"latencyMs":27134.673207999993},{"questionId":"q18","format":"yaml","model":"gpt-5-nano","expected":"41","actual":"42","isCorrect":false,"inputTokens":5075,"outputTokens":1671,"latencyMs":17106.441208000004},{"questionId":"q19","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":6451,"outputTokens":135,"latencyMs":2293.6627090000256},{"questionId":"q19","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":4044,"outputTokens":135,"latencyMs":2780.268083999981},{"questionId":"q19","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2608,"outputTokens":199,"latencyMs":4030.7550000000047},{"questionId":"q19","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2442,"outputTokens":327,"latencyMs":4502.81479199999},{"questionId":"q19","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":7413,"outputTokens":199,"latencyMs":5037.844874999981},{"questionId":"q19","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":5071,"outputTokens":199,"latencyMs":2760.7317500000063},{"questionId":"q20","format":"json-pretty","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":true,"inputTokens":6452,"outputTokens":4554,"latencyMs":44195.818792000005},{"questionId":"q20","format":"json-compact","model":"gpt-5-nano","expected":"96503","actual":"96493.32","isCorrect":false,"inputTokens":4045,"outputTokens":4362,"latencyMs":49358.45320799999},{"questionId":"q20","format":"toon","model":"gpt-5-nano","expe
cted":"96503","actual":"96503.32","isCorrect":true,"inputTokens":2609,"outputTokens":7882,"latencyMs":80184.430708},{"questionId":"q20","format":"csv","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":true,"inputTokens":2443,"outputTokens":8778,"latencyMs":95858.013875},{"questionId":"q20","format":"xml","model":"gpt-5-nano","expected":"96503","actual":"97054.06","isCorrect":false,"inputTokens":7414,"outputTokens":4426,"latencyMs":53834.34679099999},{"questionId":"q20","format":"yaml","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":true,"inputTokens":5072,"outputTokens":6666,"latencyMs":84141.234291},{"questionId":"q21","format":"json-pretty","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":6449,"outputTokens":1415,"latencyMs":16387.246041000006},{"questionId":"q21","format":"json-compact","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":4042,"outputTokens":1671,"latencyMs":25604.649542},{"questionId":"q21","format":"toon","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":2606,"outputTokens":1735,"latencyMs":26253.37858400002},{"questionId":"q21","format":"csv","model":"gpt-5-nano","expected":"78","actual":"80","isCorrect":false,"inputTokens":2440,"outputTokens":2439,"latencyMs":42881.88175},{"questionId":"q21","format":"xml","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":7411,"outputTokens":1543,"latencyMs":18824.236875000002},{"questionId":"q21","format":"yaml","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":5069,"outputTokens":1351,"latencyMs":19731.303249999997},{"questionId":"q22","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":6449,"outputTokens":1031,"latencyMs":13784.215957999986},{"questionId":"q22","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"i
nputTokens":4042,"outputTokens":1223,"latencyMs":13702.792790999985},{"questionId":"q22","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2606,"outputTokens":1223,"latencyMs":14749.500458000024},{"questionId":"q22","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2440,"outputTokens":1927,"latencyMs":21055.402042},{"questionId":"q22","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":7411,"outputTokens":1031,"latencyMs":16298.56808300002},{"questionId":"q22","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":5069,"outputTokens":1031,"latencyMs":15148.553374999989},{"questionId":"q23","format":"json-pretty","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":6457,"outputTokens":1223,"latencyMs":23748.731792000006},{"questionId":"q23","format":"json-compact","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":4050,"outputTokens":1095,"latencyMs":13191.228082999995},{"questionId":"q23","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2614,"outputTokens":1799,"latencyMs":19708.061292},{"questionId":"q23","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2448,"outputTokens":1863,"latencyMs":17711.145375000022},{"questionId":"q23","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7419,"outputTokens":1287,"latencyMs":14610.536499999987},{"questionId":"q23","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5077,"outputTokens":1927,"latencyMs":21769.996958999982},{"questionId":"q24","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":1159,"latencyMs":14361.063832999993},{
"questionId":"q24","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4050,"outputTokens":903,"latencyMs":10174.464332999982},{"questionId":"q24","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1415,"latencyMs":14791.934709000023},{"questionId":"q24","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":903,"latencyMs":10505.627374999982},{"questionId":"q24","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1415,"latencyMs":15141.258583000017},{"questionId":"q24","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outputTokens":2503,"latencyMs":25832.709875},{"questionId":"q25","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":1415,"latencyMs":17542.110833000013},{"questionId":"q25","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4050,"outputTokens":1415,"latencyMs":15454.643125000002},{"questionId":"q25","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1671,"latencyMs":18372.684709000023},{"questionId":"q25","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":1607,"latencyMs":18959.7825},{"questionId":"q25","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1351,"latencyMs":17531.771584000002},{"questionId":"q25","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outputTokens":1863,"latencyMs":20752.042625000002},{"questionId":"q26","format":"json-pretty","model":"gpt-5-nano","expected
":"12","actual":"11","isCorrect":false,"inputTokens":6457,"outputTokens":903,"latencyMs":11434.620958999993},{"questionId":"q26","format":"json-compact","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":4050,"outputTokens":1799,"latencyMs":26667.313249999977},{"questionId":"q26","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2614,"outputTokens":1543,"latencyMs":19438.584040999995},{"questionId":"q26","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2448,"outputTokens":1863,"latencyMs":24016.536166999984},{"questionId":"q26","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7419,"outputTokens":1031,"latencyMs":12378.093457999988},{"questionId":"q26","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5077,"outputTokens":1735,"latencyMs":18425.30349999998},{"questionId":"q27","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":1031,"latencyMs":13011.211625000025},{"questionId":"q27","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":4050,"outputTokens":1031,"latencyMs":12447.250166999991},{"questionId":"q27","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1799,"latencyMs":21169.804915999994},{"questionId":"q27","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":1095,"latencyMs":13325.901125000004},{"questionId":"q27","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1799,"latencyMs":20041.018916},{"questionId":"q27","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outp
utTokens":839,"latencyMs":9830.15854200002},{"questionId":"q28","format":"json-pretty","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":6456,"outputTokens":2695,"latencyMs":34929.70891699998},{"questionId":"q28","format":"json-compact","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":4049,"outputTokens":2695,"latencyMs":32068.38629199998},{"questionId":"q28","format":"toon","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":2613,"outputTokens":3207,"latencyMs":35007.753916999965},{"questionId":"q28","format":"csv","model":"gpt-5-nano","expected":"63","actual":"64","isCorrect":false,"inputTokens":2447,"outputTokens":4295,"latencyMs":44454.070875000034},{"questionId":"q28","format":"xml","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":7418,"outputTokens":4359,"latencyMs":53078.10720800003},{"questionId":"q28","format":"yaml","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":5076,"outputTokens":5383,"latencyMs":62424.056374999986},{"questionId":"q29","format":"json-pretty","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":6456,"outputTokens":3527,"latencyMs":38803.08712500002},{"questionId":"q29","format":"json-compact","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":4049,"outputTokens":2631,"latencyMs":36088.47983299999},{"questionId":"q29","format":"toon","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":2613,"outputTokens":3463,"latencyMs":37541.076541999995},{"questionId":"q29","format":"csv","model":"gpt-5-nano","expected":"53","actual":"54","isCorrect":false,"inputTokens":2447,"outputTokens":4423,"latencyMs":47284.71529199998},{"questionId":"q29","format":"xml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":7418,"outputTokens":2375,"latencyMs":26526.219709000026},{"questio
nId":"q29","format":"yaml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":5076,"outputTokens":2183,"latencyMs":22403.027584000025},{"questionId":"q30","format":"json-pretty","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":6456,"outputTokens":1607,"latencyMs":17674.940834000008},{"questionId":"q30","format":"json-compact","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":4049,"outputTokens":3591,"latencyMs":36116.21895800001},{"questionId":"q30","format":"toon","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":2613,"outputTokens":3271,"latencyMs":28577.131834},{"questionId":"q30","format":"csv","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":2447,"outputTokens":3527,"latencyMs":35360.875459},{"questionId":"q30","format":"xml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":7418,"outputTokens":2247,"latencyMs":22521.478082999995},{"questionId":"q30","format":"yaml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":5076,"outputTokens":2503,"latencyMs":25049.46987499995},{"questionId":"q31","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":2375,"latencyMs":23942.841625},{"questionId":"q31","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4050,"outputTokens":1223,"latencyMs":32165.900916000013},{"questionId":"q31","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1351,"latencyMs":14162.266958999971},{"questionId":"q31","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":1479,"latencyMs":17867.209082999965},{"questionId":"q31","format":"xml","model":"gpt-5-nano","expected":"11","actu
al":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1671,"latencyMs":16419.887457999983},{"questionId":"q31","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outputTokens":1735,"latencyMs":18486.571375},{"questionId":"q32","format":"json-pretty","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":6457,"outputTokens":1351,"latencyMs":13326.963042000018},{"questionId":"q32","format":"json-compact","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":4050,"outputTokens":1287,"latencyMs":12924.029000000039},{"questionId":"q32","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":2614,"outputTokens":1671,"latencyMs":68753.054917},{"questionId":"q32","format":"csv","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":2448,"outputTokens":1735,"latencyMs":20531.763833000034},{"questionId":"q32","format":"xml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":7419,"outputTokens":1479,"latencyMs":25654.030582999985},{"questionId":"q32","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":5077,"outputTokens":1159,"latencyMs":14334.90933299996},{"questionId":"q33","format":"json-pretty","model":"gpt-5-nano","expected":"15","actual":"16","isCorrect":false,"inputTokens":6457,"outputTokens":1991,"latencyMs":22980.818790999998},{"questionId":"q33","format":"json-compact","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":4050,"outputTokens":1223,"latencyMs":14265.010167},{"questionId":"q33","format":"toon","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":2614,"outputTokens":1223,"latencyMs":16021.799958000018},{"questionId":"q33","format":"csv","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":2448,"outputTokens":1095,"latencyMs":1
2047.490749999997},{"questionId":"q33","format":"xml","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":7419,"outputTokens":1735,"latencyMs":20477.510917000007},{"questionId":"q33","format":"yaml","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":5077,"outputTokens":1159,"latencyMs":12986.920958000002},{"questionId":"q34","format":"json-pretty","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":6451,"outputTokens":647,"latencyMs":7786.604583000008},{"questionId":"q34","format":"json-compact","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":4044,"outputTokens":1159,"latencyMs":12812.286625000008},{"questionId":"q34","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2608,"outputTokens":1287,"latencyMs":13650.962291000003},{"questionId":"q34","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2442,"outputTokens":1543,"latencyMs":22916.750333000033},{"questionId":"q34","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7413,"outputTokens":1735,"latencyMs":27190.99129199999},{"questionId":"q34","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5071,"outputTokens":1223,"latencyMs":18113.423624999996},{"questionId":"q35","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6451,"outputTokens":839,"latencyMs":14319.936749999993},{"questionId":"q35","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4044,"outputTokens":1159,"latencyMs":19572.69550000003},{"questionId":"q35","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2608,"outputTokens":2183,"latencyMs":27670.17745899997},{"questionId":"q35","format":"csv",
"model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2442,"outputTokens":2055,"latencyMs":30155.347083},{"questionId":"q35","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7413,"outputTokens":1735,"latencyMs":17707.77020899998},{"questionId":"q35","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5071,"outputTokens":1671,"latencyMs":17215.395207999973},{"questionId":"q36","format":"json-pretty","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":10807,"outputTokens":329,"latencyMs":7442.371583},{"questionId":"q36","format":"json-compact","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":6888,"outputTokens":329,"latencyMs":6396.415417000011},{"questionId":"q36","format":"toon","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":7325,"outputTokens":393,"latencyMs":4602.9237079999875},{"questionId":"q36","format":"xml","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":12115,"outputTokens":265,"latencyMs":4785.739166999992},{"questionId":"q36","format":"yaml","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":8439,"outputTokens":137,"latencyMs":7061.959375000035},{"questionId":"q37","format":"json-pretty","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":10807,"outputTokens":328,"latencyMs":6893.168333999987},{"questionId":"q37","format":"json-compact","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":6888,"outputTokens":392,"latencyMs":5599.888457999972},{"questionId":"q37","format":"toon","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":7325,"outputTokens":136,"latencyMs":3737.4124590000138},{"questionId":"q37","format":"xml","m
odel":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":12115,"outputTokens":200,"latencyMs":6961.4233330000425},{"questionId":"q37","format":"yaml","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8439,"outputTokens":264,"latencyMs":5140.496583},{"questionId":"q38","format":"json-pretty","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":10807,"outputTokens":585,"latencyMs":7864.396957999968},{"questionId":"q38","format":"json-compact","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":6888,"outputTokens":521,"latencyMs":6421.59816600004},{"questionId":"q38","format":"toon","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":7325,"outputTokens":265,"latencyMs":3733.0714159999625},{"questionId":"q38","format":"xml","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":12115,"outputTokens":393,"latencyMs":9006.905709000013},{"questionId":"q38","format":"yaml","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":8439,"outputTokens":329,"latencyMs":6850.975207999989},{"questionId":"q39","format":"json-pretty","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":10807,"outputTokens":391,"latencyMs":5922.136790999968},{"questionId":"q39","format":"json-compact","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":6888,"outputTokens":391,"latencyMs":5514.317334000021},{"questionId":"q39","format":"toon","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":7325,"outputTokens":199,"latencyMs":5819.365749999997},{"questionId":"q39","format":"xml","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":12115,"outputTokens":327,"latencyMs":4577.5100000
00009},{"questionId":"q39","format":"yaml","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8439,"outputTokens":199,"latencyMs":3300.337999999989},{"questionId":"q40","format":"json-pretty","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":10807,"outputTokens":522,"latencyMs":10160.32945900003},{"questionId":"q40","format":"json-compact","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":6888,"outputTokens":586,"latencyMs":8726.273291999998},{"questionId":"q40","format":"toon","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":7325,"outputTokens":458,"latencyMs":7168.197749999992},{"questionId":"q40","format":"xml","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":12115,"outputTokens":330,"latencyMs":9622.921792000008},{"questionId":"q40","format":"yaml","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":8439,"outputTokens":458,"latencyMs":12277.717749999953},{"questionId":"q41","format":"json-pretty","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":10807,"outputTokens":647,"latencyMs":10452.618916000007},{"questionId":"q41","format":"json-compact","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":6888,"outputTokens":327,"latencyMs":7993.501333000022},{"questionId":"q41","format":"toon","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":7325,"outputTokens":391,"latencyMs":6246.408166999987},{"questionId":"q41","format":"xml","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":12115,"outputTokens":327,"latencyMs":6159.920333000016},{"questionId":"q41","format":"yaml","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTok
ens":8439,"outputTokens":135,"latencyMs":4936.892832999991},{"questionId":"q42","format":"json-pretty","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":10807,"outputTokens":330,"latencyMs":6513.42883400002},{"questionId":"q42","format":"json-compact","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":6888,"outputTokens":266,"latencyMs":5169.701290999947},{"questionId":"q42","format":"toon","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":7325,"outputTokens":394,"latencyMs":6359.4133749999455},{"questionId":"q42","format":"xml","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":12115,"outputTokens":458,"latencyMs":7373.879582999973},{"questionId":"q42","format":"yaml","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":8439,"outputTokens":586,"latencyMs":9219.977708999999},{"questionId":"q43","format":"json-pretty","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10807,"outputTokens":456,"latencyMs":6681.0621670000255},{"questionId":"q43","format":"json-compact","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":6888,"outputTokens":456,"latencyMs":6576.267416999966},{"questionId":"q43","format":"toon","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7325,"outputTokens":328,"latencyMs":5022.24679200002},{"questionId":"q43","format":"xml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12115,"outputTokens":200,"latencyMs":3512.9928749999963},{"questionId":"q43","format":"yaml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8439,"outputTokens":264,"latencyMs":5002.190166999993},{"questionId":"q44","format":"json-pretty","model":"gpt-5
-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":10808,"outputTokens":332,"latencyMs":4617.067666999996},{"questionId":"q44","format":"json-compact","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":6889,"outputTokens":780,"latencyMs":10020.49199999997},{"questionId":"q44","format":"toon","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":7326,"outputTokens":268,"latencyMs":7557.820707999985},{"questionId":"q44","format":"xml","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":12116,"outputTokens":588,"latencyMs":8139.475207999989},{"questionId":"q44","format":"yaml","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":8440,"outputTokens":268,"latencyMs":4885.101999999955},{"questionId":"q45","format":"json-pretty","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":10808,"outputTokens":399,"latencyMs":5571.73229200003},{"questionId":"q45","format":"json-compact","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":6889,"outputTokens":655,"latencyMs":10517.11179200001},{"questionId":"q45","format":"toon","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":7326,"outputTokens":271,"latencyMs":4059.1732920000213},{"questionId":"q45","format":"xml","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":12116,"outputTokens":143,"latencyMs":3110.5762920000125},{"questionId":"q45","format":"yaml","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.
com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":8440,"outputTokens":207,"latencyMs":3715.018374999985},{"questionId":"q46","format":"json-pretty","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":10808,"outputTokens":844,"latencyMs":10064.53433299996},{"questionId":"q46","format":"json-compact","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":6889,"outputTokens":716,"latencyMs":11513.566083000042},{"questionId":"q46","format":"toon","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":7326,"outputTokens":332,"latencyMs":4870.685041000019},{"questionId":"q46","format":"xml","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":12116,"outputTokens":396,"latencyMs":5502.409417000017},{"questionId":"q46","format":"yaml","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":8440,"outputTokens":204,"latencyMs":3546.790500000003},{"questionId":"q47","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":10807,"outputTokens":711,"latencyMs":8666.248917000019},{"questionId":"q47","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6888,"outputTokens":391,"latencyMs":5154.15979200002},{"questionId":"q47","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":7325,"outputTokens":455,"latencyMs":6469.648125000007},{"questionId":"q47","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":12115,"outputTokens":967,"latencyMs":12026.984790999966},{"questionId":"q47","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":8439,"outputTokens":327,"latencyMs":4457.898167000036},{"questionId":"q48","format":"j
son-pretty","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":10808,"outputTokens":266,"latencyMs":4677.685125000018},{"questionId":"q48","format":"json-compact","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":6889,"outputTokens":458,"latencyMs":7593.830499999982},{"questionId":"q48","format":"toon","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":7326,"outputTokens":330,"latencyMs":7164.58362499997},{"questionId":"q48","format":"xml","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":12116,"outputTokens":202,"latencyMs":4569.4382500000065},{"questionId":"q48","format":"yaml","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":8440,"outputTokens":266,"latencyMs":8655.245208000008},{"questionId":"q49","format":"json-pretty","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":10808,"outputTokens":333,"latencyMs":9397.804249999986},{"questionId":"q49","format":"json-compact","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":6889,"outputTokens":525,"latencyMs":7369.637999999977},{"questionId":"q49","format":"toon","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":7326,"outputTokens":269,"latencyMs":7476.084625000018},{"questionId":"q49","format":"xml","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":12116,"outputTokens":205,"latencyMs":4457.102749999962},{"questionId":"q49","format":"yaml","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":8440,"outputTokens":205,"latencyMs
":3285.5180420000106},{"questionId":"q50","format":"json-pretty","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":10808,"outputTokens":332,"latencyMs":5102.7447909999755},{"questionId":"q50","format":"json-compact","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":6889,"outputTokens":652,"latencyMs":8489.679457999999},{"questionId":"q50","format":"toon","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":7326,"outputTokens":204,"latencyMs":7314.751374999993},{"questionId":"q50","format":"xml","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":12116,"outputTokens":332,"latencyMs":5297.356582999986},{"questionId":"q50","format":"yaml","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":8440,"outputTokens":460,"latencyMs":5892.525124999986},{"questionId":"q51","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":10807,"outputTokens":519,"latencyMs":7676.625582999957},{"questionId":"q51","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":6888,"outputTokens":711,"latencyMs":10736.315040999965},{"questionId":"q51","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":7325,"outputTokens":327,"latencyMs":7610.965416999999},{"questionId":"q51","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":12115,"outputTokens":647,"latencyMs":9436.054707999981},{"questionId":"q51","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":8439,"outputTokens":583,"latencyMs":7257.893417000014},{"questionId":"q52","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10804,"outputTokens
":1031,"latencyMs":12227.468916999991},{"questionId":"q52","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6885,"outputTokens":903,"latencyMs":10091.694916000008},{"questionId":"q52","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7322,"outputTokens":839,"latencyMs":10802.154916999978},{"questionId":"q52","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12112,"outputTokens":1287,"latencyMs":15510.80912499997},{"questionId":"q52","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8436,"outputTokens":775,"latencyMs":11378.500208000012},{"questionId":"q53","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10804,"outputTokens":775,"latencyMs":8308.13866700005},{"questionId":"q53","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6885,"outputTokens":775,"latencyMs":9544.55587500002},{"questionId":"q53","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7322,"outputTokens":775,"latencyMs":17279.684707999986},{"questionId":"q53","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12112,"outputTokens":967,"latencyMs":9357.227749999962},{"questionId":"q53","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8436,"outputTokens":647,"latencyMs":7357.085124999983},{"questionId":"q54","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10805,"outputTokens":903,"latencyMs":9983.735792000021},{"questionId":"q54","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6886,"outputTokens":775,"latencyMs":8389.489208000014},{"questionId":"
q54","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7323,"outputTokens":711,"latencyMs":8864.96325000003},{"questionId":"q54","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12113,"outputTokens":903,"latencyMs":11110.859708000033},{"questionId":"q54","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8437,"outputTokens":647,"latencyMs":9254.883999999962},{"questionId":"q55","format":"json-pretty","model":"gpt-5-nano","expected":"34904.81","actual":"34682.16","isCorrect":false,"inputTokens":10805,"outputTokens":3210,"latencyMs":35287.34470799996},{"questionId":"q55","format":"json-compact","model":"gpt-5-nano","expected":"34904.81","actual":"35004.81","isCorrect":false,"inputTokens":6886,"outputTokens":3082,"latencyMs":53411.28241600003},{"questionId":"q55","format":"toon","model":"gpt-5-nano","expected":"34904.81","actual":"34904.81","isCorrect":true,"inputTokens":7323,"outputTokens":5386,"latencyMs":65610.21349999995},{"questionId":"q55","format":"xml","model":"gpt-5-nano","expected":"34904.81","actual":"35004.81","isCorrect":false,"inputTokens":12113,"outputTokens":3082,"latencyMs":30146.02779099997},{"questionId":"q55","format":"yaml","model":"gpt-5-nano","expected":"34904.81","actual":"34904.81","isCorrect":true,"inputTokens":8437,"outputTokens":5194,"latencyMs":47443.21795800002},{"questionId":"q56","format":"json-pretty","model":"gpt-5-nano","expected":"698.10","actual":"698.10","isCorrect":true,"inputTokens":10803,"outputTokens":7433,"latencyMs":99668.25054200005},{"questionId":"q56","format":"json-compact","model":"gpt-5-nano","expected":"698.10","actual":"698.10","isCorrect":true,"inputTokens":6884,"outputTokens":2825,"latencyMs":29726.150667000038},{"questionId":"q56","format":"toon","model":"gpt-5-nano","expected":"698.10","actual":"698.0962","isCorrect":true,"inputTokens":7321,"outputTokens":3402,"latencyMs":2
9333.87345799996},{"questionId":"q56","format":"xml","model":"gpt-5-nano","expected":"698.10","actual":"698.10","isCorrect":true,"inputTokens":12111,"outputTokens":8713,"latencyMs":88680.83875000005},{"questionId":"q56","format":"yaml","model":"gpt-5-nano","expected":"698.10","actual":"700.10","isCorrect":false,"inputTokens":8435,"outputTokens":2185,"latencyMs":22504.17745899997},{"questionId":"q57","format":"json-pretty","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":10804,"outputTokens":263,"latencyMs":3635.718082999985},{"questionId":"q57","format":"json-compact","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":6885,"outputTokens":455,"latencyMs":6344.6660000000265},{"questionId":"q57","format":"toon","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":7322,"outputTokens":135,"latencyMs":2214.3594589999993},{"questionId":"q57","format":"xml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":12112,"outputTokens":199,"latencyMs":3028.1589169999934},{"questionId":"q57","format":"yaml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":8436,"outputTokens":263,"latencyMs":3726.436791999964},{"questionId":"q58","format":"json-pretty","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":10803,"outputTokens":1034,"latencyMs":17297.540125000058},{"questionId":"q58","format":"json-compact","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":6884,"outputTokens":1226,"latencyMs":15772.636416999972},{"questionId":"q58","format":"toon","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":7321,"outputTokens":714,"latencyMs":7838.149208000046},{"questionId":"q58","format":"xml","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":12111,"outputTokens":1418,"latencyMs"
:18521.650375000027},{"questionId":"q58","format":"yaml","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":8435,"outputTokens":1546,"latencyMs":16359.941374999937},{"questionId":"q59","format":"json-pretty","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":10807,"outputTokens":1543,"latencyMs":27746.01329100004},{"questionId":"q59","format":"json-compact","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":6888,"outputTokens":1799,"latencyMs":21588.628625000012},{"questionId":"q59","format":"toon","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":7325,"outputTokens":1607,"latencyMs":22641.205915999948},{"questionId":"q59","format":"xml","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":12115,"outputTokens":2119,"latencyMs":21735.99774999998},{"questionId":"q59","format":"yaml","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8439,"outputTokens":1415,"latencyMs":15331.604541999986},{"questionId":"q60","format":"json-pretty","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":10807,"outputTokens":1671,"latencyMs":23176.00108299998},{"questionId":"q60","format":"json-compact","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":6888,"outputTokens":1031,"latencyMs":11122.897291000001},{"questionId":"q60","format":"toon","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":7325,"outputTokens":2119,"latencyMs":44203.42183300003},{"questionId":"q60","format":"xml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":12115,"outputTokens":1415,"latencyMs":21619.647499999963},{"questionId":"q60","format":"yaml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8439,"outputTokens":2055,"latencyMs":20646.457915999927},{"questionId":"q
61","format":"json-pretty","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":10807,"outputTokens":1799,"latencyMs":22455.639375000028},{"questionId":"q61","format":"json-compact","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":6888,"outputTokens":1223,"latencyMs":12465.433750000084},{"questionId":"q61","format":"toon","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":7325,"outputTokens":1543,"latencyMs":17901.21987499995},{"questionId":"q61","format":"xml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":12115,"outputTokens":1351,"latencyMs":21725.661124999984},{"questionId":"q61","format":"yaml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":8439,"outputTokens":1479,"latencyMs":14143.484124999959},{"questionId":"q62","format":"json-pretty","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":10811,"outputTokens":1095,"latencyMs":11719.594000000041},{"questionId":"q62","format":"json-compact","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":6892,"outputTokens":1543,"latencyMs":16026.440790999914},{"questionId":"q62","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":7329,"outputTokens":839,"latencyMs":8525.078959000064},{"questionId":"q62","format":"xml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":12119,"outputTokens":1031,"latencyMs":11283.568582999986},{"questionId":"q62","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":8443,"outputTokens":1159,"latencyMs":13248.422166000004},{"questionId":"q63","format":"json-pretty","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":10811,"outputTokens":903,"latencyMs":10624.811125000007},{"questionId":"q63","format":"json-compact","model":"gpt-5-nano","e
xpected":"6","actual":"6","isCorrect":true,"inputTokens":6892,"outputTokens":1287,"latencyMs":15027.778207999887},{"questionId":"q63","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":7329,"outputTokens":967,"latencyMs":10102.057166000013},{"questionId":"q63","format":"xml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":12119,"outputTokens":1223,"latencyMs":14080.474375000107},{"questionId":"q63","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8443,"outputTokens":839,"latencyMs":10806.409125000006},{"questionId":"q64","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10812,"outputTokens":647,"latencyMs":8619.796208000043},{"questionId":"q64","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6893,"outputTokens":1095,"latencyMs":11266.89533300011},{"questionId":"q64","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7330,"outputTokens":903,"latencyMs":10153.941749999998},{"questionId":"q64","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12120,"outputTokens":903,"latencyMs":10022.941333999974},{"questionId":"q64","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8444,"outputTokens":1031,"latencyMs":11607.239833},{"questionId":"q65","format":"json-pretty","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":10812,"outputTokens":839,"latencyMs":9680.83516700007},{"questionId":"q65","format":"json-compact","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":6893,"outputTokens":1351,"latencyMs":13172.463165999972},{"questionId":"q65","format":"toon","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":7330,"outputTo
kens":967,"latencyMs":13761.158374999999},{"questionId":"q65","format":"xml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":12120,"outputTokens":775,"latencyMs":8579.024916999973},{"questionId":"q65","format":"yaml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":8444,"outputTokens":967,"latencyMs":11227.277834000066},{"questionId":"q66","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":10811,"outputTokens":1095,"latencyMs":11719.79470800003},{"questionId":"q66","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6892,"outputTokens":1287,"latencyMs":12974.757499999949},{"questionId":"q66","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":7329,"outputTokens":1159,"latencyMs":12100.158374999999},{"questionId":"q66","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":12119,"outputTokens":1607,"latencyMs":18089.76549999998},{"questionId":"q66","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":8443,"outputTokens":1351,"latencyMs":14901.317249999964},{"questionId":"q67","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":10811,"outputTokens":967,"latencyMs":13135.226917000022},{"questionId":"q67","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6892,"outputTokens":1479,"latencyMs":17517.77812500007},{"questionId":"q67","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":7329,"outputTokens":1159,"latencyMs":14243.380082999938},{"questionId":"q67","format":"xml","model":"gpt-5-nano","expected":"3","actual":"8","isCorrect":false,"inputTokens":12119,"outputTokens":4551,"latencyMs":60141.679957999964},{"questionId":"q67","format":
"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":8443,"outputTokens":1223,"latencyMs":14046.266375000007},{"questionId":"q68","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":10812,"outputTokens":1095,"latencyMs":13080.296417000005},{"questionId":"q68","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6893,"outputTokens":1095,"latencyMs":12948.556707999902},{"questionId":"q68","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7330,"outputTokens":967,"latencyMs":11987.064000000013},{"questionId":"q68","format":"xml","model":"gpt-5-nano","expected":"5","actual":"4","isCorrect":false,"inputTokens":12120,"outputTokens":2375,"latencyMs":25660.169375000056},{"questionId":"q68","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":8444,"outputTokens":1095,"latencyMs":13157.490292000002},{"questionId":"q69","format":"json-pretty","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":10813,"outputTokens":1991,"latencyMs":22131.704499999993},{"questionId":"q69","format":"json-compact","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":6894,"outputTokens":2503,"latencyMs":35345.66633400007},{"questionId":"q69","format":"toon","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":7331,"outputTokens":1799,"latencyMs":20451.80954200006},{"questionId":"q69","format":"xml","model":"gpt-5-nano","expected":"20","actual":"23","isCorrect":false,"inputTokens":12121,"outputTokens":4423,"latencyMs":45258.41941600002},{"questionId":"q69","format":"yaml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":8445,"outputTokens":3335,"latencyMs":34735.526083000004},{"questionId":"q70","format":"json-pretty","model":"gpt-5-nano","expected":"19","actua
l":"19","isCorrect":true,"inputTokens":10813,"outputTokens":2887,"latencyMs":41997.15891700005},{"questionId":"q70","format":"json-compact","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":6894,"outputTokens":2503,"latencyMs":39163.075124999974},{"questionId":"q70","format":"toon","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":7331,"outputTokens":2503,"latencyMs":27668.932207999984},{"questionId":"q70","format":"xml","model":"gpt-5-nano","expected":"19","actual":"30","isCorrect":false,"inputTokens":12121,"outputTokens":7303,"latencyMs":115098.00795900007},{"questionId":"q70","format":"yaml","model":"gpt-5-nano","expected":"19","actual":"18","isCorrect":false,"inputTokens":8445,"outputTokens":2247,"latencyMs":46982.19175},{"questionId":"q71","format":"json-pretty","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":3772,"outputTokens":72,"latencyMs":2913.5479589999886},{"questionId":"q71","format":"json-compact","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":2445,"outputTokens":136,"latencyMs":2803.939374999958},{"questionId":"q71","format":"toon","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1643,"outputTokens":200,"latencyMs":6143.997375000035},{"questionId":"q71","format":"csv","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1501,"outputTokens":264,"latencyMs":3737.9859579999465},{"questionId":"q71","format":"xml","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":4478,"outputTokens":136,"latencyMs":6451.102792000049},{"questionId":"q71","format":"yaml","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":3043,"outputTokens":136,"latencyMs":3775.4380420000525},{"questionId":"q72","format":"json-pretty","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"
inputTokens":3772,"outputTokens":202,"latencyMs":5033.039417000022},{"questionId":"q72","format":"json-compact","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":2445,"outputTokens":266,"latencyMs":4111.494624999934},{"questionId":"q72","format":"toon","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1643,"outputTokens":394,"latencyMs":8638.389167000074},{"questionId":"q72","format":"csv","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1501,"outputTokens":458,"latencyMs":8524.106499999994},{"questionId":"q72","format":"xml","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":4478,"outputTokens":330,"latencyMs":3970.999124999973},{"questionId":"q72","format":"yaml","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":3043,"outputTokens":394,"latencyMs":4908.646542000002},{"questionId":"q73","format":"json-pretty","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3773,"outputTokens":393,"latencyMs":5176.399416},{"questionId":"q73","format":"json-compact","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":2446,"outputTokens":393,"latencyMs":4801.460207999917},{"questionId":"q73","format":"toon","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1644,"outputTokens":393,"latencyMs":6057.693375000032},{"questionId":"q73","format":"csv","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1502,"outputTokens":265,"latencyMs":3750.650166999898},{"questionId":"q73","format":"xml","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4479,"outputTokens":265,"latencyMs":3854.7792090000585},{"questionId":"q73","format":"yaml","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,
"inputTokens":3044,"outputTokens":393,"latencyMs":6347.295958000002},{"questionId":"q74","format":"json-pretty","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3773,"outputTokens":199,"latencyMs":2818.6481669999193},{"questionId":"q74","format":"json-compact","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":2446,"outputTokens":583,"latencyMs":7016.024041999946},{"questionId":"q74","format":"toon","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1644,"outputTokens":327,"latencyMs":11679.409291999997},{"questionId":"q74","format":"csv","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1502,"outputTokens":263,"latencyMs":8099.556624999968},{"questionId":"q74","format":"xml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":4479,"outputTokens":263,"latencyMs":5838.912250000052},{"questionId":"q74","format":"yaml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3044,"outputTokens":199,"latencyMs":4037.007084000041},{"questionId":"q75","format":"json-pretty","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":3772,"outputTokens":200,"latencyMs":3196.2014169999165},{"questionId":"q75","format":"json-compact","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":2445,"outputTokens":456,"latencyMs":5860.588916999986},{"questionId":"q75","format":"toon","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1643,"outputTokens":136,"latencyMs":7298.248332999996},{"questionId":"q75","format":"csv","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1501,"outputTokens":136,"latencyMs":2674.1493330000667},{"questionId":"q75","format":"xml","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":4478,"outputTokens":136,"latencyM
s":4064.031667000032},{"questionId":"q75","format":"yaml","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":3043,"outputTokens":264,"latencyMs":5344.256874999963},{"questionId":"q76","format":"json-pretty","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":3772,"outputTokens":522,"latencyMs":7330.072791999904},{"questionId":"q76","format":"json-compact","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":2445,"outputTokens":650,"latencyMs":6851.327042000019},{"questionId":"q76","format":"toon","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1643,"outputTokens":522,"latencyMs":5857.433624999947},{"questionId":"q76","format":"csv","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1501,"outputTokens":522,"latencyMs":6343.291249999893},{"questionId":"q76","format":"xml","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":4478,"outputTokens":266,"latencyMs":4330.397083999938},{"questionId":"q76","format":"yaml","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":3043,"outputTokens":394,"latencyMs":5960.957958000014},{"questionId":"q77","format":"json-pretty","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":3773,"outputTokens":329,"latencyMs":5319.284916999983},{"questionId":"q77","format":"json-compact","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":2446,"outputTokens":329,"latencyMs":4361.255540999933},{"questionId":"q77","format":"toon","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1644,"outputTokens":585,"latencyMs":7900.699583000038},{"questionId":"q77","format":"csv","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1502,"outputTokens":457,"latencyMs"
:5575.410874999943},{"questionId":"q77","format":"xml","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":4479,"outputTokens":521,"latencyMs":8356.306833000039},{"questionId":"q77","format":"yaml","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":3044,"outputTokens":329,"latencyMs":7722.601000000024},{"questionId":"q78","format":"json-pretty","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":3773,"outputTokens":263,"latencyMs":5472.443542000023},{"questionId":"q78","format":"json-compact","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":2446,"outputTokens":135,"latencyMs":3127.7964580000844},{"questionId":"q78","format":"toon","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":1644,"outputTokens":263,"latencyMs":3561.4773750000168},{"questionId":"q78","format":"csv","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":1502,"outputTokens":199,"latencyMs":2641.831709000049},{"questionId":"q78","format":"xml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":4479,"outputTokens":263,"latencyMs":3984.2812919999706},{"questionId":"q78","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":3044,"outputTokens":327,"latencyMs":4146.360374999931},{"questionId":"q79","format":"json-pretty","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":3772,"outputTokens":200,"latencyMs":3001.3016669999342},{"questionId":"q79","format":"json-compact","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":2445,"outputTokens":200,"latencyMs":3991.4621250000782},{"questionId":"q79","format":"toon","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1643,"outputTokens":456,"latencyMs":7423.164000000106},{"questionId":"q79","form
at":"csv","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1501,"outputTokens":328,"latencyMs":3855.023082999978},{"questionId":"q79","format":"xml","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":4478,"outputTokens":264,"latencyMs":4289.763542000088},{"questionId":"q79","format":"yaml","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":3043,"outputTokens":200,"latencyMs":3733.6695410000393},{"questionId":"q80","format":"json-pretty","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3769,"outputTokens":903,"latencyMs":9863.215167000075},{"questionId":"q80","format":"json-compact","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":2442,"outputTokens":775,"latencyMs":9231.59695799998},{"questionId":"q80","format":"toon","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1640,"outputTokens":391,"latencyMs":5704.973082999932},{"questionId":"q80","format":"csv","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1498,"outputTokens":583,"latencyMs":8583.113332999987},{"questionId":"q80","format":"xml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":4475,"outputTokens":327,"latencyMs":5083.1711249999935},{"questionId":"q80","format":"yaml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3040,"outputTokens":455,"latencyMs":6700.828666999936},{"questionId":"q81","format":"json-pretty","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":3770,"outputTokens":1608,"latencyMs":18562.350708000013},{"questionId":"q81","format":"json-compact","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":2443,"outputTokens":2312,"latencyMs":21658.89724999992},{"questionId":"q81","format":"toon","model":"gpt-5-nano","expect
ed":"328320","actual":"328320","isCorrect":true,"inputTokens":1641,"outputTokens":2760,"latencyMs":26115.369374999893},{"questionId":"q81","format":"csv","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":1499,"outputTokens":2760,"latencyMs":27018.220417000004},{"questionId":"q81","format":"xml","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":4476,"outputTokens":3208,"latencyMs":35106.79574999993},{"questionId":"q81","format":"yaml","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":3041,"outputTokens":3144,"latencyMs":28490.40104200004},{"questionId":"q82","format":"json-pretty","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":3770,"outputTokens":2056,"latencyMs":22616.934583000024},{"questionId":"q82","format":"json-compact","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":2443,"outputTokens":2376,"latencyMs":29998.364584000083},{"questionId":"q82","format":"toon","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":1641,"outputTokens":3080,"latencyMs":32440.0187919999},{"questionId":"q82","format":"csv","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":1499,"outputTokens":1864,"latencyMs":32674.871374999988},{"questionId":"q82","format":"xml","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":4476,"outputTokens":1992,"latencyMs":18328.068833000027},{"questionId":"q82","format":"yaml","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":3041,"outputTokens":2440,"latencyMs":26599.709708000068},{"questionId":"q83","format":"json-pretty","model":"gpt-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":3768,"outputTokens":3594,"latencyMs":31722.614457999938},{"questionId":"q83","format":"json-compact","model":"gp
t-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":2441,"outputTokens":6346,"latencyMs":61432.27987500001},{"questionId":"q83","format":"toon","model":"gpt-5-nano","expected":"311695.88","actual":"310695.88","isCorrect":false,"inputTokens":1639,"outputTokens":6922,"latencyMs":67581.54674999998},{"questionId":"q83","format":"csv","model":"gpt-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":1497,"outputTokens":3658,"latencyMs":34084.16679099994},{"questionId":"q83","format":"xml","model":"gpt-5-nano","expected":"311695.88","actual":"310795.88","isCorrect":false,"inputTokens":4474,"outputTokens":7178,"latencyMs":72630.23875000002},{"questionId":"q83","format":"yaml","model":"gpt-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":3039,"outputTokens":5898,"latencyMs":57679.497999999905},{"questionId":"q84","format":"json-pretty","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3766,"outputTokens":7369,"latencyMs":67930.0475000001},{"questionId":"q84","format":"json-compact","model":"gpt-5-nano","expected":"0.53","actual":"0.5278333333333333","isCorrect":true,"inputTokens":2439,"outputTokens":4942,"latencyMs":46617.24916699994},{"questionId":"q84","format":"toon","model":"gpt-5-nano","expected":"0.53","actual":"0.527833","isCorrect":true,"inputTokens":1637,"outputTokens":4874,"latencyMs":46005.71258299996},{"questionId":"q84","format":"csv","model":"gpt-5-nano","expected":"0.53","actual":"0.5278333333","isCorrect":true,"inputTokens":1495,"outputTokens":3532,"latencyMs":33943.250624999986},{"questionId":"q84","format":"xml","model":"gpt-5-nano","expected":"0.53","actual":"0.5298333333","isCorrect":true,"inputTokens":4472,"outputTokens":4044,"latencyMs":37510.19779100001},{"questionId":"q84","format":"yaml","model":"gpt-5-nano","expected":"0.53","actual":"0.5278333333","isCorrect":true,"inputTokens":3037,"outputTokens":4556,"latencyMs":55
096.31458399992},{"questionId":"q85","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3770,"outputTokens":1351,"latencyMs":13053.103250000044},{"questionId":"q85","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2443,"outputTokens":1031,"latencyMs":11122.031167000066},{"questionId":"q85","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1641,"outputTokens":1863,"latencyMs":19323.529084000038},{"questionId":"q85","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1499,"outputTokens":1479,"latencyMs":18111.95750000002},{"questionId":"q85","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4476,"outputTokens":1671,"latencyMs":26929.74866599997},{"questionId":"q85","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3041,"outputTokens":1479,"latencyMs":14530.826667000074},{"questionId":"q86","format":"json-pretty","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":3769,"outputTokens":1543,"latencyMs":13622.04912500002},{"questionId":"q86","format":"json-compact","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":2442,"outputTokens":2055,"latencyMs":23578.416874999995},{"questionId":"q86","format":"toon","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":1640,"outputTokens":3143,"latencyMs":42258.583417000016},{"questionId":"q86","format":"csv","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":1498,"outputTokens":2631,"latencyMs":24667.923749999958},{"questionId":"q86","format":"xml","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":4475,"outputTokens":2247,"latencyMs":20338.54649999994},{"questionId":"q86","format":"yaml",
"model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":3040,"outputTokens":1863,"latencyMs":19668.138083999977},{"questionId":"q87","format":"json-pretty","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3776,"outputTokens":1735,"latencyMs":17175.41733299999},{"questionId":"q87","format":"json-compact","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":2449,"outputTokens":2119,"latencyMs":21621.97758299997},{"questionId":"q87","format":"toon","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1647,"outputTokens":2183,"latencyMs":20844.98749999993},{"questionId":"q87","format":"csv","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1505,"outputTokens":1671,"latencyMs":17007.475209000055},{"questionId":"q87","format":"xml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":4482,"outputTokens":2247,"latencyMs":20809.911709000007},{"questionId":"q87","format":"yaml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3047,"outputTokens":2439,"latencyMs":23743.970708000008},{"questionId":"q88","format":"json-pretty","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":3776,"outputTokens":1479,"latencyMs":13780.006082999986},{"questionId":"q88","format":"json-compact","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":2449,"outputTokens":2567,"latencyMs":28457.375708000036},{"questionId":"q88","format":"toon","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":1647,"outputTokens":1927,"latencyMs":19491.54999999993},{"questionId":"q88","format":"csv","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":1505,"outputTokens":1223,"latencyMs":18888.93787500006},{"questionId":"q88","format":"xml","model":"gpt-5-nano","expected":"14","actual":"14","is
Correct":true,"inputTokens":4482,"outputTokens":2055,"latencyMs":19879.96829199989},{"questionId":"q88","format":"yaml","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":3047,"outputTokens":2439,"latencyMs":37028.00633300003},{"questionId":"q89","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3776,"outputTokens":1799,"latencyMs":20709.313749999972},{"questionId":"q89","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2449,"outputTokens":3143,"latencyMs":28282.831917000003},{"questionId":"q89","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1647,"outputTokens":2055,"latencyMs":19895.249166999944},{"questionId":"q89","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1505,"outputTokens":1927,"latencyMs":24627.73229199997},{"questionId":"q89","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4482,"outputTokens":2631,"latencyMs":26081.29570799996},{"questionId":"q89","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3047,"outputTokens":1991,"latencyMs":19726.860791999963},{"questionId":"q90","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3776,"outputTokens":2567,"latencyMs":24450.25691700005},{"questionId":"q90","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2449,"outputTokens":3079,"latencyMs":30192.66949999996},{"questionId":"q90","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1647,"outputTokens":2695,"latencyMs":30800.806582999998},{"questionId":"q90","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1505,"outputTokens":1735,"
latencyMs":17525.293582999962},{"questionId":"q90","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4482,"outputTokens":2503,"latencyMs":24294.877791999956},{"questionId":"q90","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3047,"outputTokens":2887,"latencyMs":27950.61812499992},{"questionId":"q91","format":"json-pretty","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3776,"outputTokens":2695,"latencyMs":41832.30825},{"questionId":"q91","format":"json-compact","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":2449,"outputTokens":2311,"latencyMs":25780.131374999997},{"questionId":"q91","format":"toon","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1647,"outputTokens":2247,"latencyMs":25229.332374999998},{"questionId":"q91","format":"csv","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1505,"outputTokens":2311,"latencyMs":22338.296957999934},{"questionId":"q91","format":"xml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":4482,"outputTokens":2887,"latencyMs":38730.74291699997},{"questionId":"q91","format":"yaml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3047,"outputTokens":3655,"latencyMs":35328.02654200001},{"questionId":"q92","format":"json-pretty","model":"gpt-5-nano","expected":"32","actual":"33","isCorrect":false,"inputTokens":3775,"outputTokens":2247,"latencyMs":25864.640083999955},{"questionId":"q92","format":"json-compact","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":2448,"outputTokens":2503,"latencyMs":25477.055166000035},{"questionId":"q92","format":"toon","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1646,"outputTokens":3399,"latencyMs":31212.052334000007},{"questionId":"q92","forma
t":"csv","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1504,"outputTokens":1607,"latencyMs":16476.131416000077},{"questionId":"q92","format":"xml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":4481,"outputTokens":2695,"latencyMs":40359.28500000003},{"questionId":"q92","format":"yaml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3046,"outputTokens":2439,"latencyMs":30665.69058299996},{"questionId":"q93","format":"json-pretty","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":3775,"outputTokens":1927,"latencyMs":21325.32583300001},{"questionId":"q93","format":"json-compact","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":2448,"outputTokens":2183,"latencyMs":22164.555750000058},{"questionId":"q93","format":"toon","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":1646,"outputTokens":2119,"latencyMs":21662.80249999999},{"questionId":"q93","format":"csv","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":1504,"outputTokens":1927,"latencyMs":20278.839750000043},{"questionId":"q93","format":"xml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":4481,"outputTokens":1479,"latencyMs":16767.767083000042},{"questionId":"q93","format":"yaml","model":"gpt-5-nano","expected":"9","actual":"2025-01-17,2025-01-20,2025-01-27,2025-01-28,2025-01-30,2025-02-06,2025-02-10,2025-02-11,2025-02-12","isCorrect":false,"inputTokens":3046,"outputTokens":2180,"latencyMs":27798.925208},{"questionId":"q94","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3778,"outputTokens":2439,"latencyMs":34332.84691700002},{"questionId":"q94","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2451,"outputTokens":2183,"latencyMs":28876.632458999986},
{"questionId":"q94","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1649,"outputTokens":3399,"latencyMs":29958.79008299997},{"questionId":"q94","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1507,"outputTokens":3463,"latencyMs":36254.60154099995},{"questionId":"q94","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4484,"outputTokens":3399,"latencyMs":33635.2489169999},{"questionId":"q94","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3049,"outputTokens":2951,"latencyMs":26639.627958000056},{"questionId":"q95","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3778,"outputTokens":2375,"latencyMs":24043.562916999916},{"questionId":"q95","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2451,"outputTokens":3399,"latencyMs":36727.34020800004},{"questionId":"q95","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1649,"outputTokens":2695,"latencyMs":28240.418083999888},{"questionId":"q95","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1507,"outputTokens":3143,"latencyMs":32131.14024999994},{"questionId":"q95","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4484,"outputTokens":3079,"latencyMs":30882.657292000018},{"questionId":"q95","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3049,"outputTokens":3143,"latencyMs":34199.13054200006},{"questionId":"q96","format":"json-pretty","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":15252,"outputTokens":136,"latencyMs":3174.137834000052},{"questionId":"q96","format":"json-compact","model":"gpt-
5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":11559,"outputTokens":264,"latencyMs":4427.513500000001},{"questionId":"q96","format":"toon","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8873,"outputTokens":328,"latencyMs":4197.75774999999},{"questionId":"q96","format":"csv","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8621,"outputTokens":264,"latencyMs":4365.791124999989},{"questionId":"q96","format":"xml","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":17198,"outputTokens":328,"latencyMs":8591.86379199999},{"questionId":"q96","format":"yaml","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":13234,"outputTokens":136,"latencyMs":3006.3902920000255},{"questionId":"q97","format":"json-pretty","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":15254,"outputTokens":328,"latencyMs":5031.756458000047},{"questionId":"q97","format":"json-compact","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":11561,"outputTokens":328,"latencyMs":4921.948124999995},{"questionId":"q97","format":"toon","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8875,"outputTokens":328,"latencyMs":13561.781500000041},{"questionId":"q97","format":"csv","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8623,"outputTokens":328,"latencyMs":11962.30929200002},{"questionId":"q97","format":"xml","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17200,"outputTokens":328,"latencyMs":8242.271916999947},{"questionId":"q97","format":"yaml","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":13236,"outputTokens":264,"latencyMs":7252.942959000007},{"questionId":"q98","format":"json-pretty","model":"
gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":15249,"outputTokens":264,"latencyMs":4860.508666999987},{"questionId":"q98","format":"json-compact","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":11556,"outputTokens":392,"latencyMs":5948.768499999889},{"questionId":"q98","format":"toon","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":8870,"outputTokens":264,"latencyMs":3931.105542000034},{"questionId":"q98","format":"csv","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":8618,"outputTokens":648,"latencyMs":11943.225541999913},{"questionId":"q98","format":"xml","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17195,"outputTokens":328,"latencyMs":5136.993999999948},{"questionId":"q98","format":"yaml","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":13231,"outputTokens":264,"latencyMs":3974.5073329999577},{"questionId":"q99","format":"json-pretty","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":15255,"outputTokens":583,"latencyMs":10160.71020900004},{"questionId":"q99","format":"json-compact","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":11562,"outputTokens":519,"latencyMs":10971.125875000027},{"questionId":"q99","format":"toon","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":8876,"outputTokens":327,"latencyMs":4399.097415999975},{"questionId":"q99","format":"csv","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":8624,"outputTokens":327,"latencyMs":5930.175540999975},{"questionId":"q99","format":"xml","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":17201,"outputTokens":391,"latencyMs":7765.284375000047},{"questionId":"q99","format":"yaml","model":"gpt-
5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":13237,"outputTokens":391,"latencyMs":4973.2529159999685},{"questionId":"q100","format":"json-pretty","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":15249,"outputTokens":136,"latencyMs":3776.490165999974},{"questionId":"q100","format":"json-compact","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":11556,"outputTokens":328,"latencyMs":7673.985999999917},{"questionId":"q100","format":"toon","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8870,"outputTokens":392,"latencyMs":8448.222042000038},{"questionId":"q100","format":"csv","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8618,"outputTokens":328,"latencyMs":5834.679583000019},{"questionId":"q100","format":"xml","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17195,"outputTokens":328,"latencyMs":4700.3877090000315},{"questionId":"q100","format":"yaml","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":13231,"outputTokens":328,"latencyMs":4259.143250000081},{"questionId":"q101","format":"json-pretty","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":15254,"outputTokens":264,"latencyMs":4328.258417000063},{"questionId":"q101","format":"json-compact","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":11561,"outputTokens":264,"latencyMs":3439.5950410000514},{"questionId":"q101","format":"toon","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8875,"outputTokens":584,"latencyMs":6392.925374999992},{"questionId":"q101","format":"csv","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8623,"outputTokens":264,"latencyMs":4369.833875000011},{"questionId":"q101
","format":"xml","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17200,"outputTokens":264,"latencyMs":4026.3382080000592},{"questionId":"q101","format":"yaml","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":13236,"outputTokens":328,"latencyMs":3988.631625000038},{"questionId":"q102","format":"json-pretty","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":15253,"outputTokens":263,"latencyMs":6256.574583000038},{"questionId":"q102","format":"json-compact","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":11560,"outputTokens":263,"latencyMs":4265.1440410000505},{"questionId":"q102","format":"toon","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":8874,"outputTokens":455,"latencyMs":10965.912249999936},{"questionId":"q102","format":"csv","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":8622,"outputTokens":391,"latencyMs":7398.446083000046},{"questionId":"q102","format":"xml","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":17199,"outputTokens":391,"latencyMs":5909.86704199994},{"questionId":"q102","format":"yaml","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":13235,"outputTokens":199,"latencyMs":3187.290791999898},{"questionId":"q103","format":"json-pretty","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":15252,"outputTokens":519,"latencyMs":6490.3748749999795},{"questionId":"q103","format":"json-compact","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":11559,"outputTokens":583,"latencyMs":7666.581874999916},{"questionId":"q103","format":"toon","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":8873,"outputTokens":327,"latencyMs":6757.961624999996},{"questionId":"q103","format":"
csv","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":8621,"outputTokens":263,"latencyMs":4993.553249999997},{"questionId":"q103","format":"xml","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":17198,"outputTokens":327,"latencyMs":8451.123458999908},{"questionId":"q103","format":"yaml","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":13234,"outputTokens":199,"latencyMs":3843.894541000016},{"questionId":"q104","format":"json-pretty","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":15256,"outputTokens":392,"latencyMs":11129.69512499997},{"questionId":"q104","format":"json-compact","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":11563,"outputTokens":392,"latencyMs":8905.517333000083},{"questionId":"q104","format":"toon","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8877,"outputTokens":392,"latencyMs":6319.255083000055},{"questionId":"q104","format":"csv","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8625,"outputTokens":328,"latencyMs":7528.265875000041},{"questionId":"q104","format":"xml","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17202,"outputTokens":392,"latencyMs":13579.722625000053},{"questionId":"q104","format":"yaml","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":13238,"outputTokens":328,"latencyMs":10575.226957999985},{"questionId":"q105","format":"json-pretty","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":15252,"outputTokens":200,"latencyMs":5185.377082999912},{"questionId":"q105","format":"json-compact","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":11559,"outputTokens":264,"latencyMs":4393.0949580000015},{"questi
onId":"q105","format":"toon","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8873,"outputTokens":456,"latencyMs":6270.737916999962},{"questionId":"q105","format":"csv","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8621,"outputTokens":328,"latencyMs":4558.527000000002},{"questionId":"q105","format":"xml","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17198,"outputTokens":328,"latencyMs":5035.306250000023},{"questionId":"q105","format":"yaml","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":13234,"outputTokens":328,"latencyMs":6407.646999999997},{"questionId":"q106","format":"json-pretty","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":15257,"outputTokens":136,"latencyMs":3376.4645419999724},{"questionId":"q106","format":"json-compact","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":11564,"outputTokens":392,"latencyMs":4399.214457999915},{"questionId":"q106","format":"toon","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8878,"outputTokens":456,"latencyMs":5250.13595799997},{"questionId":"q106","format":"csv","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8626,"outputTokens":584,"latencyMs":10384.269833000028},{"questionId":"q106","format":"xml","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17203,"outputTokens":328,"latencyMs":4593.475250000018},{"questionId":"q106","format":"yaml","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":13239,"outputTokens":328,"latencyMs":9014.461250000051},{"questionId":"q107","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":15248,"outputTokens":2375,"latencyMs":24542.61075000011},{"questionId":"q1
07","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":11555,"outputTokens":711,"latencyMs":7960.394625000074},{"questionId":"q107","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8869,"outputTokens":199,"latencyMs":2844.534417000017},{"questionId":"q107","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8617,"outputTokens":4167,"latencyMs":85494.36879199988},{"questionId":"q107","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":17194,"outputTokens":2503,"latencyMs":24175.646917000064},{"questionId":"q107","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":13230,"outputTokens":3783,"latencyMs":37382.685666000005},{"questionId":"q108","format":"json-pretty","model":"gpt-5-nano","expected":"15413563","actual":"12284443","isCorrect":false,"inputTokens":15251,"outputTokens":4745,"latencyMs":52110.05108299991},{"questionId":"q108","format":"json-compact","model":"gpt-5-nano","expected":"15413563","actual":"15527138","isCorrect":false,"inputTokens":11558,"outputTokens":9417,"latencyMs":101241.22870800004},{"questionId":"q108","format":"toon","model":"gpt-5-nano","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":8872,"outputTokens":5257,"latencyMs":54524.812208000105},{"questionId":"q108","format":"csv","model":"gpt-5-nano","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":8620,"outputTokens":5321,"latencyMs":62112.67933299986},{"questionId":"q108","format":"xml","model":"gpt-5-nano","expected":"15413563","actual":"18856320","isCorrect":false,"inputTokens":17197,"outputTokens":11785,"latencyMs":138915.09487500007},{"questionId":"q108","format":"yaml","model":"gpt-5-nano","expected":"15413563","actual":"18617253","isCorrect":false,"inputTokens":13233,"outputTokens":9097,"latenc
yMs":90010.27312499995},{"questionId":"q109","format":"json-pretty","model":"gpt-5-nano","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":15251,"outputTokens":7561,"latencyMs":100665.4815},{"questionId":"q109","format":"json-compact","model":"gpt-5-nano","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":11558,"outputTokens":5385,"latencyMs":63163.41504199989},{"questionId":"q109","format":"toon","model":"gpt-5-nano","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":8872,"outputTokens":13577,"latencyMs":136019.44545800006},{"questionId":"q109","format":"csv","model":"gpt-5-nano","expected":"2528243","actual":"2831139","isCorrect":false,"inputTokens":8620,"outputTokens":8137,"latencyMs":83168.071},{"questionId":"q109","format":"xml","model":"gpt-5-nano","expected":"2528243","actual":"3500000","isCorrect":false,"inputTokens":17197,"outputTokens":12745,"latencyMs":298602.1220839999},{"questionId":"q109","format":"yaml","model":"gpt-5-nano","expected":"2528243","actual":"11131566","isCorrect":false,"inputTokens":13233,"outputTokens":21577,"latencyMs":197418.5528330002},{"questionId":"q110","format":"json-pretty","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":15250,"outputTokens":10762,"latencyMs":100897.18295899988},{"questionId":"q110","format":"json-compact","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":11557,"outputTokens":6602,"latencyMs":69045.86083299993},{"questionId":"q110","format":"toon","model":"gpt-5-nano","expected":"154136","actual":"154125.63","isCorrect":false,"inputTokens":8871,"outputTokens":5642,"latencyMs":53949.60254199989},{"questionId":"q110","format":"csv","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":8619,"outputTokens":4874,"latencyMs":52207.93591700005},{"questionId":"q110","format":"xml","model":"gpt-5-nano","expected":"154136","actual":"15
7742.648351648","isCorrect":false,"inputTokens":17196,"outputTokens":4940,"latencyMs":71313.78808299988},{"questionId":"q110","format":"yaml","model":"gpt-5-nano","expected":"154136","actual":"154136","isCorrect":true,"inputTokens":13232,"outputTokens":5704,"latencyMs":55499.88716699998},{"questionId":"q111","format":"json-pretty","model":"gpt-5-nano","expected":"41","actual":"27","isCorrect":false,"inputTokens":15252,"outputTokens":2183,"latencyMs":21596.972249999875},{"questionId":"q111","format":"json-compact","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":11559,"outputTokens":2631,"latencyMs":30117.775832999963},{"questionId":"q111","format":"toon","model":"gpt-5-nano","expected":"41","actual":"40","isCorrect":false,"inputTokens":8873,"outputTokens":3015,"latencyMs":37560.24325000006},{"questionId":"q111","format":"csv","model":"gpt-5-nano","expected":"41","actual":"40","isCorrect":false,"inputTokens":8621,"outputTokens":3399,"latencyMs":39291.82483300008},{"questionId":"q111","format":"xml","model":"gpt-5-nano","expected":"41","actual":"27","isCorrect":false,"inputTokens":17198,"outputTokens":9607,"latencyMs":96459.7463750001},{"questionId":"q111","format":"yaml","model":"gpt-5-nano","expected":"41","actual":"39","isCorrect":false,"inputTokens":13234,"outputTokens":4743,"latencyMs":42681.62850000011},{"questionId":"q112","format":"json-pretty","model":"gpt-5-nano","expected":"53","actual":"61","isCorrect":false,"inputTokens":15252,"outputTokens":5831,"latencyMs":55109.62650000001},{"questionId":"q112","format":"json-compact","model":"gpt-5-nano","expected":"53","actual":"54","isCorrect":false,"inputTokens":11559,"outputTokens":7175,"latencyMs":94090.2629170001},{"questionId":"q112","format":"toon","model":"gpt-5-nano","expected":"53","actual":"60","isCorrect":false,"inputTokens":8873,"outputTokens":2439,"latencyMs":22994.73654199997},{"questionId":"q112","format":"csv","model":"gpt-5-nano","expected":"53","actual":"59","isCorr
ect":false,"inputTokens":8621,"outputTokens":7367,"latencyMs":62292.922792},{"questionId":"q112","format":"xml","model":"gpt-5-nano","expected":"53","actual":"60","isCorrect":false,"inputTokens":17198,"outputTokens":7687,"latencyMs":68955.88562500011},{"questionId":"q112","format":"yaml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":13234,"outputTokens":2759,"latencyMs":25175.004457999952},{"questionId":"q113","format":"json-pretty","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":15251,"outputTokens":2567,"latencyMs":31751.425124999834},{"questionId":"q113","format":"json-compact","model":"gpt-5-nano","expected":"77","actual":"64","isCorrect":false,"inputTokens":11558,"outputTokens":2695,"latencyMs":24752.101749999914},{"questionId":"q113","format":"toon","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":8872,"outputTokens":4039,"latencyMs":58847.84733400005},{"questionId":"q113","format":"csv","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":8620,"outputTokens":3015,"latencyMs":28625.518792000134},{"questionId":"q113","format":"xml","model":"gpt-5-nano","expected":"77","actual":"73","isCorrect":false,"inputTokens":17197,"outputTokens":4999,"latencyMs":43475.059833999956},{"questionId":"q113","format":"yaml","model":"gpt-5-nano","expected":"77","actual":"74","isCorrect":false,"inputTokens":13233,"outputTokens":15879,"latencyMs":133066.7952080001},{"questionId":"q114","format":"json-pretty","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":15251,"outputTokens":1607,"latencyMs":17589.96158400015},{"questionId":"q114","format":"json-compact","model":"gpt-5-nano","expected":"37","actual":"36","isCorrect":false,"inputTokens":11558,"outputTokens":10759,"latencyMs":91386.12729199999},{"questionId":"q114","format":"toon","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8872,"ou
tputTokens":2951,"latencyMs":28628.738499999978},{"questionId":"q114","format":"csv","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8620,"outputTokens":2055,"latencyMs":16901.366458999924},{"questionId":"q114","format":"xml","model":"gpt-5-nano","expected":"37","actual":"27","isCorrect":false,"inputTokens":17197,"outputTokens":1863,"latencyMs":16063.757082999917},{"questionId":"q114","format":"yaml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":13233,"outputTokens":3719,"latencyMs":40554.34516599984},{"questionId":"q115","format":"json-pretty","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":15251,"outputTokens":967,"latencyMs":15059.88116599992},{"questionId":"q115","format":"json-compact","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":11558,"outputTokens":1095,"latencyMs":10164.44991599978},{"questionId":"q115","format":"toon","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":8872,"outputTokens":3719,"latencyMs":44044.071624999866},{"questionId":"q115","format":"csv","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":8620,"outputTokens":2183,"latencyMs":19417.32862499985},{"questionId":"q115","format":"xml","model":"gpt-5-nano","expected":"16","actual":"15","isCorrect":false,"inputTokens":17197,"outputTokens":1607,"latencyMs":15047.773334000027},{"questionId":"q115","format":"yaml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":13233,"outputTokens":1543,"latencyMs":36046.026458000066},{"questionId":"q116","format":"json-pretty","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":15251,"outputTokens":3271,"latencyMs":76326.07350000017},{"questionId":"q116","format":"json-compact","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":11558,"outputTokens":3655,"latencyMs":32130
.930374999996},{"questionId":"q116","format":"toon","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":8872,"outputTokens":12615,"latencyMs":155529.18491700012},{"questionId":"q116","format":"csv","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":8620,"outputTokens":6407,"latencyMs":55902.06070800009},{"questionId":"q116","format":"xml","model":"gpt-5-nano","expected":"49","actual":"106","isCorrect":false,"inputTokens":17197,"outputTokens":7495,"latencyMs":64000.08562499983},{"questionId":"q116","format":"yaml","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":13233,"outputTokens":3591,"latencyMs":31902.165125000058},{"questionId":"q117","format":"json-pretty","model":"gpt-5-nano","expected":"23","actual":"29","isCorrect":false,"inputTokens":15251,"outputTokens":7751,"latencyMs":65168.02249999996},{"questionId":"q117","format":"json-compact","model":"gpt-5-nano","expected":"23","actual":"21","isCorrect":false,"inputTokens":11558,"outputTokens":3207,"latencyMs":28594.051167000085},{"questionId":"q117","format":"toon","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":8872,"outputTokens":10503,"latencyMs":98151.87975000008},{"questionId":"q117","format":"csv","model":"gpt-5-nano","expected":"23","actual":"21","isCorrect":false,"inputTokens":8620,"outputTokens":2183,"latencyMs":20484.786165999947},{"questionId":"q117","format":"xml","model":"gpt-5-nano","expected":"23","actual":"21","isCorrect":false,"inputTokens":17197,"outputTokens":4615,"latencyMs":43518.77370800008},{"questionId":"q117","format":"yaml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":13233,"outputTokens":3975,"latencyMs":35627.89633300016},{"questionId":"q118","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":15251,"outputTokens":3015,"latencyMs":26725.319833999965},{"questionId":"q118","fo
rmat":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":11558,"outputTokens":1799,"latencyMs":18761.737124999985},{"questionId":"q118","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8872,"outputTokens":2759,"latencyMs":24030.234415999847},{"questionId":"q118","format":"csv","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8620,"outputTokens":2311,"latencyMs":20654.35191700002},{"questionId":"q118","format":"xml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":17197,"outputTokens":1799,"latencyMs":17041.010666999966},{"questionId":"q118","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"5","isCorrect":false,"inputTokens":13233,"outputTokens":3463,"latencyMs":33392.745709000155},{"questionId":"q119","format":"json-pretty","model":"gpt-5-nano","expected":"57","actual":"56","isCorrect":false,"inputTokens":15258,"outputTokens":4103,"latencyMs":38199.90779100009},{"questionId":"q119","format":"json-compact","model":"gpt-5-nano","expected":"57","actual":"73","isCorrect":false,"inputTokens":11565,"outputTokens":12743,"latencyMs":131865.93516699993},{"questionId":"q119","format":"toon","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":8879,"outputTokens":4167,"latencyMs":39757.292166},{"questionId":"q119","format":"csv","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":8627,"outputTokens":7239,"latencyMs":81548.7545830002},{"questionId":"q119","format":"xml","model":"gpt-5-nano","expected":"57","actual":"60","isCorrect":false,"inputTokens":17204,"outputTokens":15303,"latencyMs":134679.453584},{"questionId":"q119","format":"yaml","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":13240,"outputTokens":4167,"latencyMs":38586.18929200014},{"questionId":"q120","format":"json-pretty","model":"gpt-5-nano","expected":"43","
actual":"57","isCorrect":false,"inputTokens":15258,"outputTokens":6407,"latencyMs":74895.58804200008},{"questionId":"q120","format":"json-compact","model":"gpt-5-nano","expected":"43","actual":"32","isCorrect":false,"inputTokens":11565,"outputTokens":3015,"latencyMs":35560.51104200003},{"questionId":"q120","format":"toon","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8879,"outputTokens":5191,"latencyMs":56691.20683400007},{"questionId":"q120","format":"csv","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8627,"outputTokens":4231,"latencyMs":38942.962958000135},{"questionId":"q120","format":"xml","model":"gpt-5-nano","expected":"43","actual":"39","isCorrect":false,"inputTokens":17204,"outputTokens":11847,"latencyMs":143298.53979200008},{"questionId":"q120","format":"yaml","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":13240,"outputTokens":5767,"latencyMs":58260.77429199987},{"questionId":"q121","format":"json-pretty","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":15258,"outputTokens":2951,"latencyMs":30454.88316700002},{"questionId":"q121","format":"json-compact","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":11565,"outputTokens":4423,"latencyMs":44076.22399999993},{"questionId":"q121","format":"toon","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":8879,"outputTokens":4231,"latencyMs":37241.785167000024},{"questionId":"q121","format":"csv","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":8627,"outputTokens":5895,"latencyMs":90620.1480419999},{"questionId":"q121","format":"xml","model":"gpt-5-nano","expected":"25","actual":"18","isCorrect":false,"inputTokens":17204,"outputTokens":3975,"latencyMs":40301.10383399995},{"questionId":"q121","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens
":13240,"outputTokens":3911,"latencyMs":53672.49758299999},{"questionId":"q122","format":"json-pretty","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":15258,"outputTokens":3463,"latencyMs":35417.804375000065},{"questionId":"q122","format":"json-compact","model":"gpt-5-nano","expected":"6","actual":"5","isCorrect":false,"inputTokens":11565,"outputTokens":4487,"latencyMs":46868.0803749999},{"questionId":"q122","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8879,"outputTokens":4551,"latencyMs":45573.12279199995},{"questionId":"q122","format":"csv","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8627,"outputTokens":2887,"latencyMs":28422.857124999864},{"questionId":"q122","format":"xml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":17204,"outputTokens":3143,"latencyMs":29955.825083999895},{"questionId":"q122","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":13240,"outputTokens":5575,"latencyMs":48339.364959000144},{"questionId":"q123","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":15258,"outputTokens":3143,"latencyMs":25295.857915999833},{"questionId":"q123","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":11565,"outputTokens":3079,"latencyMs":29557.22145800013},{"questionId":"q123","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":8879,"outputTokens":2503,"latencyMs":22121.652333999984},{"questionId":"q123","format":"csv","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":8627,"outputTokens":2695,"latencyMs":31378.58437499986},{"questionId":"q123","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":17204,"outputTokens":1799,"latencyMs":20389.638000000035
},{"questionId":"q123","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":13240,"outputTokens":3079,"latencyMs":28901.529666000046},{"questionId":"q124","format":"json-pretty","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":6905,"outputTokens":263,"latencyMs":3115.7104579999577},{"questionId":"q124","format":"json-compact","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":4927,"outputTokens":327,"latencyMs":7230.5603749998845},{"questionId":"q124","format":"toon","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":5906,"outputTokens":263,"latencyMs":6441.958833999932},{"questionId":"q124","format":"xml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":7815,"outputTokens":199,"latencyMs":3481.11870799982},{"questionId":"q124","format":"yaml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":5933,"outputTokens":135,"latencyMs":2440.1648330001626},{"questionId":"q125","format":"json-pretty","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6905,"outputTokens":841,"latencyMs":9103.615125000011},{"questionId":"q125","format":"json-compact","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":4927,"outputTokens":1097,"latencyMs":10853.658833999885},{"questionId":"q125","format":"toon","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5906,"outputTokens":393,"latencyMs":4811.6409579999745},{"questionId":"q125","format":"xml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":7815,"outputTokens":265,"latencyMs":3462.576958999969},{"questionId":"q125","format":"yaml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTok
ens":5933,"outputTokens":201,"latencyMs":4551.6412080000155},{"questionId":"q126","format":"json-pretty","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":6906,"outputTokens":199,"latencyMs":5946.616957999999},{"questionId":"q126","format":"json-compact","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":4928,"outputTokens":391,"latencyMs":4862.764540999895},{"questionId":"q126","format":"toon","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":5907,"outputTokens":199,"latencyMs":4436.1078329999},{"questionId":"q126","format":"xml","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":7816,"outputTokens":263,"latencyMs":3710.672332999995},{"questionId":"q126","format":"yaml","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":5934,"outputTokens":263,"latencyMs":3584.6445420000236},{"questionId":"q127","format":"json-pretty","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":6906,"outputTokens":264,"latencyMs":3332.7081249998882},{"questionId":"q127","format":"json-compact","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":4928,"outputTokens":328,"latencyMs":6029.872375000035},{"questionId":"q127","format":"toon","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":5907,"outputTokens":264,"latencyMs":6360.82320899982},{"questionId":"q127","format":"xml","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":7816,"outputTokens":328,"latencyMs":4304.171290999977},{"questionId":"q127","format":"yaml","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":5934,"outputTokens":328,"latencyMs":6850.6273750001565},{"questionId":"q128","format":"json-pretty","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":6905,"outpu
tTokens":1095,"latencyMs":25444.60245799995},{"questionId":"q128","format":"json-compact","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":4927,"outputTokens":839,"latencyMs":11782.655000000028},{"questionId":"q128","format":"toon","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5906,"outputTokens":263,"latencyMs":5465.98116700002},{"questionId":"q128","format":"xml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":7815,"outputTokens":263,"latencyMs":3371.2434590000194},{"questionId":"q128","format":"yaml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5933,"outputTokens":263,"latencyMs":3541.5000830001663},{"questionId":"q129","format":"json-pretty","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":6905,"outputTokens":905,"latencyMs":9463.865125000011},{"questionId":"q129","format":"json-compact","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":4927,"outputTokens":649,"latencyMs":6622.134208000032},{"questionId":"q129","format":"toon","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":5906,"outputTokens":329,"latencyMs":4798.912999999942},{"questionId":"q129","format":"xml","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":7815,"outputTokens":265,"latencyMs":5167.431041999953},{"questionId":"q129","format":"yaml","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":5933,"outputTokens":393,"latencyMs":4693.827333000023},{"questionId":"q130","format":"json-pretty","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":6906,"outputTokens":199,"latencyMs":2821.266375000123},{"questionId":"q130","format":"json-compact","model":"gpt-5-nano","expected":
"435","actual":"435","isCorrect":true,"inputTokens":4928,"outputTokens":839,"latencyMs":8631.246000000043},{"questionId":"q130","format":"toon","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":5907,"outputTokens":327,"latencyMs":4855.562292000046},{"questionId":"q130","format":"xml","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":7816,"outputTokens":519,"latencyMs":7240.806624999968},{"questionId":"q130","format":"yaml","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":5934,"outputTokens":1031,"latencyMs":10435.050374999875},{"questionId":"q131","format":"json-pretty","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":6906,"outputTokens":199,"latencyMs":3034.7289579999633},{"questionId":"q131","format":"json-compact","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":4928,"outputTokens":327,"latencyMs":3241.3320420000236},{"questionId":"q131","format":"toon","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":5907,"outputTokens":391,"latencyMs":5222.304125000024},{"questionId":"q131","format":"xml","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":7816,"outputTokens":263,"latencyMs":3285.6503329998814},{"questionId":"q131","format":"yaml","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":5934,"outputTokens":135,"latencyMs":3403.779457999859},{"questionId":"q132","format":"json-pretty","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":6905,"outputTokens":967,"latencyMs":12630.437167000026},{"questionId":"q132","format":"json-compact","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":4927,"outputTokens":1095,"latencyMs":12474.425874999957},{"questionId":"q132","format":"toon","model":"gpt-5-nano","expected":"error","actual":"er
ror","isCorrect":true,"inputTokens":5906,"outputTokens":1863,"latencyMs":15583.305916999932},{"questionId":"q132","format":"xml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":7815,"outputTokens":455,"latencyMs":6376.325249999994},{"questionId":"q132","format":"yaml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":5933,"outputTokens":327,"latencyMs":5189.892333999975},{"questionId":"q133","format":"json-pretty","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6905,"outputTokens":457,"latencyMs":4789.633792000124},{"questionId":"q133","format":"json-compact","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":4927,"outputTokens":969,"latencyMs":9603.09604199999},{"questionId":"q133","format":"toon","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5906,"outputTokens":265,"latencyMs":3472.784415999893},{"questionId":"q133","format":"xml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/payments","isCorrect":false,"inputTokens":7815,"outputTokens":1098,"latencyMs":14847.909750000108},{"questionId":"q133","format":"yaml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/payments","isCorrect":false,"inputTokens":5933,"outputTokens":1354,"latencyMs":14238.748833000194},{"questionId":"q134","format":"json-pretty","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":6889,"outputTokens":3719,"latencyMs":29588.24908400001},{"questionId":"q134","format":"json-compact","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":4911,"outputTokens":5319,"latencyMs":40931.71183300018},{"questionId":"q134","format":"toon","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":5890,"outputTokens":391,"latencyMs":5362.043415999971},{"questionId":"q134","format":"xml
","model":"gpt-5-nano","expected":"75","actual":"100","isCorrect":false,"inputTokens":7799,"outputTokens":2247,"latencyMs":34099.03204199998},{"questionId":"q134","format":"yaml","model":"gpt-5-nano","expected":"75","actual":"100","isCorrect":false,"inputTokens":5917,"outputTokens":20167,"latencyMs":191462.27824999997},{"questionId":"q135","format":"json-pretty","model":"gpt-5-nano","expected":"2453.41","actual":"2413.3866666667","isCorrect":false,"inputTokens":6890,"outputTokens":10189,"latencyMs":114932.37154199998},{"questionId":"q135","format":"json-compact","model":"gpt-5-nano","expected":"2453.41","actual":"2344","isCorrect":false,"inputTokens":4912,"outputTokens":12488,"latencyMs":178401.16920799995},{"questionId":"q135","format":"toon","model":"gpt-5-nano","expected":"2453.41","actual":"2399.5942028985507","isCorrect":false,"inputTokens":5891,"outputTokens":12494,"latencyMs":106734.32024999987},{"questionId":"q135","format":"xml","model":"gpt-5-nano","expected":"2453.41","actual":"2453.4133333333","isCorrect":true,"inputTokens":7800,"outputTokens":9613,"latencyMs":79749.86854199995},{"questionId":"q135","format":"yaml","model":"gpt-5-nano","expected":"2453.41","actual":"2513","isCorrect":false,"inputTokens":5918,"outputTokens":9352,"latencyMs":79398.59566699993},{"questionId":"q136","format":"json-pretty","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":6889,"outputTokens":4615,"latencyMs":42187.74462500005},{"questionId":"q136","format":"json-compact","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":4911,"outputTokens":3527,"latencyMs":32269.509624999948},{"questionId":"q136","format":"toon","model":"gpt-5-nano","expected":"29","actual":"23","isCorrect":false,"inputTokens":5890,"outputTokens":9671,"latencyMs":76910.37262500008},{"questionId":"q136","format":"xml","model":"gpt-5-nano","expected":"29","actual":"28","isCorrect":false,"inputTokens":7799,"outputTokens":10887,"latencyMs":88385.7328
7499999},{"questionId":"q136","format":"yaml","model":"gpt-5-nano","expected":"29","actual":"33","isCorrect":false,"inputTokens":5917,"outputTokens":13319,"latencyMs":135069.12662500003},{"questionId":"q137","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6889,"outputTokens":2503,"latencyMs":24155.693625000073},{"questionId":"q137","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4911,"outputTokens":8519,"latencyMs":80310.74116600002},{"questionId":"q137","format":"toon","model":"gpt-5-nano","expected":"17","actual":"13","isCorrect":false,"inputTokens":5890,"outputTokens":6855,"latencyMs":60660.83295800001},{"questionId":"q137","format":"xml","model":"gpt-5-nano","expected":"17","actual":"13","isCorrect":false,"inputTokens":7799,"outputTokens":3847,"latencyMs":60666.268124999944},{"questionId":"q137","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"14","isCorrect":false,"inputTokens":5917,"outputTokens":7303,"latencyMs":57974.093916999875},{"questionId":"q138","format":"json-pretty","model":"gpt-5-nano","expected":"29","actual":"23","isCorrect":false,"inputTokens":6889,"outputTokens":3463,"latencyMs":38306.33962500002},{"questionId":"q138","format":"json-compact","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":4911,"outputTokens":3591,"latencyMs":29538.051624999847},{"questionId":"q138","format":"toon","model":"gpt-5-nano","expected":"29","actual":"38","isCorrect":false,"inputTokens":5890,"outputTokens":15815,"latencyMs":129636.9376660001},{"questionId":"q138","format":"xml","model":"gpt-5-nano","expected":"29","actual":"22","isCorrect":false,"inputTokens":7799,"outputTokens":10503,"latencyMs":76536.67662499985},{"questionId":"q138","format":"yaml","model":"gpt-5-nano","expected":"29","actual":"50","isCorrect":false,"inputTokens":5917,"outputTokens":13319,"latencyMs":151121.41308299988},{"questionId":"q139"
,"format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6891,"outputTokens":4103,"latencyMs":56042.69449999998},{"questionId":"q139","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":4913,"outputTokens":4743,"latencyMs":37126.876166999806},{"questionId":"q139","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5892,"outputTokens":5639,"latencyMs":40804.70775000006},{"questionId":"q139","format":"xml","model":"gpt-5-nano","expected":"11","actual":"6","isCorrect":false,"inputTokens":7801,"outputTokens":9799,"latencyMs":92226.40016699978},{"questionId":"q139","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5919,"outputTokens":12039,"latencyMs":93280.16320900014},{"questionId":"q140","format":"json-pretty","model":"gpt-5-nano","expected":"18","actual":"18","isCorrect":true,"inputTokens":6891,"outputTokens":4295,"latencyMs":41815.3666660001},{"questionId":"q140","format":"json-compact","model":"gpt-5-nano","expected":"18","actual":"18","isCorrect":true,"inputTokens":4913,"outputTokens":3335,"latencyMs":29692.3865410001},{"questionId":"q140","format":"toon","model":"gpt-5-nano","expected":"18","actual":"17","isCorrect":false,"inputTokens":5892,"outputTokens":6599,"latencyMs":97809.67475},{"questionId":"q140","format":"xml","model":"gpt-5-nano","expected":"18","actual":"15","isCorrect":false,"inputTokens":7801,"outputTokens":12039,"latencyMs":123416.02841699985},{"questionId":"q140","format":"yaml","model":"gpt-5-nano","expected":"18","actual":"17","isCorrect":false,"inputTokens":5919,"outputTokens":10567,"latencyMs":91551.0178749999},{"questionId":"q141","format":"json-pretty","model":"gpt-5-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":6896,"outputTokens":3271,"latencyMs":30817.282166999998},{"questionId":"q141","format":"json-compact","model":"gpt-5
-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":4918,"outputTokens":5703,"latencyMs":63405.342667000135},{"questionId":"q141","format":"toon","model":"gpt-5-nano","expected":"33","actual":"34","isCorrect":false,"inputTokens":5897,"outputTokens":14151,"latencyMs":112034.90975000011},{"questionId":"q141","format":"xml","model":"gpt-5-nano","expected":"33","actual":"37","isCorrect":false,"inputTokens":7806,"outputTokens":15175,"latencyMs":145415.13762499997},{"questionId":"q141","format":"yaml","model":"gpt-5-nano","expected":"33","actual":"63","isCorrect":false,"inputTokens":5924,"outputTokens":18759,"latencyMs":151461.663834},{"questionId":"q142","format":"json-pretty","model":"gpt-5-nano","expected":"42","actual":"43","isCorrect":false,"inputTokens":6894,"outputTokens":5959,"latencyMs":47470.96787500009},{"questionId":"q142","format":"json-compact","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":4916,"outputTokens":5383,"latencyMs":53329.43183299992},{"questionId":"q142","format":"toon","model":"gpt-5-nano","expected":"42","actual":"43","isCorrect":false,"inputTokens":5895,"outputTokens":10375,"latencyMs":85387.93562499993},{"questionId":"q142","format":"xml","model":"gpt-5-nano","expected":"42","actual":"60","isCorrect":false,"inputTokens":7804,"outputTokens":12167,"latencyMs":93500.84408300021},{"questionId":"q142","format":"yaml","model":"gpt-5-nano","expected":"42","actual":"80","isCorrect":false,"inputTokens":5922,"outputTokens":9671,"latencyMs":379540.6658340001},{"questionId":"q143","format":"json-pretty","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":6890,"outputTokens":4743,"latencyMs":38821.55270899995},{"questionId":"q143","format":"json-compact","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":4912,"outputTokens":6727,"latencyMs":57143.98108300008},{"questionId":"q143","format":"toon","model":"gpt-5-nano","expected":"25","actual":"27"
,"isCorrect":false,"inputTokens":5891,"outputTokens":14407,"latencyMs":121313.45120800007},{"questionId":"q143","format":"xml","model":"gpt-5-nano","expected":"25","actual":"19","isCorrect":false,"inputTokens":7800,"outputTokens":7559,"latencyMs":69642.35850000009},{"questionId":"q143","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"17","isCorrect":false,"inputTokens":5918,"outputTokens":16135,"latencyMs":161343.49933400005},{"questionId":"q144","format":"json-pretty","model":"gpt-5-nano","expected":"29","actual":"34","isCorrect":false,"inputTokens":6896,"outputTokens":20167,"latencyMs":230741.27374999993},{"questionId":"q144","format":"json-compact","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":4918,"outputTokens":7495,"latencyMs":63636.350584000116},{"questionId":"q144","format":"toon","model":"gpt-5-nano","expected":"29","actual":"36","isCorrect":false,"inputTokens":5897,"outputTokens":16263,"latencyMs":130179.59895799984},{"questionId":"q144","format":"xml","model":"gpt-5-nano","expected":"29","actual":"37","isCorrect":false,"inputTokens":7806,"outputTokens":11591,"latencyMs":113961.67562500015},{"questionId":"q144","format":"yaml","model":"gpt-5-nano","expected":"29","actual":"37","isCorrect":false,"inputTokens":5924,"outputTokens":16519,"latencyMs":140941.021834},{"questionId":"q145","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6896,"outputTokens":5447,"latencyMs":46888.73112499993},{"questionId":"q145","format":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":4918,"outputTokens":7111,"latencyMs":74291.24454099988},{"questionId":"q145","format":"toon","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":5897,"outputTokens":4423,"latencyMs":38978.35370799992},{"questionId":"q145","format":"xml","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":7806,"o
utputTokens":4039,"latencyMs":36546.21775000007},{"questionId":"q145","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":5924,"outputTokens":4167,"latencyMs":39038.52904099994},{"questionId":"q146","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"4","isCorrect":false,"inputTokens":6898,"outputTokens":3015,"latencyMs":26637.197584000183},{"questionId":"q146","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":4920,"outputTokens":2887,"latencyMs":26040.22404200025},{"questionId":"q146","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5899,"outputTokens":3463,"latencyMs":29262.439125000034},{"questionId":"q146","format":"xml","model":"gpt-5-nano","expected":"5","actual":"6","isCorrect":false,"inputTokens":7808,"outputTokens":6215,"latencyMs":58986.4326249999},{"questionId":"q146","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5926,"outputTokens":4743,"latencyMs":38633.85925000021},{"questionId":"q147","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":6898,"outputTokens":3079,"latencyMs":27888.549333999865},{"questionId":"q147","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":4920,"outputTokens":4679,"latencyMs":43776.82637499971},{"questionId":"q147","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":5899,"outputTokens":5447,"latencyMs":50479.701083000284},{"questionId":"q147","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":7808,"outputTokens":4999,"latencyMs":58853.683209000155},{"questionId":"q147","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":5926,"outputTokens":3655,"latencyMs":31553.769249999896},{"questionId"
:"q148","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6898,"outputTokens":2759,"latencyMs":47123.68912500003},{"questionId":"q148","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":4920,"outputTokens":4359,"latencyMs":41352.168209000025},{"questionId":"q148","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":5899,"outputTokens":6343,"latencyMs":55856.09641700005},{"questionId":"q148","format":"xml","model":"gpt-5-nano","expected":"3","actual":"2","isCorrect":false,"inputTokens":7808,"outputTokens":4103,"latencyMs":35685.38595899986},{"questionId":"q148","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":5926,"outputTokens":6343,"latencyMs":51239.12116699992},{"questionId":"q149","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6896,"outputTokens":6855,"latencyMs":90414.68912500003},{"questionId":"q149","format":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":4918,"outputTokens":2695,"latencyMs":27534.36641700007},{"questionId":"q149","format":"toon","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":5897,"outputTokens":4487,"latencyMs":42209.12216599984},{"questionId":"q149","format":"xml","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":7806,"outputTokens":6279,"latencyMs":75733.36095800018},{"questionId":"q149","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":5924,"outputTokens":7687,"latencyMs":63058.29333399981},{"questionId":"q150","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6896,"outputTokens":4103,"latencyMs":40312.71462500002},{"questionId":"q150","format":"json-compact","model":"gpt-5-nano","expe
cted":"5","actual":"5","isCorrect":true,"inputTokens":4918,"outputTokens":4167,"latencyMs":35873.915792000014},{"questionId":"q150","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5897,"outputTokens":5703,"latencyMs":57978.6901250002},{"questionId":"q150","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7806,"outputTokens":6023,"latencyMs":54796.04729200015},{"questionId":"q150","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"4","isCorrect":false,"inputTokens":5924,"outputTokens":5127,"latencyMs":43498.875916999765},{"questionId":"q151","format":"json-pretty","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":1023,"outputTokens":583,"latencyMs":6742.807415999938},{"questionId":"q151","format":"json-compact","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":665,"outputTokens":455,"latencyMs":8014.11641700007},{"questionId":"q151","format":"toon","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":756,"outputTokens":135,"latencyMs":3002.658749999944},{"questionId":"q151","format":"xml","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":1107,"outputTokens":135,"latencyMs":2293.2929159998894},{"questionId":"q151","format":"yaml","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":775,"outputTokens":135,"latencyMs":3505.548708999995},{"questionId":"q152","format":"json-pretty","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1021,"outputTokens":268,"latencyMs":3357.363291999791},{"questionId":"q152","format":"json-compact","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":663,"outputTokens":396,"latencyMs":4544
.950250000227},{"questionId":"q152","format":"toon","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":754,"outputTokens":204,"latencyMs":3910.6737079997547},{"questionId":"q152","format":"xml","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1105,"outputTokens":140,"latencyMs":2807.8532920000143},{"questionId":"q152","format":"yaml","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":773,"outputTokens":204,"latencyMs":6755.754457999952},{"questionId":"q153","format":"json-pretty","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1021,"outputTokens":136,"latencyMs":2664.359124999959},{"questionId":"q153","format":"json-compact","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":663,"outputTokens":136,"latencyMs":2253.774665999692},{"questionId":"q153","format":"toon","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":754,"outputTokens":200,"latencyMs":5005.998707999941},{"questionId":"q153","format":"xml","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1105,"outputTokens":72,"latencyMs":2703.680333000142},{"questionId":"q153","format":"yaml","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":773,"outputTokens":72,"latencyMs":1897.0315409996547},{"questionId":"q154","format":"json-pretty","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":3792.394541000016},{"questionId":"q154","format":"json-compact","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":665,"outputTokens":135,"latencyMs":4632.95924999984},{"questionId":"q154","format":"toon","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":756,"o
utputTokens":135,"latencyMs":2394.134250000119},{"questionId":"q154","format":"xml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":1107,"outputTokens":135,"latencyMs":2430.5971249998547},{"questionId":"q154","format":"yaml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":775,"outputTokens":135,"latencyMs":2441.4810000001453},{"questionId":"q155","format":"json-pretty","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1021,"outputTokens":72,"latencyMs":1814.3805419998243},{"questionId":"q155","format":"json-compact","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":663,"outputTokens":200,"latencyMs":3557.1527090002783},{"questionId":"q155","format":"toon","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":754,"outputTokens":136,"latencyMs":3990.5303329997696},{"questionId":"q155","format":"xml","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1105,"outputTokens":136,"latencyMs":3151.5690419999883},{"questionId":"q155","format":"yaml","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":773,"outputTokens":136,"latencyMs":2329.0516249998473},{"questionId":"q156","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":1982.217999999877},{"questionId":"q156","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":665,"outputTokens":199,"latencyMs":2489.357166999951},{"questionId":"q156","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":756,"outputTokens":199,"latencyMs":3184.5403330000117},{"questionId":"q156","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1107,"outputTokens":199,"latencyMs":3035.35
30419999734},{"questionId":"q156","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":775,"outputTokens":199,"latencyMs":2576.0412079999223},{"questionId":"q157","format":"json-pretty","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1023,"outputTokens":136,"latencyMs":15388.45091599971},{"questionId":"q157","format":"json-compact","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":665,"outputTokens":200,"latencyMs":3027.245583000127},{"questionId":"q157","format":"toon","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":756,"outputTokens":200,"latencyMs":2682.324666999746},{"questionId":"q157","format":"xml","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1107,"outputTokens":72,"latencyMs":2551.2237090002745},{"questionId":"q157","format":"yaml","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":775,"outputTokens":136,"latencyMs":4109.2170409997925},{"questionId":"q158","format":"json-pretty","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":1021,"outputTokens":199,"latencyMs":3716.7424159999937},{"questionId":"q158","format":"json-compact","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":663,"outputTokens":455,"latencyMs":6808.871625000145},{"questionId":"q158","format":"toon","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":754,"outputTokens":327,"latencyMs":4373.244665999897},{"questionId":"q158","format":"xml","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":1105,"outputTokens":135,"latencyMs":2383.4876660001464},{"questionId":"q158","format":"yaml","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":773,"outputTokens":263,"latencyMs":3284.6608330002055}
,{"questionId":"q159","format":"json-pretty","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1022,"outputTokens":136,"latencyMs":2568.9396250001155},{"questionId":"q159","format":"json-compact","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":664,"outputTokens":136,"latencyMs":2548.5491659999825},{"questionId":"q159","format":"toon","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":755,"outputTokens":200,"latencyMs":2528.12133300025},{"questionId":"q159","format":"xml","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1106,"outputTokens":200,"latencyMs":3261.567334000021},{"questionId":"q159","format":"yaml","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":774,"outputTokens":264,"latencyMs":5840.089374999981},{"questionId":"q160","format":"json-pretty","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1023,"outputTokens":459,"latencyMs":4751.093458000105},{"questionId":"q160","format":"json-compact","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":665,"outputTokens":715,"latencyMs":9806.253459000029},{"questionId":"q160","format":"toon","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":756,"outputTokens":267,"latencyMs":3160.0135419997387},{"questionId":"q160","format":"xml","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1107,"outputTokens":459,"latencyMs":4843.594291999936},{"questionId":"q160","format":"yaml","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":775,"outputTokens":395,"latencyMs":5162.69116699975},{"questionId":"q161","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":3906.6883749999106},{"ques
tionId":"q161","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":665,"outputTokens":263,"latencyMs":3832.8434589998797},{"questionId":"q161","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":756,"outputTokens":263,"latencyMs":4753.713458000217},{"questionId":"q161","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1107,"outputTokens":199,"latencyMs":6292.803332999814},{"questionId":"q161","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":775,"outputTokens":199,"latencyMs":6083.018833000213},{"questionId":"q162","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":2117.091250000056},{"questionId":"q162","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":665,"outputTokens":199,"latencyMs":5661.312124999706},{"questionId":"q162","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":756,"outputTokens":263,"latencyMs":5603.599000000395},{"questionId":"q162","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1107,"outputTokens":135,"latencyMs":4710.21570800012},{"questionId":"q162","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":775,"outputTokens":199,"latencyMs":7471.390374999959},{"questionId":"q163","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1022,"outputTokens":199,"latencyMs":4701.419417000376},{"questionId":"q163","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":664,"outputTokens":199,"latencyMs":3847.8547080000862},{"questionId":"q163","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2
","isCorrect":true,"inputTokens":755,"outputTokens":263,"latencyMs":3375.736333000008},{"questionId":"q163","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1106,"outputTokens":263,"latencyMs":4812.272791000083},{"questionId":"q163","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":774,"outputTokens":199,"latencyMs":2934.4373750002123},{"questionId":"q164","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1022,"outputTokens":135,"latencyMs":2458.2781249997206},{"questionId":"q164","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":664,"outputTokens":263,"latencyMs":3522.378250000067},{"questionId":"q164","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":755,"outputTokens":327,"latencyMs":4799.6738340002485},{"questionId":"q164","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1106,"outputTokens":199,"latencyMs":3384.5427500000224},{"questionId":"q164","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":774,"outputTokens":263,"latencyMs":5075.06341599999},{"questionId":"q165","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1022,"outputTokens":199,"latencyMs":2597.252208999824},{"questionId":"q165","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":664,"outputTokens":199,"latencyMs":2931.3202499998733},{"questionId":"q165","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":755,"outputTokens":71,"latencyMs":2898.455083000008},{"questionId":"q165","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1106,"outputTokens":263,"latencyMs":3072.91179199982
43},{"questionId":"q165","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":774,"outputTokens":71,"latencyMs":2456.2880830001086},{"questionId":"q166","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1026,"outputTokens":199,"latencyMs":2803.085833000019},{"questionId":"q166","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":668,"outputTokens":839,"latencyMs":7020.810707999859},{"questionId":"q166","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":759,"outputTokens":199,"latencyMs":2661.654792000074},{"questionId":"q166","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1110,"outputTokens":327,"latencyMs":5022.77420799993},{"questionId":"q166","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":778,"outputTokens":263,"latencyMs":3315.3438749997877},{"questionId":"q167","format":"json-pretty","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1022,"outputTokens":263,"latencyMs":4004.776000000071},{"questionId":"q167","format":"json-compact","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":664,"outputTokens":327,"latencyMs":4605.751166999806},{"questionId":"q167","format":"toon","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":755,"outputTokens":327,"latencyMs":5653.84929200029},{"questionId":"q167","format":"xml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1106,"outputTokens":263,"latencyMs":3888.8481250000186},{"questionId":"q167","format":"yaml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":774,"outputTokens":263,"latencyMs":4843.462165999692},{"questionId":"q168","format":"json-pretty","model":"gpt-5-nano","expected":"5","ac
tual":"5","isCorrect":true,"inputTokens":1024,"outputTokens":199,"latencyMs":2777.7275000000373},{"questionId":"q168","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":666,"outputTokens":711,"latencyMs":10276.33304100018},{"questionId":"q168","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":757,"outputTokens":199,"latencyMs":4521.871375000104},{"questionId":"q168","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":1108,"outputTokens":199,"latencyMs":5794.047832999844},{"questionId":"q168","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":776,"outputTokens":135,"latencyMs":3790.2669999999925},{"questionId":"q169","format":"json-pretty","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":1026,"outputTokens":391,"latencyMs":5940.847500000149},{"questionId":"q169","format":"json-compact","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":668,"outputTokens":455,"latencyMs":4762.850875000004},{"questionId":"q169","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":759,"outputTokens":455,"latencyMs":4719.768250000197},{"questionId":"q169","format":"xml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":1110,"outputTokens":327,"latencyMs":6340.48066599993},{"questionId":"q169","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":778,"outputTokens":263,"latencyMs":3185.5706250001676},{"questionId":"q170","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":1025,"outputTokens":455,"latencyMs":4706.524750000332},{"questionId":"q170","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":667,"outputTokens":327,"latencyMs":
5000.733999999706},{"questionId":"q170","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":758,"outputTokens":455,"latencyMs":6487.487999999896},{"questionId":"q170","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":1109,"outputTokens":455,"latencyMs":8960.450375000015},{"questionId":"q170","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":777,"outputTokens":391,"latencyMs":5475.017541000154},{"questionId":"q171","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1027,"outputTokens":263,"latencyMs":3693.9329169997945},{"questionId":"q171","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":669,"outputTokens":263,"latencyMs":3648.1655830000527},{"questionId":"q171","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":760,"outputTokens":967,"latencyMs":8853.470167000312},{"questionId":"q171","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1111,"outputTokens":263,"latencyMs":3237.0124579998665},{"questionId":"q171","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":779,"outputTokens":263,"latencyMs":4246.907124999911},{"questionId":"q172","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1027,"outputTokens":199,"latencyMs":3634.9288749997504},{"questionId":"q172","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":669,"outputTokens":327,"latencyMs":20629.421875},{"questionId":"q172","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":760,"outputTokens":199,"latencyMs":3223.972542000003},{"questionId":"q172","format":"xml","model":"gpt-5-nano","expected":"
1","actual":"1","isCorrect":true,"inputTokens":1111,"outputTokens":327,"latencyMs":13383.079999999609},{"questionId":"q172","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":779,"outputTokens":263,"latencyMs":4296.551624999847},{"questionId":"q173","format":"json-pretty","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1028,"outputTokens":199,"latencyMs":2751.2610419997945},{"questionId":"q173","format":"json-compact","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":670,"outputTokens":327,"latencyMs":4416.645750000142},{"questionId":"q173","format":"toon","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":761,"outputTokens":519,"latencyMs":7847.565500000026},{"questionId":"q173","format":"xml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1112,"outputTokens":263,"latencyMs":6915.099750000052},{"questionId":"q173","format":"yaml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":780,"outputTokens":391,"latencyMs":6582.503958000336},{"questionId":"q174","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1025,"outputTokens":135,"latencyMs":2869.2113329996355},{"questionId":"q174","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":667,"outputTokens":263,"latencyMs":3683.578083000146},{"questionId":"q174","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":758,"outputTokens":327,"latencyMs":11444.25062499987},{"questionId":"q174","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1109,"outputTokens":327,"latencyMs":11302.633040999994},{"questionId":"q174","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":777,"outputTokens":135,"latencyMs":
3082.4122499995865},{"questionId":"q175","format":"json-pretty","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1029,"outputTokens":327,"latencyMs":4507.2793749999255},{"questionId":"q175","format":"json-compact","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":671,"outputTokens":263,"latencyMs":4098.119249999989},{"questionId":"q175","format":"toon","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":762,"outputTokens":327,"latencyMs":10054.765292000026},{"questionId":"q175","format":"xml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1113,"outputTokens":391,"latencyMs":6727.330041999929},{"questionId":"q175","format":"yaml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":781,"outputTokens":391,"latencyMs":7630.485625000205},{"questionId":"q176","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":2451.910416999832},{"questionId":"q176","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":665,"outputTokens":263,"latencyMs":3689.5402919999324},{"questionId":"q176","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":756,"outputTokens":199,"latencyMs":2873.5955409999005},{"questionId":"q176","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1107,"outputTokens":199,"latencyMs":3277.6729580000974},{"questionId":"q176","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":775,"outputTokens":263,"latencyMs":4645.139667000156},{"questionId":"q177","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1031,"outputTokens":327,"latencyMs":3554.097957999911},{"questionId":"q177","format":"json-compact","model":"
gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":673,"outputTokens":263,"latencyMs":3744.1457079998218},{"questionId":"q177","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":764,"outputTokens":263,"latencyMs":6140.8108749999665},{"questionId":"q177","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1115,"outputTokens":327,"latencyMs":7183.078166999854},{"questionId":"q177","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":783,"outputTokens":263,"latencyMs":5394.631290999707},{"questionId":"q178","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1026,"outputTokens":135,"latencyMs":2505.3744160002097},{"questionId":"q178","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":668,"outputTokens":775,"latencyMs":11512.826999999583},{"questionId":"q178","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":759,"outputTokens":583,"latencyMs":9364.836542000063},{"questionId":"q178","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1110,"outputTokens":199,"latencyMs":4985.15625},{"questionId":"q178","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":778,"outputTokens":199,"latencyMs":3276.027791999746},{"questionId":"q179","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1025,"outputTokens":199,"latencyMs":4783.0132499998435},{"questionId":"q179","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":667,"outputTokens":839,"latencyMs":9481.153750000056},{"questionId":"q179","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":758,"outputTokens"
:199,"latencyMs":4011.8218750003725},{"questionId":"q179","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1109,"outputTokens":327,"latencyMs":4096.573667000048},{"questionId":"q179","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":777,"outputTokens":455,"latencyMs":5798.487083999906},{"questionId":"q180","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":6451,"outputTokens":135,"latencyMs":5243.732333999593},{"questionId":"q180","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":4044,"outputTokens":135,"latencyMs":2556.379874999635},{"questionId":"q180","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2608,"outputTokens":135,"latencyMs":1891.7810420002788},{"questionId":"q180","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2442,"outputTokens":263,"latencyMs":5116.138582999818},{"questionId":"q180","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":7413,"outputTokens":135,"latencyMs":5086.265749999788},{"questionId":"q180","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":5071,"outputTokens":135,"latencyMs":2536.9129999997094},{"questionId":"q181","format":"json-pretty","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":6456,"outputTokens":338,"latencyMs":7833.342083000112},{"questionId":"q181","format":"json-compact","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4049,"outputTokens":594,"latencyMs
":5790.048458999954},{"questionId":"q181","format":"toon","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2613,"outputTokens":146,"latencyMs":3634.676667000167},{"questionId":"q181","format":"csv","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2447,"outputTokens":210,"latencyMs":3299.1852080002427},{"questionId":"q181","format":"xml","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7418,"outputTokens":466,"latencyMs":5790.619957999792},{"questionId":"q181","format":"yaml","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5076,"outputTokens":466,"latencyMs":5859.513791000005},{"questionId":"q182","format":"json-pretty","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":6454,"outputTokens":775,"latencyMs":11483.762374999933},{"questionId":"q182","format":"json-compact","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":4047,"outputTokens":519,"latencyMs":8336.065458999947},{"questionId":"q182","format":"toon","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":2611,"outputTokens":327,"latencyMs":4580.0390419997275},{"questionId":"q182","format":"csv","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":2445,"outputTokens":135,"latencyMs":4742.354375000112},{"questionId":"q182","format":"xml","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":7416,"ou
tputTokens":263,"latencyMs":4211.900249999948},{"questionId":"q182","format":"yaml","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":5074,"outputTokens":327,"latencyMs":4381.881250000093},{"questionId":"q183","format":"json-pretty","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":6455,"outputTokens":327,"latencyMs":4497.625540999696},{"questionId":"q183","format":"json-compact","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4048,"outputTokens":199,"latencyMs":3268.267374999821},{"questionId":"q183","format":"toon","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2612,"outputTokens":263,"latencyMs":5449.473415999673},{"questionId":"q183","format":"csv","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2446,"outputTokens":263,"latencyMs":4097.444000000134},{"questionId":"q183","format":"xml","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":7417,"outputTokens":263,"latencyMs":4184.440708999988},{"questionId":"q183","format":"yaml","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5075,"outputTokens":135,"latencyMs":2946.645917000249},{"questionId":"q184","format":"json-pretty","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":6455,"outputTokens":204,"latencyMs":9614.618167000357},{"questionId":"q184","format":"json-compact","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":4048,"outputTokens":140,"latencyMs":2873.793583000079},{"questionId":"q184","format":"toon","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":2612,"outputTokens":140,"latencyMs":2877.5963750001974},{"questionId":"q184","format":"csv","model":"gpt-5-nano","expected":"Mrs. 
Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":2446,"outputTokens":204,"latencyMs":3551.0457079997286},{"questionId":"q184","format":"xml","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":7417,"outputTokens":140,"latencyMs":2625.599458000157},{"questionId":"q184","format":"yaml","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":5075,"outputTokens":204,"latencyMs":2858.9412090000696},{"questionId":"q185","format":"json-pretty","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":6452,"outputTokens":199,"latencyMs":3881.6359999999404},{"questionId":"q185","format":"json-compact","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":4045,"outputTokens":263,"latencyMs":20388.577374999877},{"questionId":"q185","format":"toon","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":2609,"outputTokens":199,"latencyMs":3270.6690000002272},{"questionId":"q185","format":"csv","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":2443,"outputTokens":3591,"latencyMs":29106.583208000287},{"questionId":"q185","format":"xml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":7414,"outputTokens":391,"latencyMs":4287.198749999981},{"questionId":"q185","format":"yaml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":5072,"outputTokens":327,"latencyMs":3268.473958000075},{"questionId":"q186","format":"json-pretty","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":10804,"outputTokens":583,"latencyMs":5497.04287499981},{"questionId":"q186","format":"json-compact","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":6885,"outputTokens":519,"latencyMs":8948.515999999829},{"questionId":"q186","format":"toon",
"model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":7322,"outputTokens":199,"latencyMs":4607.618165999651},{"questionId":"q186","format":"xml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":12112,"outputTokens":263,"latencyMs":3848.347834000364},{"questionId":"q186","format":"yaml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":8436,"outputTokens":263,"latencyMs":3796.977042000275},{"questionId":"q187","format":"json-pretty","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orders","isCorrect":false,"inputTokens":10811,"outputTokens":455,"latencyMs":5749.156665999908},{"questionId":"q187","format":"json-compact","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":6892,"outputTokens":725,"latencyMs":8672.660540999845},{"questionId":"q187","format":"toon","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":7329,"outputTokens":533,"latencyMs":5036.442500000354},{"questionId":"q187","format":"xml","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":12119,"outputTokens":405,"latencyMs":4477.925249999855},{"questionId":"q187","format":"yaml","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8443,"outputTokens":533,"latencyMs":5181.2483749999665},{"questionId":"q188","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTok
ens":10807,"outputTokens":263,"latencyMs":4242.422333000228},{"questionId":"q188","format":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6888,"outputTokens":519,"latencyMs":5718.889750000089},{"questionId":"q188","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":7325,"outputTokens":327,"latencyMs":5340.791166000068},{"questionId":"q188","format":"xml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":12115,"outputTokens":967,"latencyMs":9309.043292000424},{"questionId":"q188","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8439,"outputTokens":583,"latencyMs":6073.824750000145},{"questionId":"q189","format":"json-pretty","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":10812,"outputTokens":140,"latencyMs":4690.233665999956},{"questionId":"q189","format":"json-compact","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":6893,"outputTokens":268,"latencyMs":5466.577833000105},{"questionId":"q189","format":"toon","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":7330,"outputTokens":204,"latencyMs":2964.15870800009},{"questionId":"q189","format":"xml","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":12120,"outputTokens":332,"latencyMs":4515.02179200016},{"questionId":"q189","format":"yaml","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8444,"outputTokens":268,"latencyMs":8201.164624999743},{"questionId":"q190","format":"json-pretty","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10808,"ou
tputTokens":200,"latencyMs":4878.779084000271},{"questionId":"q190","format":"json-compact","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":6889,"outputTokens":200,"latencyMs":3476.9356669997796},{"questionId":"q190","format":"toon","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7326,"outputTokens":200,"latencyMs":2631.7864590003155},{"questionId":"q190","format":"xml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12116,"outputTokens":200,"latencyMs":3225.6047499999404},{"questionId":"q190","format":"yaml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8440,"outputTokens":264,"latencyMs":6446.187125000171},{"questionId":"q191","format":"json-pretty","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":10813,"outputTokens":395,"latencyMs":4756.288000000175},{"questionId":"q191","format":"json-compact","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":6894,"outputTokens":267,"latencyMs":4393.102415999863},{"questionId":"q191","format":"toon","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":7331,"outputTokens":267,"latencyMs":5854.2481249999255},{"questionId":"q191","format":"xml","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":12121,"outputTokens":267,"latencyMs":6437.172999999952},{"questionId":"q191","format":"yaml","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8445,"outputTokens":203,"latencyMs":3253.0284589999355},{"questionId":"q192","format":"json-pretty","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3768,"
outputTokens":839,"latencyMs":7548.902958999854},{"questionId":"q192","format":"json-compact","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":2441,"outputTokens":647,"latencyMs":8269.41454199981},{"questionId":"q192","format":"toon","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1639,"outputTokens":263,"latencyMs":8128.112708000001},{"questionId":"q192","format":"csv","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1497,"outputTokens":967,"latencyMs":14334.201832999941},{"questionId":"q192","format":"xml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":4474,"outputTokens":903,"latencyMs":11843.660625000019},{"questionId":"q192","format":"yaml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3039,"outputTokens":839,"latencyMs":12383.660041000228},{"questionId":"q193","format":"json-pretty","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3772,"outputTokens":212,"latencyMs":3716.6943749999627},{"questionId":"q193","format":"json-compact","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2445,"outputTokens":276,"latencyMs":4321.190417000093},{"questionId":"q193","format":"toon","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1643,"outputTokens":276,"latencyMs":3628.611583000049},{"questionId":"q193","format":"csv","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1501,"outputTokens":276,"latencyMs":3329.85208300
03545},{"questionId":"q193","format":"xml","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4478,"outputTokens":276,"latencyMs":8496.843958000187},{"questionId":"q193","format":"yaml","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3043,"outputTokens":340,"latencyMs":4394.631957999896},{"questionId":"q194","format":"json-pretty","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3771,"outputTokens":392,"latencyMs":7423.73466600012},{"questionId":"q194","format":"json-compact","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2444,"outputTokens":584,"latencyMs":5982.649750000332},{"questionId":"q194","format":"toon","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1642,"outputTokens":200,"latencyMs":4239.526542000007},{"questionId":"q194","format":"csv","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1500,"outputTokens":200,"latencyMs":5031.122084000148},{"questionId":"q194","format":"xml","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":4477,"outputTokens":264,"latencyMs":7930.7731669996865},{"questionId":"q194","format":"yaml","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3042,"outputTokens":264,"latencyMs":3106.251792000141},{"questionId":"q195","format":"json-pretty","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3772,"outputTokens":524,"latencyMs":8153.412459000014},{"questionId":"q195","format":"json-compact","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2445,
"outputTokens":588,"latencyMs":7503.793999999762},{"questionId":"q195","format":"toon","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1643,"outputTokens":204,"latencyMs":3050.2509169997647},{"questionId":"q195","format":"csv","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1501,"outputTokens":140,"latencyMs":2617.243791999761},{"questionId":"q195","format":"xml","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4478,"outputTokens":204,"latencyMs":3606.4029169999994},{"questionId":"q195","format":"yaml","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3043,"outputTokens":140,"latencyMs":2635.89858300006},{"questionId":"q196","format":"json-pretty","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":3768,"outputTokens":199,"latencyMs":2703.3255830002017},{"questionId":"q196","format":"json-compact","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":2441,"outputTokens":327,"latencyMs":8638.098624999635},{"questionId":"q196","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":1639,"outputTokens":263,"latencyMs":5401.915333000012},{"questionId":"q196","format":"csv","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":1497,"outputTokens":327,"latencyMs":5221.692542000208},{"questionId":"q196","format":"xml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":4474,"outputTokens":263,"latencyMs":3620.0856249998324},{"questionId":"q196","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":3039,"outputTokens":263,"latencyMs":3699.8902920000255},{"questionId":"q197","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":15248,"outputTo
kens":1415,"latencyMs":16647.309458999895},{"questionId":"q197","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":11555,"outputTokens":1415,"latencyMs":28783.25216699997},{"questionId":"q197","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8869,"outputTokens":199,"latencyMs":2771.9329590001144},{"questionId":"q197","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8617,"outputTokens":1543,"latencyMs":13179.85262500029},{"questionId":"q197","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":17194,"outputTokens":2439,"latencyMs":22739.320625000168},{"questionId":"q197","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":13230,"outputTokens":6023,"latencyMs":51610.507999999914},{"questionId":"q198","format":"json-pretty","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":15253,"outputTokens":415,"latencyMs":4503.3462910000235},{"questionId":"q198","format":"json-compact","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":11560,"outputTokens":543,"latencyMs":5954.608874999918},{"questionId":"q198","format":"toon","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8874,"outputTokens":351,"latencyMs":5119.701166999992},{"
questionId":"q198","format":"csv","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8622,"outputTokens":223,"latencyMs":3383.965749999974},{"questionId":"q198","format":"xml","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17199,"outputTokens":351,"latencyMs":8255.69604199985},{"questionId":"q198","format":"yaml","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":13235,"outputTokens":415,"latencyMs":5688.796708000358},{"questionId":"q199","format":"json-pretty","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":15253,"outputTokens":713,"latencyMs":6935.332459000405},{"questionId":"q199","format":"json-compact","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":11560,"outputTokens":777,"latencyMs":7743.049292000011},{"questionId":"q199","format":"toon","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":8874,"outputTokens":329,"latencyMs":7626.734916999936},{"questionId":"q199","format":"csv","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":8622,"outputTokens":201,"latencyMs":3741.813084000256},{"questionId":"q199","format":"xml","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":17199,"outputTokens":329,"latencyMs":4252.712875000201},{"questionId":"q199","form
at":"yaml","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":13235,"outputTokens":329,"latencyMs":4048.160000000149},{"questionId":"q200","format":"json-pretty","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":15252,"outputTokens":265,"latencyMs":4140.997250000015},{"questionId":"q200","format":"json-compact","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":11559,"outputTokens":457,"latencyMs":4518.3047080002725},{"questionId":"q200","format":"toon","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":8873,"outputTokens":393,"latencyMs":4574.140791999642},{"questionId":"q200","format":"csv","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":8621,"outputTokens":585,"latencyMs":5762.444084000308},{"questionId":"q200","format":"xml","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17198,"outputTokens":393,"latencyMs":4484.691707999911},{"questionId":"q200","format":"yaml","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":13234,"outputTokens":265,"latencyMs":4706.706375000067},{"questionId":"q201","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":15249,"outputTokens":327,"latencyMs":3816.0244579999708},{"questionId":"q201","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":11556,"outputTokens":455,"latencyMs":6546.3870419999585},{"questionId":"q201","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":8870,"outputTokens":263,"latencyMs":4477.5936670000665},{"questionId":"q201","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":8618,"outpu
tTokens":455,"latencyMs":5239.246875000186},{"questionId":"q201","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":17195,"outputTokens":647,"latencyMs":7984.748166000005},{"questionId":"q201","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":13231,"outputTokens":263,"latencyMs":8407.612875000108},{"questionId":"q202","format":"json-pretty","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":6889,"outputTokens":2951,"latencyMs":27031.601250000298},{"questionId":"q202","format":"json-compact","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":4911,"outputTokens":10631,"latencyMs":81286.11437499989},{"questionId":"q202","format":"toon","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":5890,"outputTokens":135,"latencyMs":3970.110458999872},{"questionId":"q202","format":"xml","model":"gpt-5-nano","expected":"75","actual":"58","isCorrect":false,"inputTokens":7799,"outputTokens":4871,"latencyMs":149805.12412499962},{"questionId":"q202","format":"yaml","model":"gpt-5-nano","expected":"75","actual":"100","isCorrect":false,"inputTokens":5917,"outputTokens":13703,"latencyMs":155739.1889589997},{"questionId":"q203","format":"json-pretty","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6898,"outputTokens":466,"latencyMs":9108.469208000228},{"questionId":"q203","format":"json-compact","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":4920,"outputTokens":1106,"latencyMs":10545.836332999635},{"questionId":"q203","format":"toon","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,respo
nseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":5899,"outputTokens":594,"latencyMs":8183.054624999873},{"questionId":"q203","format":"xml","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7808,"outputTokens":1426,"latencyMs":11678.348749999888},{"questionId":"q203","format":"yaml","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":5926,"outputTokens":850,"latencyMs":8783.712416999973},{"questionId":"q204","format":"json-pretty","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":6893,"outputTokens":455,"latencyMs":4951.810749999713},{"questionId":"q204","format":"json-compact","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":4915,"outputTokens":583,"latencyMs":8194.983541999944},{"questionId":"q204","format":"toon","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5894,"outputTokens":263,"latencyMs":8032.524958000053},{"questionId":"q204","format":"xml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":7803,"outputTokens":263,"latencyMs":5646.691916999873},{"questionId":"q204","format":"yaml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5921,"outputTokens":199,"latencyMs":3208.1868750001304},{"questionId":"q205","format":"json-pretty","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1378,"outputTokens":455,"latencyMs":5234.0922079999},{"questionId":"q205","format":"json-compact","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":891,"outputTokens":
1159,"latencyMs":13605.088709000032},{"questionId":"q205","format":"toon","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":639,"outputTokens":711,"latencyMs":6515.72516599996},{"questionId":"q205","format":"csv","model":"gpt-5-nano","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":587,"outputTokens":1479,"latencyMs":14114.901416999754},{"questionId":"q205","format":"xml","model":"gpt-5-nano","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":1561,"outputTokens":327,"latencyMs":5021.389500000048},{"questionId":"q205","format":"yaml","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1097,"outputTokens":455,"latencyMs":5526.448458000086},{"questionId":"q206","format":"json-pretty","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1192,"outputTokens":647,"latencyMs":6677.833459000103},{"questionId":"q206","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":777,"outputTokens":455,"latencyMs":5277.759208000265},{"questionId":"q206","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":569,"outputTokens":1543,"latencyMs":16482.88245799998},{"questionId":"q206","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":520,"outputTokens":647,"latencyMs":8937.574082999956},{"questionId":"q206","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1345,"outputTokens":391,"latencyMs":5002.396875000093},{"questionId":"q206","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":953,"outputTokens":711,"latencyMs":6825.064792000223},{"questionId":"q207","format":"json-pretty","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1572,"outputTokens":1351,"latencyMs":11435.325457999948},{"questionId":"
q207","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1013,"outputTokens":1671,"latencyMs":17690.187415999826},{"questionId":"q207","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":716,"outputTokens":3399,"latencyMs":31119.575375000015},{"questionId":"q207","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":661,"outputTokens":1031,"latencyMs":13081.246334000025},{"questionId":"q207","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1786,"outputTokens":455,"latencyMs":8677.37220800016},{"questionId":"q207","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1251,"outputTokens":903,"latencyMs":8989.367707999889},{"questionId":"q208","format":"json-pretty","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1370,"outputTokens":775,"latencyMs":7549.4047079999},{"questionId":"q208","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":886,"outputTokens":1095,"latencyMs":9804.55941700004},{"questionId":"q208","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1114,"outputTokens":967,"latencyMs":8704.191707999911},{"questionId":"q208","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":584,"outputTokens":519,"latencyMs":8088.646707999986},{"questionId":"q208","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1552,"outputTokens":583,"latencyMs":8100.714499999769},{"questionId":"q208","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1090,"outputTokens":967,"latencyMs":8288.674542000052},{"questionId":"q209","format":"json-pretty","model":"gpt-5-nano","expected":"NO",
"actual":"YES","isCorrect":false,"inputTokens":1326,"outputTokens":647,"latencyMs":6657.2364159999415},{"questionId":"q209","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":851,"outputTokens":1159,"latencyMs":13106.269707999658},{"questionId":"q209","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1078,"outputTokens":1351,"latencyMs":22118.10566599993},{"questionId":"q209","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":424,"outputTokens":1031,"latencyMs":10323.110000000335},{"questionId":"q209","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1505,"outputTokens":455,"latencyMs":5428.757249999791},{"questionId":"q209","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1054,"outputTokens":1671,"latencyMs":15239.667082999833}] ================================================ FILE: benchmarks/results/accuracy/models/grok-4-1-fast-non-reasoning ================================================ 
[{"questionId":"q1","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":6531,"outputTokens":2,"latencyMs":744.4004170000003},{"questionId":"q1","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":4122,"outputTokens":2,"latencyMs":624.5607500000001},{"questionId":"q1","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":2701,"outputTokens":2,"latencyMs":783.8468750000002},{"questionId":"q1","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":2538,"outputTokens":2,"latencyMs":612.0763750000001},{"questionId":"q1","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":7470,"outputTokens":2,"latencyMs":870.1430420000002},{"questionId":"q1","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":5170,"outputTokens":2,"latencyMs":606.9509579999999},{"questionId":"q2","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":6531,"outputTokens":1,"latencyMs":873.3557919999998},{"questionId":"q2","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4122,"outputTokens":1,"latencyMs":613.800542},{"questionId":"q2","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2701,"outputTokens":1,"latencyMs":754.9637910000001},{"questionId":"q2","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2538,"outputTokens":1,"latencyMs":867.420541},{"questionId
":"q2","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7470,"outputTokens":1,"latencyMs":817.9377920000002},{"questionId":"q2","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5170,"outputTokens":1,"latencyMs":469.9504579999998},{"questionId":"q3","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":6531,"outputTokens":7,"latencyMs":661.1184170000001},{"questionId":"q3","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":4122,"outputTokens":7,"latencyMs":488.89708299999984},{"questionId":"q3","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":2701,"outputTokens":7,"latencyMs":441.70254200000045},{"questionId":"q3","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":2538,"outputTokens":7,"latencyMs":532.721708},{"questionId":"q3","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":7470,"outputTokens":7,"latencyMs":634.2196670000003},{"questionId":"q3","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":5170,"outputTokens":7,"latencyMs":696.9583749999997},{"questionId":"q4","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":13224,"outputTokens":1,"latencyMs":917.3097500000003},{"question
Id":"q4","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":4123,"outputTokens":1,"latencyMs":716.4699999999998},{"questionId":"q4","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":2702,"outputTokens":1,"latencyMs":580.384791},{"questionId":"q4","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":2539,"outputTokens":1,"latencyMs":416.5690419999996},{"questionId":"q4","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":7471,"outputTokens":1,"latencyMs":634.5267919999997},{"questionId":"q4","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":5171,"outputTokens":1,"latencyMs":643.9792089999996},{"questionId":"q5","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":702.2764589999997},{"questionId":"q5","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4120,"outputTokens":1,"latencyMs":659.5474580000009},{"questionId":"q5","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":577.9544170000008},{"questionId":"q5","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"yes","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":465.2838330000013},{"questionId":"q5","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":634.7964580000043},{"questionId":"q5","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorre
ct":true,"inputTokens":5168,"outputTokens":1,"latencyMs":668.1913339999955},{"questionId":"q6","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":6531,"outputTokens":2,"latencyMs":643.9961659999972},{"questionId":"q6","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":4122,"outputTokens":2,"latencyMs":464.65929199999664},{"questionId":"q6","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":2701,"outputTokens":2,"latencyMs":466.872292},{"questionId":"q6","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":2538,"outputTokens":2,"latencyMs":491.4008329999997},{"questionId":"q6","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":7470,"outputTokens":2,"latencyMs":607.0218329999989},{"questionId":"q6","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":5170,"outputTokens":2,"latencyMs":450.4032909999951},{"questionId":"q7","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":6533,"outputTokens":1,"latencyMs":521.4899160000059},{"questionId":"q7","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4124,"outputTokens":1,"latencyMs":594.7228749999995},{"questionId":"q7","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2703,"outputTokens":1,"latencyMs":423.5501250000016},{"questionId":"q7","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineerin
g","isCorrect":true,"inputTokens":2540,"outputTokens":1,"latencyMs":449.349000000002},{"questionId":"q7","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7472,"outputTokens":1,"latencyMs":562.4655000000057},{"questionId":"q7","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5172,"outputTokens":1,"latencyMs":422.5996249999953},{"questionId":"q8","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":6532,"outputTokens":6,"latencyMs":404.39295899999706},{"questionId":"q8","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":4123,"outputTokens":6,"latencyMs":529.7585000000036},{"questionId":"q8","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":2702,"outputTokens":6,"latencyMs":498.1508329999997},{"questionId":"q8","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":2539,"outputTokens":6,"latencyMs":432.265790999998},{"questionId":"q8","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":7471,"outputTokens":6,"latencyMs":743.6265419999982},{"questionId":"q8","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":10502,"outputTokens":6,"latencyMs":817.4960830000055},{"questionId":"q9","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":6534,"outputTokens":1,"l
atencyMs":584.2985830000034},{"questionId":"q9","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":4125,"outputTokens":1,"latencyMs":439.3900419999991},{"questionId":"q9","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":2704,"outputTokens":1,"latencyMs":584.6250419999997},{"questionId":"q9","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":2541,"outputTokens":1,"latencyMs":581.2358340000064},{"questionId":"q9","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":7473,"outputTokens":1,"latencyMs":857.8352500000037},{"questionId":"q9","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":5173,"outputTokens":1,"latencyMs":741.203874999992},{"questionId":"q10","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":601.8820419999975},{"questionId":"q10","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4120,"outputTokens":1,"latencyMs":546.5664580000011},{"questionId":"q10","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":782.0337089999957},{"questionId":"q10","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"1","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":550.6735000000044},{"questionId":"q10","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":806.8262499999983},{"questionId":"q10","format":"yaml","model":"grok-4-1-fast-non-rea
soning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":495.4067090000026},{"questionId":"q11","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":13220,"outputTokens":2,"latencyMs":981.7749999999942},{"questionId":"q11","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":4121,"outputTokens":2,"latencyMs":564.8935420000053},{"questionId":"q11","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":2700,"outputTokens":2,"latencyMs":705.6945840000117},{"questionId":"q11","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":2537,"outputTokens":2,"latencyMs":444.15466699999524},{"questionId":"q11","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":7469,"outputTokens":2,"latencyMs":714.6199590000033},{"questionId":"q11","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":5169,"outputTokens":2,"latencyMs":391.2693750000035},{"questionId":"q12","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":6532,"outputTokens":1,"latencyMs":561.0343339999963},{"questionId":"q12","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4123,"outputTokens":1,"latencyMs":384.5078749999957},{"questionId":"q12","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2702,"outputTokens":1,"latencyMs":377.22441699999035},{"questionId":"q12","format":"csv","model":"grok-4-1-fast-non-reason
ing","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2539,"outputTokens":1,"latencyMs":451.06324999999197},{"questionId":"q12","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":7471,"outputTokens":1,"latencyMs":577.9445830000041},{"questionId":"q12","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5171,"outputTokens":1,"latencyMs":498.8087499999965},{"questionId":"q13","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"10","isCorrect":false,"inputTokens":6528,"outputTokens":1,"latencyMs":631.3149169999961},{"questionId":"q13","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"12","isCorrect":false,"inputTokens":4119,"outputTokens":1,"latencyMs":493.6619999999966},{"questionId":"q13","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2698,"outputTokens":1,"latencyMs":491.4699580000015},{"questionId":"q13","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":2535,"outputTokens":1,"latencyMs":704.4415830000071},{"questionId":"q13","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":7467,"outputTokens":1,"latencyMs":853.990792000026},{"questionId":"q13","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"12","isCorrect":false,"inputTokens":5167,"outputTokens":1,"latencyMs":6325.531707999995},{"questionId":"q14","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":6528,"outputTokens":1,"latencyMs":825.4149170000164},{"questionId":"q14","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","is
Correct":false,"inputTokens":4119,"outputTokens":1,"latencyMs":1838.9603329999954},{"questionId":"q14","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2698,"outputTokens":1,"latencyMs":546.789208000002},{"questionId":"q14","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2535,"outputTokens":1,"latencyMs":487.18799999999464},{"questionId":"q14","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":7467,"outputTokens":1,"latencyMs":719.2681250000023},{"questionId":"q14","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":5167,"outputTokens":1,"latencyMs":620.3604580000101},{"questionId":"q15","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":6528,"outputTokens":1,"latencyMs":702.3599169999943},{"questionId":"q15","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"14","isCorrect":false,"inputTokens":4119,"outputTokens":1,"latencyMs":1350.6427919999987},{"questionId":"q15","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2698,"outputTokens":1,"latencyMs":445.94154200001503},{"questionId":"q15","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"25","isCorrect":false,"inputTokens":2535,"outputTokens":1,"latencyMs":430.4858330000134},{"questionId":"q15","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":7467,"outputTokens":1,"latencyMs":764.09283400001},{"questionId":"q15","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":5167,"outputTokens":1,"latencyMs":718.1185830000031},{"questionId":"q
16","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":6533,"outputTokens":1,"latencyMs":640.624833000009},{"questionId":"q16","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"92","isCorrect":false,"inputTokens":4124,"outputTokens":1,"latencyMs":518.7864580000169},{"questionId":"q16","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":2703,"outputTokens":1,"latencyMs":505.09270800001104},{"questionId":"q16","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":2540,"outputTokens":1,"latencyMs":7579.916916999995},{"questionId":"q16","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":7472,"outputTokens":1,"latencyMs":826.4459579999966},{"questionId":"q16","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":5172,"outputTokens":1,"latencyMs":513.2143330000108},{"questionId":"q17","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"67","isCorrect":false,"inputTokens":6533,"outputTokens":1,"latencyMs":521.1675840000098},{"questionId":"q17","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"68","isCorrect":false,"inputTokens":4124,"outputTokens":1,"latencyMs":657.3941669999913},{"questionId":"q17","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"68","isCorrect":false,"inputTokens":2703,"outputTokens":1,"latencyMs":427.2082079999964},{"questionId":"q17","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"67","isCorrect":false,"inputTokens":2540,"outputTokens":1,"latencyMs":500.1402919999964},{"questionId":"q17","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"
67","isCorrect":false,"inputTokens":7472,"outputTokens":1,"latencyMs":624.4298750000016},{"questionId":"q17","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"67","isCorrect":false,"inputTokens":5172,"outputTokens":1,"latencyMs":532.2810839999875},{"questionId":"q18","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"42","isCorrect":false,"inputTokens":6533,"outputTokens":1,"latencyMs":725.5792080000101},{"questionId":"q18","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"52","isCorrect":false,"inputTokens":4124,"outputTokens":1,"latencyMs":518.0593750000116},{"questionId":"q18","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"58","isCorrect":false,"inputTokens":2703,"outputTokens":1,"latencyMs":525.0036670000118},{"questionId":"q18","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"58","isCorrect":false,"inputTokens":2540,"outputTokens":1,"latencyMs":479.3065420000057},{"questionId":"q18","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"47","isCorrect":true,"inputTokens":7472,"outputTokens":1,"latencyMs":705.658167000016},{"questionId":"q18","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"45","isCorrect":false,"inputTokens":5172,"outputTokens":1,"latencyMs":866.4303749999963},{"questionId":"q19","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":585.0571249999921},{"questionId":"q19","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":4120,"outputTokens":1,"latencyMs":741.8611669999955},{"questionId":"q19","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":362.5301249
999902},{"questionId":"q19","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":432.6569170000148},{"questionId":"q19","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":545.3256669999973},{"questionId":"q19","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":572.6263749999925},{"questionId":"q20","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"104140","isCorrect":false,"inputTokens":6530,"outputTokens":2,"latencyMs":744.2125410000153},{"questionId":"q20","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"{\"averageSalary\":102382}\n\n## Explanation\n\nTo calculate the average salary across all employees, the process involves these steps:\n\n1. Extract the salary values from the employees array, yielding 100 numeric values.\n2. Sum these values: total = 10238200.\n3. Divide the total by the number of employees: 10238200 ÷ 100 = 102382.\n4. 
The result is the average salary, output as a compact JSON object with the key \"averageSalary\".","isCorrect":false,"inputTokens":4121,"outputTokens":100,"latencyMs":1392.9901250000112},{"questionId":"q20","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"104878","isCorrect":false,"inputTokens":2700,"outputTokens":2,"latencyMs":434.17908299999544},{"questionId":"q20","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"100492","isCorrect":false,"inputTokens":2537,"outputTokens":2,"latencyMs":572.8373330000031},{"questionId":"q20","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"99999","isCorrect":false,"inputTokens":7469,"outputTokens":2,"latencyMs":734.3059579999826},{"questionId":"q20","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"104122","isCorrect":false,"inputTokens":5169,"outputTokens":2,"latencyMs":486.9180000000051},{"questionId":"q21","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"76","isCorrect":false,"inputTokens":6527,"outputTokens":1,"latencyMs":650.6604170000064},{"questionId":"q21","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"73","isCorrect":false,"inputTokens":4118,"outputTokens":1,"latencyMs":552.6554160000233},{"questionId":"q21","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"72","isCorrect":false,"inputTokens":2697,"outputTokens":1,"latencyMs":411.1337080000085},{"questionId":"q21","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"62","isCorrect":false,"inputTokens":2534,"outputTokens":1,"latencyMs":448.09316699998453},{"questionId":"q21","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"77","isCorrect":false,"inputTokens":7466,"outputTokens":1,"latencyMs":559.984334000008},{"questionId":"q21","format":"yaml","model":"grok-4-1-fast-non-reasoning"
,"expected":"79","actual":"77","isCorrect":false,"inputTokens":5166,"outputTokens":1,"latencyMs":555.7300839999807},{"questionId":"q22","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":6527,"outputTokens":1,"latencyMs":603.9903330000234},{"questionId":"q22","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"22","isCorrect":false,"inputTokens":4118,"outputTokens":1,"latencyMs":553.2302919999929},{"questionId":"q22","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":2697,"outputTokens":1,"latencyMs":538.6504580000183},{"questionId":"q22","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"26","isCorrect":false,"inputTokens":2534,"outputTokens":1,"latencyMs":661.0164169999771},{"questionId":"q22","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":7466,"outputTokens":1,"latencyMs":648.4302499999758},{"questionId":"q22","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":5166,"outputTokens":1,"latencyMs":510.7701249999809},{"questionId":"q23","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":6535,"outputTokens":1,"latencyMs":678.8493340000277},{"questionId":"q23","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"14","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":530.6694580000476},{"questionId":"q23","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"19","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":557.4721249999711},{"questionId":"q23","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"20","isCorrect":false,"inputTokens":2542,"outputTokens":1
,"latencyMs":415.67420799995307},{"questionId":"q23","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"18","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":697.5731660000165},{"questionId":"q23","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"11","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":693.6044589999947},{"questionId":"q24","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"12","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":762.7310419999994},{"questionId":"q24","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":543.1126659999718},{"questionId":"q24","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"16","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":471.44666600000346},{"questionId":"q24","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":460.0599579999689},{"questionId":"q24","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"18","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":558.4079590000329},{"questionId":"q24","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"18","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":525.0044999999809},{"questionId":"q25","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"11","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":656.2660419999738},{"questionId":"q25","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":477.75929100002395},{"questionId":"q25","format":"toon","model":"grok-4-1-f
ast-non-reasoning","expected":"13","actual":"12","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":491.8847079999978},{"questionId":"q25","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":492.8086250000051},{"questionId":"q25","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"21","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":499.74279200000456},{"questionId":"q25","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"20","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":349.43766699999105},{"questionId":"q26","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"5","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":724.9569999999949},{"questionId":"q26","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"7","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":587.3300000000163},{"questionId":"q26","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"8","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":540.6909169999999},{"questionId":"q26","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"9","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":630.7571250000037},{"questionId":"q26","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"8","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":892.6946670000325},{"questionId":"q26","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"7","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":529.9295830000192},{"questionId":"q27","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"11","isCorrect":false,"inputTokens":6535,"outputTok
ens":1,"latencyMs":884.9784160000272},{"questionId":"q27","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":539.7102500000037},{"questionId":"q27","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"20","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":537.8525420000078},{"questionId":"q27","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"14","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":570.3273330000229},{"questionId":"q27","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":635.3287920000148},{"questionId":"q27","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"11","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":625.2615409999853},{"questionId":"q28","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"68","isCorrect":false,"inputTokens":6534,"outputTokens":1,"latencyMs":785.9446250000037},{"questionId":"q28","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"68","isCorrect":false,"inputTokens":4125,"outputTokens":1,"latencyMs":446.27083399996627},{"questionId":"q28","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"83","isCorrect":false,"inputTokens":2704,"outputTokens":1,"latencyMs":386.73541600001045},{"questionId":"q28","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"73","isCorrect":false,"inputTokens":2541,"outputTokens":1,"latencyMs":529.6602500000154},{"questionId":"q28","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"78","isCorrect":false,"inputTokens":7473,"outputTokens":1,"latencyMs":479.33741700003156},{"questionId":"q28","format":"yaml","model":"grok-
4-1-fast-non-reasoning","expected":"60","actual":"80","isCorrect":false,"inputTokens":5173,"outputTokens":1,"latencyMs":428.09250000002794},{"questionId":"q29","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"58","isCorrect":false,"inputTokens":6534,"outputTokens":1,"latencyMs":678.0827500000014},{"questionId":"q29","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"55","isCorrect":false,"inputTokens":4125,"outputTokens":1,"latencyMs":473.91145799995866},{"questionId":"q29","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"67","isCorrect":false,"inputTokens":2704,"outputTokens":1,"latencyMs":477.66345900000306},{"questionId":"q29","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"58","isCorrect":false,"inputTokens":2541,"outputTokens":1,"latencyMs":480.2849580000038},{"questionId":"q29","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"58","isCorrect":false,"inputTokens":7473,"outputTokens":1,"latencyMs":625.3212920000078},{"questionId":"q29","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"62","isCorrect":false,"inputTokens":5173,"outputTokens":1,"latencyMs":372.15625},{"questionId":"q30","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"35","isCorrect":false,"inputTokens":6534,"outputTokens":1,"latencyMs":681.7562080000062},{"questionId":"q30","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"38","isCorrect":false,"inputTokens":4125,"outputTokens":1,"latencyMs":520.1463329999824},{"questionId":"q30","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"38","isCorrect":false,"inputTokens":2704,"outputTokens":1,"latencyMs":605.6678340000217},{"questionId":"q30","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"42","isCorrect":false,"inputTokens":25
41,"outputTokens":1,"latencyMs":566.4944160000305},{"questionId":"q30","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"25","isCorrect":false,"inputTokens":7473,"outputTokens":1,"latencyMs":716.9994169999845},{"questionId":"q30","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"35","isCorrect":false,"inputTokens":5173,"outputTokens":1,"latencyMs":488.8819159999839},{"questionId":"q31","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":6535,"outputTokens":1,"latencyMs":731.7278750000405},{"questionId":"q31","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":4126,"outputTokens":1,"latencyMs":555.083666999999},{"questionId":"q31","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"25","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":1560.6482499999693},{"questionId":"q31","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"20","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":513.6154170000227},{"questionId":"q31","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":7474,"outputTokens":1,"latencyMs":837.3640000000014},{"questionId":"q31","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":5174,"outputTokens":1,"latencyMs":388.5099170000176},{"questionId":"q32","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":704.1717499999795},{"questionId":"q32","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":740.1301250000251},{"questionId":"q32","format":"toon",
"model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"22","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":678.28216599999},{"questionId":"q32","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"15","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":406.4673330000369},{"questionId":"q32","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":760.0417080000043},{"questionId":"q32","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":471.80441599996993},{"questionId":"q33","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":6535,"outputTokens":1,"latencyMs":628.7932499999879},{"questionId":"q33","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":4126,"outputTokens":1,"latencyMs":338.47966599999927},{"questionId":"q33","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"25","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":349.7202920000418},{"questionId":"q33","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":2542,"outputTokens":1,"latencyMs":434.40783300000476},{"questionId":"q33","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":7474,"outputTokens":1,"latencyMs":792.124582999968},{"questionId":"q33","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"20","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":536.3979579999577},{"questionId":"q34","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"12","isCorrect":false,"inputToke
ns":6529,"outputTokens":1,"latencyMs":789.2198330000392},{"questionId":"q34","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"12","isCorrect":false,"inputTokens":4120,"outputTokens":1,"latencyMs":484.61395800003083},{"questionId":"q34","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"16","isCorrect":false,"inputTokens":2699,"outputTokens":1,"latencyMs":553.2415829999954},{"questionId":"q34","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"14","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":984.8130420000525},{"questionId":"q34","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"12","isCorrect":false,"inputTokens":7468,"outputTokens":1,"latencyMs":783.877790999948},{"questionId":"q34","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"14","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":548.6428749999614},{"questionId":"q35","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"20","isCorrect":false,"inputTokens":6529,"outputTokens":1,"latencyMs":579.86837500002},{"questionId":"q35","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"16","isCorrect":false,"inputTokens":4120,"outputTokens":1,"latencyMs":550.2779159999918},{"questionId":"q35","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"23","isCorrect":false,"inputTokens":2699,"outputTokens":1,"latencyMs":1089.9340000000084},{"questionId":"q35","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"15","isCorrect":false,"inputTokens":2536,"outputTokens":1,"latencyMs":590.8242079999764},{"questionId":"q35","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"25","isCorrect":false,"inputTokens":7468,"outputTokens":1,"latencyMs":731.3217500000028},{"questionId":"q35","format":"yaml","
model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"25","isCorrect":false,"inputTokens":5168,"outputTokens":1,"latencyMs":576.3463750000228},{"questionId":"q36","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":956.360708000022},{"questionId":"q36","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":941.193499999994},{"questionId":"q36","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":624.192958999949},{"questionId":"q36","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":1550.606792000006},{"questionId":"q36","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":1126.5071250000037},{"questionId":"q37","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":11530,"outputTokens":2,"latencyMs":706.5146250000107},{"questionId":"q37","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":7304,"outputTokens":2,"latencyMs":848.0497500000056},{"questionId":"q37","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":7622,"outputTokens":2,"latencyMs":688.9195419999887},{"questionId":"q37","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":12949,"outputTokens":2,"latencyMs":665.4055830000434},{"questionId":"q37","format":"yaml","model":"g
rok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8967,"outputTokens":2,"latencyMs":726.3700830000453},{"questionId":"q38","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":1015.0052920000162},{"questionId":"q38","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":570.882500000007},{"questionId":"q38","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":873.4677500000107},{"questionId":"q38","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":895.0652910000063},{"questionId":"q38","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":679.0187089999672},{"questionId":"q39","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":834.1685419999994},{"questionId":"q39","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":819.2685000000056},{"questionId":"q39","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":743.3259170000674},{"questionId":"q39","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":866.9567500000121},{"questionId":"q39",
"format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":914.6808339999989},{"questionId":"q40","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":900.5022079999326},{"questionId":"q40","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":683.2722500000382},{"questionId":"q40","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":5871.395708999946},{"questionId":"q40","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":939.6127499999711},{"questionId":"q40","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":632.5313339999411},{"questionId":"q41","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":766.8200000000652},{"questionId":"q41","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":699.7452079999493},{"questionId":"q41","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":584.8662079999922},{"questionId":"q41","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":927.346416000044},{"questionId"
:"q41","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":707.2890840000473},{"questionId":"q42","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":649.6757920000236},{"questionId":"q42","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":702.0949169999221},{"questionId":"q42","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":1425.4147919999668},{"questionId":"q42","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":962.8165000000736},{"questionId":"q42","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":663.4795000000158},{"questionId":"q43","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":11530,"outputTokens":2,"latencyMs":1409.5359999999637},{"questionId":"q43","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7304,"outputTokens":2,"latencyMs":571.7008749999804},{"questionId":"q43","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7622,"outputTokens":2,"latencyMs":654.3067499999888},{"questionId":"q43","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12949,"outputTokens":2,"latencyMs":744.0284999998985},{"q
uestionId":"q43","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8967,"outputTokens":2,"latencyMs":798.8013339999598},{"questionId":"q44","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":11531,"outputTokens":5,"latencyMs":826.5850839999039},{"questionId":"q44","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":7305,"outputTokens":5,"latencyMs":768.5535420000087},{"questionId":"q44","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":7623,"outputTokens":5,"latencyMs":752.9152499999618},{"questionId":"q44","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":12950,"outputTokens":5,"latencyMs":788.1977920000209},{"questionId":"q44","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. 
Lafayette Schumm","isCorrect":true,"inputTokens":8968,"outputTokens":5,"latencyMs":909.2735419999808},{"questionId":"q45","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":11531,"outputTokens":5,"latencyMs":768.4937919999938},{"questionId":"q45","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":7305,"outputTokens":5,"latencyMs":853.6360420000274},{"questionId":"q45","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":7623,"outputTokens":5,"latencyMs":803.939916000003},{"questionId":"q45","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":12950,"outputTokens":5,"latencyMs":990.8110419999575},{"questionId":"q45","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":8968,"outputTokens":5,"latencyMs":1062.565166000044},{"questionId":"q46","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":11531,"outputTokens":6,"latencyMs":692.8294590000296},{"questionId":"q46","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":7305,"outputTokens":6,"latencyMs":628.9641660000198},{"questionId":"q46","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":7623,"outputTokens":6,"latencyMs":645.8005000000121},{"questionId":"q46","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-
25","isCorrect":true,"inputTokens":12950,"outputTokens":6,"latencyMs":778.7395409999881},{"questionId":"q46","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":8968,"outputTokens":6,"latencyMs":709.9560000000056},{"questionId":"q47","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":740.091791999992},{"questionId":"q47","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":513.7422500000102},{"questionId":"q47","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":670.5234169999603},{"questionId":"q47","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":884.692541000084},{"questionId":"q47","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":868.8687499999069},{"questionId":"q48","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":11531,"outputTokens":5,"latencyMs":837.4969999999739},{"questionId":"q48","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":7305,"outputTokens":5,"latencyMs":578.1517920000479},{"questionId":"q48","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. 
Bertha Hagenes","isCorrect":true,"inputTokens":7623,"outputTokens":5,"latencyMs":758.3587909999769},{"questionId":"q48","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":12950,"outputTokens":5,"latencyMs":718.5274590000045},{"questionId":"q48","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":8968,"outputTokens":5,"latencyMs":538.1767499999842},{"questionId":"q49","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":11531,"outputTokens":8,"latencyMs":858.6002909999806},{"questionId":"q49","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":7305,"outputTokens":8,"latencyMs":907.2394170000916},{"questionId":"q49","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":7623,"outputTokens":8,"latencyMs":778.3918749999721},{"questionId":"q49","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":12950,"outputTokens":8,"latencyMs":985.8520830000052},{"questionId":"q49","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":8968,"outputTokens":8,"latencyMs":777.3625829999801},{"questionId":"q50","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":11531,"outputTokens":6,"latencyMs":966.6647499999963},{"questionId":"q50","format":"json-compact","model":
"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":7305,"outputTokens":6,"latencyMs":788.547542000073},{"questionId":"q50","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":7623,"outputTokens":6,"latencyMs":758.7405839998974},{"questionId":"q50","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":12950,"outputTokens":6,"latencyMs":809.8147920001065},{"questionId":"q50","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":8968,"outputTokens":6,"latencyMs":1031.0702499999898},{"questionId":"q51","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":948.9400000000605},{"questionId":"q51","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":535.2138330000453},{"questionId":"q51","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":714.1589580000145},{"questionId":"q51","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":783.6580829999875},{"questionId":"q51","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":661.3643750000047},{"questionId":"q52","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":643.7397079999791},{"questionId":"q52","format":"json-compact","model":"grok-4-1-fast-non-reasoning",
"expected":"10","actual":"12","isCorrect":false,"inputTokens":7301,"outputTokens":1,"latencyMs":536.3641669999342},{"questionId":"q52","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7619,"outputTokens":1,"latencyMs":535.9290829999372},{"questionId":"q52","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":12946,"outputTokens":1,"latencyMs":826.2162920000264},{"questionId":"q52","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":8964,"outputTokens":1,"latencyMs":496.73766600003},{"questionId":"q53","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":462.14420800004154},{"questionId":"q53","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7301,"outputTokens":1,"latencyMs":439.53295799996704},{"questionId":"q53","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":7619,"outputTokens":1,"latencyMs":1286.7901250000577},{"questionId":"q53","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":12946,"outputTokens":1,"latencyMs":1014.7207919999491},{"questionId":"q53","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":8964,"outputTokens":1,"latencyMs":1001.4921660000691},{"questionId":"q54","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":11528,"outputTokens":1,"latencyMs":773.265625},{"questionId":"q54","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7302,"outputTokens":1,"l
atencyMs":789.5051669999957},{"questionId":"q54","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":7620,"outputTokens":1,"latencyMs":880.1744160000235},{"questionId":"q54","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":12947,"outputTokens":1,"latencyMs":962.6629160000011},{"questionId":"q54","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":8965,"outputTokens":1,"latencyMs":559.9401249999646},{"questionId":"q55","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"352938.35","isCorrect":false,"inputTokens":11528,"outputTokens":4,"latencyMs":813.3962500001071},{"questionId":"q55","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"35235.79","isCorrect":false,"inputTokens":7302,"outputTokens":4,"latencyMs":6438.277499999967},{"questionId":"q55","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"35239.79","isCorrect":false,"inputTokens":7620,"outputTokens":4,"latencyMs":792.4362909999909},{"questionId":"q55","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"43341.79","isCorrect":false,"inputTokens":12947,"outputTokens":4,"latencyMs":1113.342209000024},{"questionId":"q55","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"35208.89","isCorrect":false,"inputTokens":8965,"outputTokens":4,"latencyMs":660.7110419999808},{"questionId":"q56","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.07","isCorrect":false,"inputTokens":11526,"outputTokens":3,"latencyMs":711.9117499999702},{"questionId":"q56","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.35","isCorrect":false,"inputTokens":7300,"outputTokens":3,"laten
cyMs":729.7875840000343},{"questionId":"q56","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"554.39","isCorrect":false,"inputTokens":7618,"outputTokens":3,"latencyMs":682.1083750000689},{"questionId":"q56","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.35","isCorrect":false,"inputTokens":12945,"outputTokens":3,"latencyMs":876.8197500000242},{"questionId":"q56","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.35","isCorrect":false,"inputTokens":8963,"outputTokens":3,"latencyMs":590.2461250000633},{"questionId":"q57","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":761.8431669999845},{"questionId":"q57","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7301,"outputTokens":1,"latencyMs":894.544041999965},{"questionId":"q57","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7619,"outputTokens":1,"latencyMs":717.7675840000156},{"questionId":"q57","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":12946,"outputTokens":1,"latencyMs":893.6164590000408},{"questionId":"q57","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":8964,"outputTokens":1,"latencyMs":623.1933339999523},{"questionId":"q58","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":11526,"outputTokens":4,"latencyMs":616.3692909999518},{"questionId":"q58","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":7300,"outputTokens":4,"latencyMs":592.0623749999795},{"questionId":"q5
8","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":7618,"outputTokens":4,"latencyMs":571.844458000036},{"questionId":"q58","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":12945,"outputTokens":4,"latencyMs":825.7324580000713},{"questionId":"q58","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":8963,"outputTokens":4,"latencyMs":4773.520040999982},{"questionId":"q59","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"38","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":875.5253329999978},{"questionId":"q59","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"35","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":880.0571250000503},{"questionId":"q59","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"42","isCorrect":false,"inputTokens":7622,"outputTokens":1,"latencyMs":969.6056660000468},{"questionId":"q59","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"38","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":897.704458000022},{"questionId":"q59","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"35","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":848.4107910000021},{"questionId":"q60","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"32","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":787.8418749999255},{"questionId":"q60","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"27","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":674.10699999996},{"questionId":"q60","format":"toon","model":"grok-4-1-fast-non-reasoning
","expected":"38","actual":"32","isCorrect":false,"inputTokens":7622,"outputTokens":1,"latencyMs":740.3179999999702},{"questionId":"q60","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"31","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":943.3058749999618},{"questionId":"q60","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"29","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":746.451834000065},{"questionId":"q61","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"25","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":862.3196669999743},{"questionId":"q61","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"23","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":493.14495799993165},{"questionId":"q61","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"27","isCorrect":false,"inputTokens":7622,"outputTokens":1,"latencyMs":635.9011669999454},{"questionId":"q61","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"25","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":760.0674590000417},{"questionId":"q61","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"26","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":691.1831660000607},{"questionId":"q62","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"7","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":895.8139170000795},{"questionId":"q62","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":470.424500000081},{"questionId":"q62","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"9","isCorrect":false,"inputTokens":7626,"outputToken
s":1,"latencyMs":516.0680840000277},{"questionId":"q62","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":12953,"outputTokens":1,"latencyMs":809.124499999918},{"questionId":"q62","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":8971,"outputTokens":1,"latencyMs":890.047499999986},{"questionId":"q63","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"13","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":951.530999999959},{"questionId":"q63","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"17","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":913.9237920000451},{"questionId":"q63","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"17","isCorrect":false,"inputTokens":7626,"outputTokens":1,"latencyMs":634.6960420000833},{"questionId":"q63","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"12","isCorrect":false,"inputTokens":12953,"outputTokens":1,"latencyMs":948.3523329999298},{"questionId":"q63","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"18","isCorrect":false,"inputTokens":8971,"outputTokens":1,"latencyMs":811.1162079999922},{"questionId":"q64","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":11535,"outputTokens":1,"latencyMs":912.2439580000937},{"questionId":"q64","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":7309,"outputTokens":1,"latencyMs":718.7288330000592},{"questionId":"q64","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":7627,"outputTokens":1,"latencyMs":883.9092080000555},{"questionId":"q64","format":"xml","model":"grok-4-1-fast
-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":12954,"outputTokens":1,"latencyMs":661.7925409999443},{"questionId":"q64","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":8972,"outputTokens":1,"latencyMs":604.7804580000229},{"questionId":"q65","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":11535,"outputTokens":1,"latencyMs":737.0242090000538},{"questionId":"q65","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":7309,"outputTokens":1,"latencyMs":782.2560000000522},{"questionId":"q65","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":7627,"outputTokens":1,"latencyMs":749.6715839999961},{"questionId":"q65","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":12954,"outputTokens":1,"latencyMs":769.360250000027},{"questionId":"q65","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":8972,"outputTokens":1,"latencyMs":835.999291999964},{"questionId":"q66","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":682.1851670000469},{"questionId":"q66","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":568.4164159999928},{"questionId":"q66","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":7626,"outputTokens":1,"latencyMs":4085.6644169999054},{"questionId":"q66","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":12953,"outputTokens":1,
"latencyMs":526.9112080000341},{"questionId":"q66","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"0","isCorrect":false,"inputTokens":8971,"outputTokens":1,"latencyMs":803.7134579999838},{"questionId":"q67","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":481.30329099996015},{"questionId":"q67","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":883.0185419999762},{"questionId":"q67","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":7626,"outputTokens":1,"latencyMs":555.7566250000382},{"questionId":"q67","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":12953,"outputTokens":1,"latencyMs":832.8929159999825},{"questionId":"q67","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"11","isCorrect":false,"inputTokens":8971,"outputTokens":1,"latencyMs":691.280792000005},{"questionId":"q68","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"3","isCorrect":false,"inputTokens":11535,"outputTokens":1,"latencyMs":876.8689170000143},{"questionId":"q68","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":7309,"outputTokens":1,"latencyMs":528.7574579999782},{"questionId":"q68","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7627,"outputTokens":1,"latencyMs":535.7825419999426},{"questionId":"q68","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"8","isCorrect":false,"inputTokens":12954,"outputTokens":1,"latencyMs":980.4331659999443},{"questionId":"q68","format":"yaml","model":"grok-4-1-fast-non-
reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":8972,"outputTokens":1,"latencyMs":620.7542089999188},{"questionId":"q69","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"18","isCorrect":false,"inputTokens":11536,"outputTokens":1,"latencyMs":753.1936249999562},{"questionId":"q69","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"12","isCorrect":false,"inputTokens":7310,"outputTokens":1,"latencyMs":873.3733329999959},{"questionId":"q69","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":7628,"outputTokens":1,"latencyMs":700.7058749999851},{"questionId":"q69","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":12955,"outputTokens":1,"latencyMs":931.9119159999536},{"questionId":"q69","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"12","isCorrect":false,"inputTokens":8973,"outputTokens":1,"latencyMs":842.4380420000525},{"questionId":"q70","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"12","isCorrect":false,"inputTokens":11536,"outputTokens":1,"latencyMs":867.3247500000289},{"questionId":"q70","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"12","isCorrect":false,"inputTokens":7310,"outputTokens":1,"latencyMs":684.500500000082},{"questionId":"q70","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"20","isCorrect":false,"inputTokens":7628,"outputTokens":1,"latencyMs":680.5775829999475},{"questionId":"q70","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"20","isCorrect":false,"inputTokens":12955,"outputTokens":1,"latencyMs":816.7335409999359},{"questionId":"q70","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"12","isCorrect":false,"inputTokens":8973,"ou
tputTokens":1,"latencyMs":864.5872499999823},{"questionId":"q71","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3877,"outputTokens":2,"latencyMs":667.1784170001047},{"questionId":"q71","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":2548,"outputTokens":2,"latencyMs":636.6927500000456},{"questionId":"q71","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":1807,"outputTokens":2,"latencyMs":946.3649999999907},{"questionId":"q71","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":1666,"outputTokens":2,"latencyMs":627.0107919999864},{"questionId":"q71","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":4523,"outputTokens":2,"latencyMs":595.8528330000117},{"questionId":"q71","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3207,"outputTokens":2,"latencyMs":721.3300420000451},{"questionId":"q72","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3877,"outputTokens":4,"latencyMs":583.9176250000019},{"questionId":"q72","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":2548,"outputTokens":4,"latencyMs":757.8279590000166},{"questionId":"q72","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":1807,"outputTokens":4,"latencyMs":643.0962499999441},{"questionId":"q72","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":1666,"outputTokens":4,"latencyMs":536
.1682499999879},{"questionId":"q72","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":4523,"outputTokens":4,"latencyMs":581.5594580000034},{"questionId":"q72","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3207,"outputTokens":4,"latencyMs":477.8069579999428},{"questionId":"q73","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3878,"outputTokens":3,"latencyMs":469.72750000003725},{"questionId":"q73","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":2549,"outputTokens":3,"latencyMs":392.8477919999277},{"questionId":"q73","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":1808,"outputTokens":3,"latencyMs":504.17579100001603},{"questionId":"q73","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":1667,"outputTokens":3,"latencyMs":646.0211249999702},{"questionId":"q73","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":4524,"outputTokens":3,"latencyMs":598.495833999943},{"questionId":"q73","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3208,"outputTokens":3,"latencyMs":626.5938340000575},{"questionId":"q74","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":3878,"outputTokens":1,"latencyMs":866.4132919999538},{"questionId":"q74","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":2549,"outputTokens":1,"latencyMs":596.7857079999521},{"questionId":"q74","format":"to
on","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":1808,"outputTokens":1,"latencyMs":639.6120830000145},{"questionId":"q74","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":1667,"outputTokens":1,"latencyMs":478.824583999929},{"questionId":"q74","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":4524,"outputTokens":1,"latencyMs":513.1029999998864},{"questionId":"q74","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":3208,"outputTokens":1,"latencyMs":624.2674169999082},{"questionId":"q75","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3877,"outputTokens":2,"latencyMs":561.5092919999734},{"questionId":"q75","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":2548,"outputTokens":2,"latencyMs":553.9505000000354},{"questionId":"q75","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":1807,"outputTokens":2,"latencyMs":480.482416999992},{"questionId":"q75","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":1666,"outputTokens":2,"latencyMs":445.3764170000795},{"questionId":"q75","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":4523,"outputTokens":2,"latencyMs":516.5285830000648},{"questionId":"q75","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3207,"outputTokens":2,"latencyMs":520.0939159999834},{"questionId":"q76","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27
","isCorrect":true,"inputTokens":3877,"outputTokens":4,"latencyMs":438.40795799996704},{"questionId":"q76","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":2548,"outputTokens":4,"latencyMs":549.6162079999922},{"questionId":"q76","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":1807,"outputTokens":4,"latencyMs":475.16245800000615},{"questionId":"q76","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":1666,"outputTokens":4,"latencyMs":523.6847919999855},{"questionId":"q76","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":4523,"outputTokens":4,"latencyMs":529.6843329999829},{"questionId":"q76","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":3207,"outputTokens":4,"latencyMs":586.4784579999978},{"questionId":"q77","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3878,"outputTokens":3,"latencyMs":595.1180000000168},{"questionId":"q77","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":2549,"outputTokens":3,"latencyMs":474.8182079999242},{"questionId":"q77","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":1808,"outputTokens":3,"latencyMs":475.0827079999726},{"questionId":"q77","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":1667,"outputTokens":3,"latencyMs":380.63508300005924},{"questionId":"q77","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTok
ens":4524,"outputTokens":3,"latencyMs":492.3605830000015},{"questionId":"q77","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3208,"outputTokens":3,"latencyMs":474.56541699997615},{"questionId":"q78","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":3878,"outputTokens":1,"latencyMs":637.0482920000795},{"questionId":"q78","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":2549,"outputTokens":1,"latencyMs":623.8157079999801},{"questionId":"q78","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":1808,"outputTokens":1,"latencyMs":485.66600000008475},{"questionId":"q78","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":1667,"outputTokens":1,"latencyMs":493.966083999956},{"questionId":"q78","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":4524,"outputTokens":1,"latencyMs":494.7225000000326},{"questionId":"q78","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":3208,"outputTokens":1,"latencyMs":447.7400830000406},{"questionId":"q79","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3877,"outputTokens":2,"latencyMs":555.2192090000026},{"questionId":"q79","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":2548,"outputTokens":2,"latencyMs":498.54058400006033},{"questionId":"q79","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":1807,"outputTokens":2,"latencyMs":4969.214000000036},{"questionId":"q
79","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":1666,"outputTokens":2,"latencyMs":442.37591599998996},{"questionId":"q79","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":4523,"outputTokens":2,"latencyMs":481.6617499999702},{"questionId":"q79","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3207,"outputTokens":2,"latencyMs":4933.525125000044},{"questionId":"q80","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":3874,"outputTokens":1,"latencyMs":367.5052920000162},{"questionId":"q80","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":2545,"outputTokens":1,"latencyMs":476.88395799999125},{"questionId":"q80","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1804,"outputTokens":1,"latencyMs":453.7974170000525},{"questionId":"q80","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1663,"outputTokens":1,"latencyMs":394.2155420000199},{"questionId":"q80","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"68","isCorrect":false,"inputTokens":4520,"outputTokens":1,"latencyMs":411.7383329999866},{"questionId":"q80","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":3204,"outputTokens":1,"latencyMs":720.8658329999307},{"questionId":"q81","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"436942","isCorrect":false,"inputTokens":3875,"outputTokens":2,"latencyMs":418.73649999999907},{"questionId":"q81","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":
"338580","actual":"{\"totalViews\":431790}","isCorrect":false,"inputTokens":2546,"outputTokens":7,"latencyMs":621.2502499999246},{"questionId":"q81","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"433895","isCorrect":false,"inputTokens":1805,"outputTokens":2,"latencyMs":376.69983399997},{"questionId":"q81","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"848687","isCorrect":false,"inputTokens":1664,"outputTokens":2,"latencyMs":526.1514169999864},{"questionId":"q81","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"417476","isCorrect":false,"inputTokens":4521,"outputTokens":2,"latencyMs":715.0293339999625},{"questionId":"q81","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"352879","isCorrect":false,"inputTokens":3205,"outputTokens":2,"latencyMs":696.9394590000156},{"questionId":"q82","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1321","isCorrect":false,"inputTokens":3875,"outputTokens":2,"latencyMs":506.02349999989383},{"questionId":"q82","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1672","isCorrect":false,"inputTokens":2546,"outputTokens":2,"latencyMs":529.5858749998733},{"questionId":"q82","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1582","isCorrect":false,"inputTokens":1805,"outputTokens":2,"latencyMs":542.3188340000343},{"questionId":"q82","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1762","isCorrect":false,"inputTokens":1664,"outputTokens":2,"latencyMs":473.39445900009014},{"questionId":"q82","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1621","isCorrect":false,"inputTokens":4521,"outputTokens":2,"latencyMs":514.995541999815},{"questionId":"q82","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1541",
"isCorrect":false,"inputTokens":3205,"outputTokens":2,"latencyMs":637.6235839999281},{"questionId":"q83","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"297171.51","isCorrect":false,"inputTokens":3873,"outputTokens":4,"latencyMs":601.5097920000553},{"questionId":"q83","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"```json\n286329.96\n```","isCorrect":false,"inputTokens":2544,"outputTokens":9,"latencyMs":596.0253330001142},{"questionId":"q83","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"304457.35","isCorrect":false,"inputTokens":1803,"outputTokens":4,"latencyMs":588.1962499998044},{"questionId":"q83","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"319248.47","isCorrect":false,"inputTokens":1662,"outputTokens":4,"latencyMs":723.7299580001272},{"questionId":"q83","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"336090.49","isCorrect":false,"inputTokens":4519,"outputTokens":4,"latencyMs":571.2627920000814},{"questionId":"q83","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"291290.95","isCorrect":false,"inputTokens":3203,"outputTokens":4,"latencyMs":400.36724999989383},{"questionId":"q84","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":3871,"outputTokens":3,"latencyMs":412.0887499998789},{"questionId":"q84","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":2542,"outputTokens":3,"latencyMs":481.8872079998255},{"questionId":"q84","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":1801,"outputTokens":3,"latencyMs":509.14441700000316},{"questionId":"q84","format":"csv","model":"grok-4-1-fast-non-reasoning","exp
ected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":1660,"outputTokens":3,"latencyMs":383.484625000041},{"questionId":"q84","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":4517,"outputTokens":3,"latencyMs":943.1645829998888},{"questionId":"q84","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":3201,"outputTokens":3,"latencyMs":567.7023330000229},{"questionId":"q85","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":3875,"outputTokens":1,"latencyMs":570.1790839999449},{"questionId":"q85","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"28","isCorrect":false,"inputTokens":2546,"outputTokens":1,"latencyMs":611.8090830000583},{"questionId":"q85","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":1805,"outputTokens":1,"latencyMs":329.9869160000235},{"questionId":"q85","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"28","isCorrect":false,"inputTokens":1664,"outputTokens":1,"latencyMs":803.358959000092},{"questionId":"q85","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"28","isCorrect":false,"inputTokens":4521,"outputTokens":1,"latencyMs":480.0933330000844},{"questionId":"q85","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":3205,"outputTokens":1,"latencyMs":513.3110409998335},{"questionId":"q86","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":3874,"outputTokens":1,"latencyMs":483.51479099993594},{"questionId":"q86","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":2545,"outputTokens":
1,"latencyMs":791.2523749999236},{"questionId":"q86","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"48","isCorrect":false,"inputTokens":1804,"outputTokens":1,"latencyMs":543.8400000000838},{"questionId":"q86","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"51","isCorrect":false,"inputTokens":1663,"outputTokens":1,"latencyMs":645.0875000001397},{"questionId":"q86","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"62","isCorrect":false,"inputTokens":4520,"outputTokens":1,"latencyMs":488.80837500002235},{"questionId":"q86","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"25","isCorrect":false,"inputTokens":3204,"outputTokens":1,"latencyMs":506.6174999999348},{"questionId":"q87","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":512.5699579999782},{"questionId":"q87","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"22","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":771.8474590000696},{"questionId":"q87","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"18","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":571.1704170000739},{"questionId":"q87","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"20","isCorrect":false,"inputTokens":3500,"outputTokens":1,"latencyMs":779.3597500000615},{"questionId":"q87","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"25","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":498.4696670002304},{"questionId":"q87","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"22","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":466.0879590001423},{"questionId":"q88","format":"json-pretty","model":"grok-4-1-fast
-non-reasoning","expected":"11","actual":"7","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":440.93083299999125},{"questionId":"q88","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":419.83762499992736},{"questionId":"q88","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"7","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":374.7700419998728},{"questionId":"q88","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"8","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":523.8123749999795},{"questionId":"q88","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":384.8144170001615},{"questionId":"q88","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"8","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":449.75270899990574},{"questionId":"q89","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":474.3642500001006},{"questionId":"q89","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":438.2861250001006},{"questionId":"q89","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"18","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":461.3356250000652},{"questionId":"q89","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"11","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":418.0638330001384},{"questionId":"q89","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":4527,"outputTo
kens":1,"latencyMs":355.1533749999944},{"questionId":"q89","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"11","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":367.96429100004025},{"questionId":"q90","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":582.3625829999801},{"questionId":"q90","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":674.5549170000013},{"questionId":"q90","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"22","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":474.828582999995},{"questionId":"q90","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"15","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":526.7267080000602},{"questionId":"q90","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":578.2530419998802},{"questionId":"q90","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":523.2485409998335},{"questionId":"q91","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"11","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":490.45974999992177},{"questionId":"q91","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":503.06287500006147},{"questionId":"q91","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":513.8332919999957},{"questionId":"q91","format":"csv","model
":"grok-4-1-fast-non-reasoning","expected":"23","actual":"11","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":505.761208999902},{"questionId":"q91","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":470.404499999946},{"questionId":"q91","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"11","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":419.1475840001367},{"questionId":"q92","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"23","isCorrect":false,"inputTokens":3880,"outputTokens":1,"latencyMs":537.3507499999832},{"questionId":"q92","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"23","isCorrect":false,"inputTokens":2551,"outputTokens":1,"latencyMs":419.5671249998268},{"questionId":"q92","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"16","isCorrect":false,"inputTokens":1810,"outputTokens":1,"latencyMs":530.794624999864},{"questionId":"q92","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"23","isCorrect":false,"inputTokens":1669,"outputTokens":1,"latencyMs":650.0870000000577},{"questionId":"q92","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":4526,"outputTokens":1,"latencyMs":588.5105830000248},{"questionId":"q92","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"19","isCorrect":false,"inputTokens":3210,"outputTokens":1,"latencyMs":588.5343329999596},{"questionId":"q93","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"9","isCorrect":true,"inputTokens":3880,"outputTokens":1,"latencyMs":448.8965419998858},{"questionId":"q93","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"12","isCorrect":false,"inputToke
ns":2551,"outputTokens":1,"latencyMs":427.7964580000844},{"questionId":"q93","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"12","isCorrect":false,"inputTokens":1810,"outputTokens":1,"latencyMs":449.91379200015217},{"questionId":"q93","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"22","isCorrect":false,"inputTokens":1669,"outputTokens":1,"latencyMs":486.40762499999255},{"questionId":"q93","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"12","isCorrect":false,"inputTokens":4526,"outputTokens":1,"latencyMs":385.6574169998057},{"questionId":"q93","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"8","isCorrect":false,"inputTokens":3210,"outputTokens":1,"latencyMs":446.4321250000503},{"questionId":"q94","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"25","isCorrect":false,"inputTokens":3883,"outputTokens":1,"latencyMs":601.0140420000535},{"questionId":"q94","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"28","isCorrect":false,"inputTokens":2554,"outputTokens":1,"latencyMs":626.378292000154},{"questionId":"q94","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"28","isCorrect":false,"inputTokens":1813,"outputTokens":1,"latencyMs":593.7920419999864},{"questionId":"q94","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"35","isCorrect":false,"inputTokens":1672,"outputTokens":1,"latencyMs":604.6700839998666},{"questionId":"q94","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"31","isCorrect":true,"inputTokens":4529,"outputTokens":1,"latencyMs":648.5342079999391},{"questionId":"q94","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"26","isCorrect":false,"inputTokens":3213,"outputTokens":1,"latencyMs":462.07916700001806},{"questionId":"q95","format":"json-pretty","mo
del":"grok-4-1-fast-non-reasoning","expected":"28","actual":"23","isCorrect":false,"inputTokens":3883,"outputTokens":1,"latencyMs":586.6430000001565},{"questionId":"q95","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"26","isCorrect":false,"inputTokens":2554,"outputTokens":1,"latencyMs":779.3139580001589},{"questionId":"q95","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"26","isCorrect":false,"inputTokens":1813,"outputTokens":1,"latencyMs":712.7615409998689},{"questionId":"q95","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":1672,"outputTokens":1,"latencyMs":597.8637919998728},{"questionId":"q95","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"35","isCorrect":false,"inputTokens":4529,"outputTokens":1,"latencyMs":856.8264589998871},{"questionId":"q95","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":3213,"outputTokens":1,"latencyMs":507.8472919999622},{"questionId":"q96","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":15289,"outputTokens":2,"latencyMs":1028.6837090000045},{"questionId":"q96","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":11593,"outputTokens":2,"latencyMs":839.0477919999976},{"questionId":"q96","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8916,"outputTokens":2,"latencyMs":1018.7267920000013},{"questionId":"q96","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8666,"outputTokens":2,"latencyMs":864.162416000152},{"questionId":"q96","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"430886","actu
al":"430886","isCorrect":true,"inputTokens":17201,"outputTokens":2,"latencyMs":1127.1430420000106},{"questionId":"q96","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":13287,"outputTokens":2,"latencyMs":875.9483329998329},{"questionId":"q97","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":15290,"outputTokens":2,"latencyMs":884.7861669999547},{"questionId":"q97","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":11594,"outputTokens":2,"latencyMs":863.4405000000261},{"questionId":"q97","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8917,"outputTokens":2,"latencyMs":713.9379590000026},{"questionId":"q97","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8667,"outputTokens":2,"latencyMs":954.2697080001235},{"questionId":"q97","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17202,"outputTokens":2,"latencyMs":1526.1647910000756},{"questionId":"q97","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":13288,"outputTokens":2,"latencyMs":651.6998330000788},{"questionId":"q98","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":15286,"outputTokens":2,"latencyMs":1002.4406659998931},{"questionId":"q98","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":11590,"outputTokens":2,"latencyMs":778.0931249998976},{"questionId":"q98","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":
true,"inputTokens":8913,"outputTokens":2,"latencyMs":698.0237500001676},{"questionId":"q98","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":8663,"outputTokens":2,"latencyMs":2732.6750419999007},{"questionId":"q98","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17198,"outputTokens":2,"latencyMs":2715.6885829998646},{"questionId":"q98","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":13284,"outputTokens":2,"latencyMs":3166.9347080001608},{"questionId":"q99","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":15291,"outputTokens":1,"latencyMs":969.6473749999423},{"questionId":"q99","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":11595,"outputTokens":1,"latencyMs":669.4215410000179},{"questionId":"q99","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":8918,"outputTokens":1,"latencyMs":904.8111250000075},{"questionId":"q99","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":8668,"outputTokens":1,"latencyMs":907.8212919998914},{"questionId":"q99","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":17203,"outputTokens":1,"latencyMs":1169.048333000159},{"questionId":"q99","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":13289,"outputTokens":1,"latencyMs":1029.4778330000117},{"questionId":"q100","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":1528
6,"outputTokens":2,"latencyMs":965.6498750001192},{"questionId":"q100","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":11590,"outputTokens":2,"latencyMs":955.9572499999776},{"questionId":"q100","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8913,"outputTokens":2,"latencyMs":1146.311541999923},{"questionId":"q100","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8663,"outputTokens":2,"latencyMs":718.9294590000063},{"questionId":"q100","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17198,"outputTokens":2,"latencyMs":1058.8808750000317},{"questionId":"q100","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":13284,"outputTokens":2,"latencyMs":883.9617089999374},{"questionId":"q101","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":15290,"outputTokens":2,"latencyMs":1124.9118330001365},{"questionId":"q101","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":11594,"outputTokens":2,"latencyMs":801.140625},{"questionId":"q101","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8917,"outputTokens":2,"latencyMs":672.8650829999242},{"questionId":"q101","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8667,"outputTokens":2,"latencyMs":636.2527499999851},{"questionId":"q101","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17202,"outputTokens":2,"
latencyMs":920.3125},{"questionId":"q101","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":13288,"outputTokens":2,"latencyMs":687.6969169999938},{"questionId":"q102","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":15290,"outputTokens":1,"latencyMs":644.921166999964},{"questionId":"q102","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":11594,"outputTokens":1,"latencyMs":772.4852089998312},{"questionId":"q102","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":8917,"outputTokens":1,"latencyMs":691.0827920001466},{"questionId":"q102","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":8667,"outputTokens":1,"latencyMs":680.5707080001011},{"questionId":"q102","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":17202,"outputTokens":1,"latencyMs":1144.0804580000695},{"questionId":"q102","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":13288,"outputTokens":1,"latencyMs":851.5518330000341},{"questionId":"q103","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":15289,"outputTokens":1,"latencyMs":905.217165999813},{"questionId":"q103","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":11593,"outputTokens":1,"latencyMs":783.5753750000149},{"questionId":"q103","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":8916,"outputTokens":1,"latencyMs":688.7867499999702},{"questionId":"q103","for
mat":"csv","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":8666,"outputTokens":1,"latencyMs":1171.5374169999268},{"questionId":"q103","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":17201,"outputTokens":1,"latencyMs":1257.7813329999335},{"questionId":"q103","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":13287,"outputTokens":1,"latencyMs":956.1958329998888},{"questionId":"q104","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":15292,"outputTokens":2,"latencyMs":795.1947500000242},{"questionId":"q104","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":11596,"outputTokens":2,"latencyMs":977.4269169999752},{"questionId":"q104","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8919,"outputTokens":2,"latencyMs":630.7267080000602},{"questionId":"q104","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8669,"outputTokens":2,"latencyMs":675.2367499999236},{"questionId":"q104","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17204,"outputTokens":2,"latencyMs":876.6145830000751},{"questionId":"q104","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":13290,"outputTokens":2,"latencyMs":878.96875},{"questionId":"q105","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":15289,"outputTokens":2,"latencyMs":918.0222499999218},{"questionId":"q105","format":"json-compact","mode
l":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":11593,"outputTokens":2,"latencyMs":818.6200840000529},{"questionId":"q105","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8916,"outputTokens":2,"latencyMs":999.2968339999206},{"questionId":"q105","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8666,"outputTokens":2,"latencyMs":842.6387499999255},{"questionId":"q105","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17201,"outputTokens":2,"latencyMs":1071.0478339998517},{"questionId":"q105","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":13287,"outputTokens":2,"latencyMs":650.575416999869},{"questionId":"q106","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":15293,"outputTokens":2,"latencyMs":1022.3034590000752},{"questionId":"q106","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":11597,"outputTokens":2,"latencyMs":744.5276669999585},{"questionId":"q106","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8920,"outputTokens":2,"latencyMs":893.2044999999925},{"questionId":"q106","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8670,"outputTokens":2,"latencyMs":667.956875000149},{"questionId":"q106","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17205,"outputTokens":2,"latencyMs":1136.977458999958},{"questionId":"q106","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected"
:"2607","actual":"2607","isCorrect":true,"inputTokens":13291,"outputTokens":2,"latencyMs":635.3852920001373},{"questionId":"q107","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":15285,"outputTokens":1,"latencyMs":1026.0453329999},{"questionId":"q107","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":11589,"outputTokens":1,"latencyMs":860.7715409998782},{"questionId":"q107","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8912,"outputTokens":1,"latencyMs":779.9598750001751},{"questionId":"q107","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8662,"outputTokens":1,"latencyMs":936.8535829999018},{"questionId":"q107","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"65","isCorrect":false,"inputTokens":17197,"outputTokens":1,"latencyMs":728.9177079999354},{"questionId":"q107","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":13283,"outputTokens":1,"latencyMs":954.2083749999292},{"questionId":"q108","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"23964281","isCorrect":false,"inputTokens":15288,"outputTokens":3,"latencyMs":1148.6309589999728},{"questionId":"q108","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"15939335","isCorrect":false,"inputTokens":11592,"outputTokens":3,"latencyMs":661.1757080000825},{"questionId":"q108","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"11000000","isCorrect":false,"inputTokens":8915,"outputTokens":3,"latencyMs":1072.593416000018},{"questionId":"q108","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"
17321675","isCorrect":false,"inputTokens":8665,"outputTokens":3,"latencyMs":811.687249999959},{"questionId":"q108","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"14782617","isCorrect":false,"inputTokens":17200,"outputTokens":3,"latencyMs":1047.7613750000019},{"questionId":"q108","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"15077794","isCorrect":false,"inputTokens":13286,"outputTokens":3,"latencyMs":781.4639580000658},{"questionId":"q109","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1821788","isCorrect":false,"inputTokens":15288,"outputTokens":3,"latencyMs":878.4607920001727},{"questionId":"q109","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"2251781","isCorrect":false,"inputTokens":11592,"outputTokens":3,"latencyMs":748.1280000000261},{"questionId":"q109","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1841289","isCorrect":false,"inputTokens":8915,"outputTokens":3,"latencyMs":989.4657089998946},{"questionId":"q109","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"2751478","isCorrect":false,"inputTokens":8665,"outputTokens":3,"latencyMs":805.2802919999231},{"questionId":"q109","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1735050","isCorrect":false,"inputTokens":17200,"outputTokens":3,"latencyMs":530.580083000008},{"questionId":"q109","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1721781","isCorrect":false,"inputTokens":13286,"outputTokens":3,"latencyMs":1035.6608329999726},{"questionId":"q110","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"147531","isCorrect":false,"inputTokens":15287,"outputTokens":2,"latencyMs":749.2882499999832},{"questionId":"q110","format":"json-compact","model":"grok-4-1-fast-non-reaso
ning","expected":"154136","actual":"143707","isCorrect":false,"inputTokens":11591,"outputTokens":2,"latencyMs":918.7278330000117},{"questionId":"q110","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"143279","isCorrect":false,"inputTokens":8914,"outputTokens":2,"latencyMs":742.1529169999994},{"questionId":"q110","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"143937","isCorrect":false,"inputTokens":8664,"outputTokens":2,"latencyMs":852.8257919999305},{"questionId":"q110","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"128484","isCorrect":false,"inputTokens":17199,"outputTokens":2,"latencyMs":1170.8482499998063},{"questionId":"q110","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"132881","isCorrect":false,"inputTokens":13285,"outputTokens":2,"latencyMs":612.3462499999441},{"questionId":"q111","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":15289,"outputTokens":1,"latencyMs":953.1429159999825},{"questionId":"q111","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"35","isCorrect":false,"inputTokens":11593,"outputTokens":1,"latencyMs":1029.956540999934},{"questionId":"q111","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"35","isCorrect":false,"inputTokens":8916,"outputTokens":1,"latencyMs":741.9714999999851},{"questionId":"q111","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"35","isCorrect":false,"inputTokens":8666,"outputTokens":1,"latencyMs":864.9059580001049},{"questionId":"q111","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":17201,"outputTokens":1,"latencyMs":1208.616083999863},{"questionId":"q111","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCor
rect":false,"inputTokens":13287,"outputTokens":1,"latencyMs":1042.6380839999765},{"questionId":"q112","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":15289,"outputTokens":1,"latencyMs":1173.637875000015},{"questionId":"q112","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":11593,"outputTokens":1,"latencyMs":841.763666999992},{"questionId":"q112","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"39","isCorrect":false,"inputTokens":8916,"outputTokens":1,"latencyMs":1392.9161670000758},{"questionId":"q112","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":8666,"outputTokens":1,"latencyMs":714.6367079999764},{"questionId":"q112","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":17201,"outputTokens":1,"latencyMs":815.1910830000415},{"questionId":"q112","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":13287,"outputTokens":1,"latencyMs":597.3310420000926},{"questionId":"q113","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"78","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":871.604708999861},{"questionId":"q113","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"66","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":807.617333999835},{"questionId":"q113","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"38","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":638.7451250001322},{"questionId":"q113","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"66","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":708.159
4589999877},{"questionId":"q113","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"62","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":855.563000000082},{"questionId":"q113","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"71","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":794.3505830001086},{"questionId":"q114","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"42","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":933.0522910000291},{"questionId":"q114","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"35","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":506.23399999993853},{"questionId":"q114","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"39","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":494.1964999998454},{"questionId":"q114","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"26","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":563.3929169999901},{"questionId":"q114","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"25","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":1057.8160830000415},{"questionId":"q114","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"39","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":895.5522079998627},{"questionId":"q115","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"12","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":5503.627499999944},{"questionId":"q115","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"25","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":889.953167000087},{"questionId":"q115","format":"toon","model":"grok-4-
1-fast-non-reasoning","expected":"16","actual":"38","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":814.922040999867},{"questionId":"q115","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"25","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":831.7978329998441},{"questionId":"q115","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"12","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":8343.876374999993},{"questionId":"q115","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"16","isCorrect":true,"inputTokens":13286,"outputTokens":1,"latencyMs":1020.1683750001248},{"questionId":"q116","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"66","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":984.3649999999907},{"questionId":"q116","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"65","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":919.2169170000125},{"questionId":"q116","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"84","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":857.4657499999739},{"questionId":"q116","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"58","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":841.0771250000689},{"questionId":"q116","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"39","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":889.4989579999819},{"questionId":"q116","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"51","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":779.8050410000142},{"questionId":"q117","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"35","isCorrect":false,"inputT
okens":15288,"outputTokens":1,"latencyMs":1121.593792000087},{"questionId":"q117","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"38","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":766.6404159998056},{"questionId":"q117","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"39","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":885.6065830001608},{"questionId":"q117","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"35","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":781.8842919999734},{"questionId":"q117","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"42","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":1529.369834000012},{"questionId":"q117","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"25","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":862.2314169998281},{"questionId":"q118","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"1","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":731.7133749998175},{"questionId":"q118","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"7","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":755.2664159999695},{"questionId":"q118","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"0","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":650.2094999998808},{"questionId":"q118","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"0","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":634.1624999998603},{"questionId":"q118","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"1","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":999.6341250000987},{"questionId":"q118","fo
rmat":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"1","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":678.9282090000343},{"questionId":"q119","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"67","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":637.6143749998882},{"questionId":"q119","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"42","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":822.7012920000125},{"questionId":"q119","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"12","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":915.8724579999689},{"questionId":"q119","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"42","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":901.0087499998044},{"questionId":"q119","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"42","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":986.8243750000838},{"questionId":"q119","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"25","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":971.8272919999436},{"questionId":"q120","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"42","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":955.3667919998989},{"questionId":"q120","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"25","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":790.203125},{"questionId":"q120","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"8","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":995.7208749998827},{"questionId":"q120","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":
"21","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":871.769125000108},{"questionId":"q120","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"35","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":1067.404041999951},{"questionId":"q120","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"25","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":836.6861250000075},{"questionId":"q121","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":1021.1512080000248},{"questionId":"q121","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":2547.47916600015},{"questionId":"q121","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"5","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":606.0274169999175},{"questionId":"q121","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"14","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":970.8832089998759},{"questionId":"q121","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":751.1921669999138},{"questionId":"q121","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":750.9648329999764},{"questionId":"q122","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":659.0223749999423},{"questionId":"q122","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":
975.9859169998672},{"questionId":"q122","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":538.5595419998281},{"questionId":"q122","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":818.3229999998584},{"questionId":"q122","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"8","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":1122.52804200002},{"questionId":"q122","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":627.9833750000689},{"questionId":"q123","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"7","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":1331.4281659999397},{"questionId":"q123","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"12","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":686.9591670001391},{"questionId":"q123","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":720.088041999843},{"questionId":"q123","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":778.5020409999415},{"questionId":"q123","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":1112.706082999939},{"questionId":"q123","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"5","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":799.0505000001285},{"questionId":"q124","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","e
xpected":"error","actual":"error","isCorrect":true,"inputTokens":7075,"outputTokens":1,"latencyMs":669.6382910001557},{"questionId":"q124","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":4987,"outputTokens":1,"latencyMs":676.7220410001464},{"questionId":"q124","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6076,"outputTokens":1,"latencyMs":633.0288750000764},{"questionId":"q124","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":7985,"outputTokens":1,"latencyMs":726.093957999954},{"questionId":"q124","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6102,"outputTokens":1,"latencyMs":722.1203749999404},{"questionId":"q125","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":7075,"outputTokens":3,"latencyMs":663.9378329999745},{"questionId":"q125","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":4987,"outputTokens":3,"latencyMs":668.3629580000415},{"questionId":"q125","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":6076,"outputTokens":3,"latencyMs":695.7735000001267},{"questionId":"q125","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":7985,"outputTokens":3,"latencyMs":791.358666999964},{"questionId":"q125","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":6102,"outputTokens":3,"latencyMs":547.9040830000304},{"questionId":"q126","format
":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":7076,"outputTokens":1,"latencyMs":720.8922079999465},{"questionId":"q126","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":4988,"outputTokens":1,"latencyMs":1768.109417000087},{"questionId":"q126","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":6077,"outputTokens":1,"latencyMs":434.0052920000162},{"questionId":"q126","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":7986,"outputTokens":1,"latencyMs":841.0514159998856},{"questionId":"q126","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":6103,"outputTokens":1,"latencyMs":671.7435409999453},{"questionId":"q127","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":7076,"outputTokens":2,"latencyMs":679.3918339998927},{"questionId":"q127","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":4988,"outputTokens":2,"latencyMs":531.0219580000266},{"questionId":"q127","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":6077,"outputTokens":2,"latencyMs":502.0852500000037},{"questionId":"q127","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":7986,"outputTokens":2,"latencyMs":547.9402090001386},{"questionId":"q127","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":6103,"outputTokens":2,"latencyMs":816.4800829999149},{"questionId":"q128","format":"json-pretty","model":"grok-4-1-fast-non-reasoning
","expected":"error","actual":"error","isCorrect":true,"inputTokens":7075,"outputTokens":1,"latencyMs":1052.8748339999001},{"questionId":"q128","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":4987,"outputTokens":1,"latencyMs":430.6976250000298},{"questionId":"q128","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6076,"outputTokens":1,"latencyMs":596.5801659999415},{"questionId":"q128","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":7985,"outputTokens":1,"latencyMs":670.6383330000099},{"questionId":"q128","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6102,"outputTokens":1,"latencyMs":666.0189590000082},{"questionId":"q129","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7075,"outputTokens":2,"latencyMs":975.8670830000192},{"questionId":"q129","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"{\"logs\":[{\"timestamp\":\"2026-03-01T16:56:46.557Z\",\"level\":\"info\",\"endpoint\":\"/api/auth\",\"statusCode\":205,\"responseTime\":765,\"userId\":2867}]} \n**Answer:** 
/api/auth","isCorrect":false,"inputTokens":4987,"outputTokens":53,"latencyMs":918.1929160000291},{"questionId":"q129","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6076,"outputTokens":2,"latencyMs":484.9342089998536},{"questionId":"q129","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7985,"outputTokens":2,"latencyMs":5054.260624999879},{"questionId":"q129","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6102,"outputTokens":2,"latencyMs":4841.998874999816},{"questionId":"q130","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":7076,"outputTokens":1,"latencyMs":596.7672910001129},{"questionId":"q130","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":4988,"outputTokens":1,"latencyMs":4763.605125000002},{"questionId":"q130","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":6077,"outputTokens":1,"latencyMs":756.2959169999231},{"questionId":"q130","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":7986,"outputTokens":1,"latencyMs":452.62154199997894},{"questionId":"q130","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":6103,"outputTokens":1,"latencyMs":494.21950000012293},{"questionId":"q131","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":7076,"outputTokens":1,"latencyMs":590.6149999999907},{"questionId":"q131","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect"
:true,"inputTokens":4988,"outputTokens":1,"latencyMs":544.4521670001559},{"questionId":"q131","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":6077,"outputTokens":1,"latencyMs":496.3487500001211},{"questionId":"q131","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":7986,"outputTokens":1,"latencyMs":717.1118340000976},{"questionId":"q131","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":6103,"outputTokens":1,"latencyMs":480.12924999999814},{"questionId":"q132","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7075,"outputTokens":1,"latencyMs":693.7166249998845},{"questionId":"q132","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":4987,"outputTokens":1,"latencyMs":537.2367080000695},{"questionId":"q132","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6076,"outputTokens":1,"latencyMs":744.6592079999391},{"questionId":"q132","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7985,"outputTokens":1,"latencyMs":652.4775829999708},{"questionId":"q132","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6102,"outputTokens":1,"latencyMs":560.8553750000428},{"questionId":"q133","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7075,"outputTokens":2,"latencyMs":589.8924159999005},{"questionId":"q133","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"{\"logs\":[{\"timestamp\":\"2026-02-26T16:40:28.5
12Z\",\"level\":\"error\",\"endpoint\":\"/api/payments\",\"statusCode\":442,\"responseTime\":2708,\"userId\":4665,\"error\":{\"message\":\"Invalid authentication token\",\"stack\":\"Error: Admitto tui et aperio denuo minima aeneus arto.\\n at desparatus\\n at curvo\",\"retryable\":true}},{\"timestamp\":\"2026-03-01T22:38:51.411Z\",\"level\":\"info\",\"endpoint\":\"/api/orders\",\"statusCode\":215,\"responseTime\":2058,\"userId\":8534},{\"timestamp\":\"2026-03-02T14:27:54.933","isCorrect":false,"inputTokens":4987,"outputTokens":153,"latencyMs":1858.7134589999914},{"questionId":"q133","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6076,"outputTokens":2,"latencyMs":620.287666000193},{"questionId":"q133","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7985,"outputTokens":2,"latencyMs":765.4392080001999},{"questionId":"q133","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6102,"outputTokens":2,"latencyMs":614.7382080000825},{"questionId":"q134","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"80","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":641.4104170000646},{"questionId":"q134","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"{\"count\":100}","isCorrect":false,"inputTokens":4971,"outputTokens":5,"latencyMs":561.1789159998298},{"questionId":"q134","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"75","isCorrect":true,"inputTokens":6060,"outputTokens":1,"latencyMs":3554.437834000215},{"questionId":"q134","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"50","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":534.5713329999708},{"questionId":"q134","format":"yaml","
model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"72","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":482.87349999998696},{"questionId":"q135","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2388","isCorrect":false,"inputTokens":7060,"outputTokens":2,"latencyMs":2085.6222910000943},{"questionId":"q135","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2591","isCorrect":false,"inputTokens":4972,"outputTokens":2,"latencyMs":583.098166000098},{"questionId":"q135","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2585","isCorrect":false,"inputTokens":6061,"outputTokens":2,"latencyMs":497.60691700014286},{"questionId":"q135","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2507","isCorrect":false,"inputTokens":7970,"outputTokens":2,"latencyMs":644.9506250000559},{"questionId":"q135","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2459","isCorrect":false,"inputTokens":6087,"outputTokens":2,"latencyMs":573.1919589999598},{"questionId":"q136","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":697.5648329998367},{"questionId":"q136","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":4971,"outputTokens":1,"latencyMs":463.88112500007264},{"questionId":"q136","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"28","isCorrect":false,"inputTokens":6060,"outputTokens":1,"latencyMs":2941.9194579999894},{"questionId":"q136","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":703.0076249998529},{"questionId":"q136","format":"yaml","model":"grok-4-1-fast-non-reasoni
ng","expected":"26","actual":"25","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":696.7065420001745},{"questionId":"q137","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"26","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":658.0850420000497},{"questionId":"q137","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"24","isCorrect":false,"inputTokens":4971,"outputTokens":1,"latencyMs":492.91225000005215},{"questionId":"q137","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"25","isCorrect":false,"inputTokens":6060,"outputTokens":1,"latencyMs":514.7448330000043},{"questionId":"q137","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"26","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":393.0493749999441},{"questionId":"q137","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"24","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":423.2260000000242},{"questionId":"q138","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"16","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":526.8290830000769},{"questionId":"q138","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"15","isCorrect":false,"inputTokens":4971,"outputTokens":1,"latencyMs":452.80616699997336},{"questionId":"q138","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"16","isCorrect":false,"inputTokens":6060,"outputTokens":1,"latencyMs":747.925125000067},{"questionId":"q138","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"19","isCorrect":true,"inputTokens":7969,"outputTokens":1,"latencyMs":616.3643330000341},{"questionId":"q138","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"17","isCorrect":false,"inputTokens":6086,"
outputTokens":1,"latencyMs":545.6614999999292},{"questionId":"q139","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"17","isCorrect":false,"inputTokens":7062,"outputTokens":1,"latencyMs":594.9790000000503},{"questionId":"q139","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"15","isCorrect":false,"inputTokens":4974,"outputTokens":1,"latencyMs":633.8448329998646},{"questionId":"q139","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"15","isCorrect":false,"inputTokens":6063,"outputTokens":1,"latencyMs":467.36437499988824},{"questionId":"q139","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"16","isCorrect":true,"inputTokens":7972,"outputTokens":1,"latencyMs":749.2177499998361},{"questionId":"q139","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"18","isCorrect":false,"inputTokens":6089,"outputTokens":1,"latencyMs":588.8195410000626},{"questionId":"q140","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":7061,"outputTokens":1,"latencyMs":955.6003749999218},{"questionId":"q140","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":4973,"outputTokens":1,"latencyMs":524.8153750000056},{"questionId":"q140","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":6062,"outputTokens":1,"latencyMs":5448.149041000055},{"questionId":"q140","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"12","isCorrect":false,"inputTokens":7971,"outputTokens":1,"latencyMs":656.2655419998337},{"questionId":"q140","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":6088,"outputTokens":1,"latencyMs":537.8960829998832},{"questionId":"q141","for
mat":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"35","isCorrect":false,"inputTokens":7065,"outputTokens":1,"latencyMs":437.2469580001198},{"questionId":"q141","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"35","isCorrect":false,"inputTokens":4977,"outputTokens":1,"latencyMs":567.4906249998603},{"questionId":"q141","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"37","isCorrect":false,"inputTokens":6066,"outputTokens":1,"latencyMs":568.9782090000808},{"questionId":"q141","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"34","isCorrect":false,"inputTokens":7975,"outputTokens":1,"latencyMs":640.1640840000473},{"questionId":"q141","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"35","isCorrect":false,"inputTokens":6092,"outputTokens":1,"latencyMs":731.982374999905},{"questionId":"q142","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"28","isCorrect":false,"inputTokens":7064,"outputTokens":1,"latencyMs":656.7047500000335},{"questionId":"q142","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"38","isCorrect":false,"inputTokens":4976,"outputTokens":1,"latencyMs":480.58245799993165},{"questionId":"q142","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"37","isCorrect":false,"inputTokens":6065,"outputTokens":1,"latencyMs":539.6479159998707},{"questionId":"q142","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"38","isCorrect":false,"inputTokens":7974,"outputTokens":1,"latencyMs":655.2422499998938},{"questionId":"q142","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"42","isCorrect":true,"inputTokens":6091,"outputTokens":1,"latencyMs":641.8482500000391},{"questionId":"q143","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"24",
"actual":"20","isCorrect":false,"inputTokens":7060,"outputTokens":1,"latencyMs":507.58849999983795},{"questionId":"q143","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"18","isCorrect":false,"inputTokens":4972,"outputTokens":1,"latencyMs":587.9285420000087},{"questionId":"q143","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"26","isCorrect":false,"inputTokens":6061,"outputTokens":1,"latencyMs":543.0668340001721},{"questionId":"q143","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"25","isCorrect":false,"inputTokens":7970,"outputTokens":1,"latencyMs":633.4865840000566},{"questionId":"q143","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"22","isCorrect":false,"inputTokens":6087,"outputTokens":1,"latencyMs":547.9707090000156},{"questionId":"q144","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":7066,"outputTokens":1,"latencyMs":807.6963329999708},{"questionId":"q144","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"27","isCorrect":false,"inputTokens":4978,"outputTokens":1,"latencyMs":603.1292499999981},{"questionId":"q144","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"26","isCorrect":true,"inputTokens":6067,"outputTokens":1,"latencyMs":570.2862919999752},{"questionId":"q144","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"26","isCorrect":true,"inputTokens":7976,"outputTokens":1,"latencyMs":810.4777089999989},{"questionId":"q144","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":6093,"outputTokens":1,"latencyMs":584.8633749999572},{"questionId":"q145","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":7066,"outputTokens":1,"latency
Ms":771.7984579999465},{"questionId":"q145","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"12","isCorrect":false,"inputTokens":4978,"outputTokens":1,"latencyMs":611.7062500000466},{"questionId":"q145","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6067,"outputTokens":1,"latencyMs":562.7346249998081},{"questionId":"q145","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"12","isCorrect":false,"inputTokens":7976,"outputTokens":1,"latencyMs":1165.6651250000577},{"questionId":"q145","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"11","isCorrect":false,"inputTokens":6093,"outputTokens":1,"latencyMs":458.80704100010917},{"questionId":"q146","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":7069,"outputTokens":1,"latencyMs":863.8479579999112},{"questionId":"q146","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":4981,"outputTokens":1,"latencyMs":570.4369580000639},{"questionId":"q146","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":6070,"outputTokens":1,"latencyMs":519.018917000154},{"questionId":"q146","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"5","isCorrect":false,"inputTokens":7979,"outputTokens":1,"latencyMs":949.3144999998622},{"questionId":"q146","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":6096,"outputTokens":1,"latencyMs":402.4407089999877},{"questionId":"q147","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":7068,"outputTokens":1,"latencyMs":622.7003339999355},{"questionId":"q147","format":"json-compact","model":"grok-4-1-fas
t-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":4980,"outputTokens":1,"latencyMs":441.8485409999266},{"questionId":"q147","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":6069,"outputTokens":1,"latencyMs":483.62008300004527},{"questionId":"q147","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":7978,"outputTokens":1,"latencyMs":699.7590830000117},{"questionId":"q147","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":6095,"outputTokens":1,"latencyMs":602.388666999992},{"questionId":"q148","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":7068,"outputTokens":1,"latencyMs":10241.612874999875},{"questionId":"q148","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":4980,"outputTokens":1,"latencyMs":475.24783300003037},{"questionId":"q148","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":6069,"outputTokens":1,"latencyMs":575.6938749998808},{"questionId":"q148","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7978,"outputTokens":1,"latencyMs":780.9076249999925},{"questionId":"q148","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":6095,"outputTokens":1,"latencyMs":566.3519999999553},{"questionId":"q149","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"4","isCorrect":false,"inputTokens":7067,"outputTokens":1,"latencyMs":780.5568750000093},{"questionId":"q149","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"5","isCorrect":false,"inputTokens":4979,"outputToke
ns":1,"latencyMs":451.3896250000689},{"questionId":"q149","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"5","isCorrect":false,"inputTokens":6068,"outputTokens":1,"latencyMs":740.5959590000566},{"questionId":"q149","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"4","isCorrect":false,"inputTokens":7977,"outputTokens":1,"latencyMs":839.9617080001626},{"questionId":"q149","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"4","isCorrect":false,"inputTokens":6094,"outputTokens":1,"latencyMs":545.8948749999981},{"questionId":"q150","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"5","isCorrect":false,"inputTokens":7066,"outputTokens":1,"latencyMs":617.8501670002006},{"questionId":"q150","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":4978,"outputTokens":1,"latencyMs":499.71529100020416},{"questionId":"q150","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6067,"outputTokens":1,"latencyMs":436.76954200002365},{"questionId":"q150","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"6","isCorrect":false,"inputTokens":7976,"outputTokens":1,"latencyMs":665.0118329999968},{"questionId":"q150","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6093,"outputTokens":1,"latencyMs":630.4103749999776},{"questionId":"q151","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":568.3108330001123},{"questionId":"q151","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":664.702874999959},{"questionId":"q151"
,"format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":887.1532499999739},{"questionId":"q151","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":1236.0112499999814},{"questionId":"q151","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":532.5732499998994},{"questionId":"q152","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1165,"outputTokens":7,"latencyMs":1971.4524580000434},{"questionId":"q152","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":789,"outputTokens":7,"latencyMs":813.3573330000509},{"questionId":"q152","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":896,"outputTokens":7,"latencyMs":924.5502499998547},{"questionId":"q152","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1237,"outputTokens":7,"latencyMs":1006.0048750001006},{"questionId":"q152","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":916,"outputTokens":7,"latencyMs":430.3585830000229},{"questionId":"q153","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1165,"outputTokens":2,"latencyMs":541.6815420000348},{"questionId"
:"q153","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":789,"outputTokens":2,"latencyMs":460.0598750000354},{"questionId":"q153","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":896,"outputTokens":2,"latencyMs":484.6317910000216},{"questionId":"q153","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1237,"outputTokens":2,"latencyMs":465.4757910000626},{"questionId":"q153","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":916,"outputTokens":2,"latencyMs":591.9300409997813},{"questionId":"q154","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":435.2754999999888},{"questionId":"q154","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":536.249749999959},{"questionId":"q154","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":12238.85999999987},{"questionId":"q154","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":662.8013750000391},{"questionId":"q154","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":594.5850830001291},{"questionId":"q155","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1165,"outputTokens":2,"latencyMs":609.3842909999657},{"questionId":"q155","format":"json-compact","model":"grok-4-1-fast-non-reasonin
g","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":789,"outputTokens":2,"latencyMs":526.6292089999188},{"questionId":"q155","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":896,"outputTokens":2,"latencyMs":550.4001660000067},{"questionId":"q155","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1237,"outputTokens":2,"latencyMs":539.1558329998516},{"questionId":"q155","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":916,"outputTokens":2,"latencyMs":433.43420799984597},{"questionId":"q156","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":506.74600000004284},{"questionId":"q156","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":492.7102080001496},{"questionId":"q156","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":433.3894169998821},{"questionId":"q156","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":398.6489579998888},{"questionId":"q156","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":420.9260420000646},{"questionId":"q157","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1167,"outputTokens":2,"latencyMs":503.7374169998802},{"questionId":"q157","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputToke
ns":791,"outputTokens":2,"latencyMs":648.766082999995},{"questionId":"q157","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":898,"outputTokens":2,"latencyMs":711.5557909999043},{"questionId":"q157","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1239,"outputTokens":2,"latencyMs":383.2424170000013},{"questionId":"q157","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":918,"outputTokens":2,"latencyMs":564.1333329998888},{"questionId":"q158","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1165,"outputTokens":1,"latencyMs":515.1211670001503},{"questionId":"q158","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":789,"outputTokens":1,"latencyMs":444.1989999997895},{"questionId":"q158","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":896,"outputTokens":1,"latencyMs":553.0614579999819},{"questionId":"q158","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1237,"outputTokens":1,"latencyMs":516.2151250001043},{"questionId":"q158","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":916,"outputTokens":1,"latencyMs":548.5459579997696},{"questionId":"q159","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1166,"outputTokens":2,"latencyMs":484.6058339998126},{"questionId":"q159","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":790,"outp
utTokens":2,"latencyMs":612.9599999999627},{"questionId":"q159","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":897,"outputTokens":2,"latencyMs":1002.7459169998765},{"questionId":"q159","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1238,"outputTokens":2,"latencyMs":514.824166000355},{"questionId":"q159","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":917,"outputTokens":2,"latencyMs":573.5519579998218},{"questionId":"q160","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1167,"outputTokens":5,"latencyMs":729.5760000003502},{"questionId":"q160","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":791,"outputTokens":5,"latencyMs":22246.316542000044},{"questionId":"q160","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":484.80991700012237},{"questionId":"q160","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1239,"outputTokens":5,"latencyMs":683.5540000000037},{"questionId":"q160","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":918,"outputTokens":5,"latencyMs":537.343334000092},{"questionId":"q161","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":321.8642909997143},{"questionId":"q161","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":711.72916700039
06},{"questionId":"q161","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1956,"outputTokens":1,"latencyMs":585.2053749999031},{"questionId":"q161","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":443.67554099997506},{"questionId":"q161","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":499.66949999984354},{"questionId":"q162","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":650.0754170003347},{"questionId":"q162","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":559.8465829999186},{"questionId":"q162","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":404.6767500001006},{"questionId":"q162","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":2638,"outputTokens":1,"latencyMs":474.6345830000937},{"questionId":"q162","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":785.9961669999175},{"questionId":"q163","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":506.4886670000851},{"questionId":"q163","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":499.8605829998851},{"questionId":"q163","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2
","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":548.1877080001868},{"questionId":"q163","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1238,"outputTokens":1,"latencyMs":573.2623340003192},{"questionId":"q163","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":2085.15475000022},{"questionId":"q164","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":2063.1506249997765},{"questionId":"q164","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":2063.2115000002086},{"questionId":"q164","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":484.71375000011176},{"questionId":"q164","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1238,"outputTokens":1,"latencyMs":412.90587500017136},{"questionId":"q164","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":592.0760830002837},{"questionId":"q165","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":465.3804170000367},{"questionId":"q165","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":490.51391700003296},{"questionId":"q165","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":555.85291699972},{"questionId":"q165",
"format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1238,"outputTokens":1,"latencyMs":416.2364590000361},{"questionId":"q165","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":626.9462079997174},{"questionId":"q166","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1170,"outputTokens":1,"latencyMs":483.3775410000235},{"questionId":"q166","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":794,"outputTokens":1,"latencyMs":525.3997499998659},{"questionId":"q166","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":901,"outputTokens":1,"latencyMs":460.0272920001298},{"questionId":"q166","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1242,"outputTokens":1,"latencyMs":468.0580000001937},{"questionId":"q166","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":921,"outputTokens":1,"latencyMs":434.2507499996573},{"questionId":"q167","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":498.7844169996679},{"questionId":"q167","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":1106.7191670001484},{"questionId":"q167","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":398.2881669998169},{"questionId":"q167","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTok
ens":1238,"outputTokens":1,"latencyMs":423.91337499953806},{"questionId":"q167","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":399.51820799987763},{"questionId":"q168","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":2496,"outputTokens":1,"latencyMs":530.0409999997355},{"questionId":"q168","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":792,"outputTokens":1,"latencyMs":372.8215419999324},{"questionId":"q168","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":899,"outputTokens":1,"latencyMs":559.5414160001092},{"questionId":"q168","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":1240,"outputTokens":1,"latencyMs":516.9351249998435},{"questionId":"q168","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":919,"outputTokens":1,"latencyMs":625.4416249999776},{"questionId":"q169","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"9","isCorrect":false,"inputTokens":1170,"outputTokens":1,"latencyMs":958.8957500001416},{"questionId":"q169","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"9","isCorrect":false,"inputTokens":794,"outputTokens":1,"latencyMs":609.8635840001516},{"questionId":"q169","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"8","isCorrect":true,"inputTokens":901,"outputTokens":1,"latencyMs":1038.2139170002192},{"questionId":"q169","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"8","isCorrect":true,"inputTokens":1242,"outputTokens":1,"latencyMs":849.5646669999696},{"questionId":"q169","format":"yaml","model":
"grok-4-1-fast-non-reasoning","expected":"8","actual":"11","isCorrect":false,"inputTokens":921,"outputTokens":1,"latencyMs":3410.467042000033},{"questionId":"q170","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":1169,"outputTokens":1,"latencyMs":594.7289589997381},{"questionId":"q170","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"6","isCorrect":false,"inputTokens":793,"outputTokens":1,"latencyMs":551.421459000092},{"questionId":"q170","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"6","isCorrect":false,"inputTokens":900,"outputTokens":1,"latencyMs":573.0434999996796},{"questionId":"q170","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"6","isCorrect":false,"inputTokens":1241,"outputTokens":1,"latencyMs":658.1823330000043},{"questionId":"q170","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":920,"outputTokens":1,"latencyMs":408.4260830003768},{"questionId":"q171","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"4","isCorrect":false,"inputTokens":1171,"outputTokens":1,"latencyMs":497.02908399980515},{"questionId":"q171","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":795,"outputTokens":1,"latencyMs":491.7958749998361},{"questionId":"q171","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":902,"outputTokens":1,"latencyMs":590.5531250000931},{"questionId":"q171","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"4","isCorrect":false,"inputTokens":1243,"outputTokens":1,"latencyMs":452.9944170000963},{"questionId":"q171","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"5","isCorrect":false,"inputTokens":922,"output
Tokens":1,"latencyMs":866.8199579999782},{"questionId":"q172","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1171,"outputTokens":1,"latencyMs":607.6427080002613},{"questionId":"q172","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":795,"outputTokens":1,"latencyMs":519.5298330001533},{"questionId":"q172","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":902,"outputTokens":1,"latencyMs":399.87587500037625},{"questionId":"q172","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1243,"outputTokens":1,"latencyMs":440.52479099994525},{"questionId":"q172","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":922,"outputTokens":1,"latencyMs":559.67833300028},{"questionId":"q173","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":1172,"outputTokens":1,"latencyMs":564.7204999998212},{"questionId":"q173","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":796,"outputTokens":1,"latencyMs":384.5131250000559},{"questionId":"q173","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":903,"outputTokens":1,"latencyMs":510.0762499999255},{"questionId":"q173","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1244,"outputTokens":1,"latencyMs":471.7332919999026},{"questionId":"q173","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":923,"outputTokens":1,"latencyMs":584.4937080000527},{"questionId":"q174","format":"json-pretty","model":"grok-4-
1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1169,"outputTokens":1,"latencyMs":669.2428330001421},{"questionId":"q174","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":793,"outputTokens":1,"latencyMs":494.3819160000421},{"questionId":"q174","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":900,"outputTokens":1,"latencyMs":773.781165999826},{"questionId":"q174","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":1241,"outputTokens":1,"latencyMs":585.256166999694},{"questionId":"q174","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":920,"outputTokens":1,"latencyMs":707.0857500000857},{"questionId":"q175","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":1173,"outputTokens":1,"latencyMs":434.921624999959},{"questionId":"q175","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"2","isCorrect":false,"inputTokens":797,"outputTokens":1,"latencyMs":814.5306669999845},{"questionId":"q175","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":1968,"outputTokens":1,"latencyMs":414.5523340003565},{"questionId":"q175","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":1245,"outputTokens":1,"latencyMs":571.2765410002321},{"questionId":"q175","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":924,"outputTokens":1,"latencyMs":344.7449159999378},{"questionId":"q176","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1167,"outputTokens":1,"
latencyMs":513.3467919998802},{"questionId":"q176","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":533.905749999918},{"questionId":"q176","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":370.74941699998453},{"questionId":"q176","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":631.5751670002937},{"questionId":"q176","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1996,"outputTokens":1,"latencyMs":930.6895409999415},{"questionId":"q177","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1175,"outputTokens":1,"latencyMs":399.03012500004843},{"questionId":"q177","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":799,"outputTokens":1,"latencyMs":365.84254100034013},{"questionId":"q177","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":906,"outputTokens":1,"latencyMs":582.7095000003465},{"questionId":"q177","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1247,"outputTokens":1,"latencyMs":418.50779200019315},{"questionId":"q177","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":926,"outputTokens":1,"latencyMs":336.9835830000229},{"questionId":"q178","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1170,"outputTokens":1,"latencyMs":532.1033749999478},{"questionId":"q178","format":"json-compact","model":"grok-4-1-fast-no
n-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":794,"outputTokens":1,"latencyMs":438.3645840003155},{"questionId":"q178","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":901,"outputTokens":1,"latencyMs":577.2512499997392},{"questionId":"q178","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1242,"outputTokens":1,"latencyMs":579.6784999999218},{"questionId":"q178","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":921,"outputTokens":1,"latencyMs":811.5298750000075},{"questionId":"q179","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1169,"outputTokens":1,"latencyMs":556.395291000139},{"questionId":"q179","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":793,"outputTokens":1,"latencyMs":579.4757079998963},{"questionId":"q179","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":900,"outputTokens":1,"latencyMs":561.9488340001553},{"questionId":"q179","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1241,"outputTokens":1,"latencyMs":552.0802909997292},{"questionId":"q179","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":920,"outputTokens":1,"latencyMs":556.203125},{"questionId":"q180","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":1252.5789579995908},{"questionId":"q180","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":4120,"outputTokens":1,"late
ncyMs":664.0655419998802},{"questionId":"q180","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":564.4555420000106},{"questionId":"q180","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":546.6606669998728},{"questionId":"q180","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":785.5787080000155},{"questionId":"q180","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":802.9945829999633},{"questionId":"q181","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":6534,"outputTokens":12,"latencyMs":770.2546669999138},{"questionId":"q181","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4125,"outputTokens":12,"latencyMs":646.5715000000782},{"questionId":"q181","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2704,"outputTokens":12,"latencyMs":738.1328750001267},{"questionId":"q181","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2541,"outputTokens":12,"latencyMs":453.31841700011864},{"questionId":"q181",
"format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7473,"outputTokens":12,"latencyMs":600.691165999975},{"questionId":"q181","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5173,"outputTokens":12,"latencyMs":528.4648329997435},{"questionId":"q182","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":6532,"outputTokens":1,"latencyMs":747.806707999669},{"questionId":"q182","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":4123,"outputTokens":1,"latencyMs":579.8203329998069},{"questionId":"q182","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":2702,"outputTokens":1,"latencyMs":648.0913329999894},{"questionId":"q182","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":5238,"outputTokens":1,"latencyMs":753.5913749998435},{"questionId":"q182","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":7471,"outputTokens":1,"latencyMs":555.7612499999814},{"questionId":"q182","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":5171,"outputTokens":1,"latencyMs":694.1673329998739},{"questionId":"q183","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":6533,"outputTokens":1,"latencyMs":615.7989579997957},{"questionId":"q183","format":"json-compact","model":"gro
k-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4124,"outputTokens":1,"latencyMs":636.5812910003588},{"questionId":"q183","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2703,"outputTokens":1,"latencyMs":477.2688339999877},{"questionId":"q183","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2540,"outputTokens":1,"latencyMs":539.9712910000235},{"questionId":"q183","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":7472,"outputTokens":1,"latencyMs":761.8279169998132},{"questionId":"q183","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5172,"outputTokens":1,"latencyMs":465.2080830000341},{"questionId":"q184","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":6533,"outputTokens":4,"latencyMs":1139.6310410001315},{"questionId":"q184","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":4124,"outputTokens":4,"latencyMs":613.9928749999963},{"questionId":"q184","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":2703,"outputTokens":4,"latencyMs":488.5675419997424},{"questionId":"q184","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":2540,"outputTokens":4,"latencyMs":604.6790000000037},{"questionId":"q184","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares 
Skiles","isCorrect":true,"inputTokens":7472,"outputTokens":4,"latencyMs":608.0647919997573},{"questionId":"q184","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":5172,"outputTokens":4,"latencyMs":664.8144999998622},{"questionId":"q185","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6530,"outputTokens":1,"latencyMs":12182.406749999616},{"questionId":"q185","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":4121,"outputTokens":1,"latencyMs":493.5817499998957},{"questionId":"q185","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"6","isCorrect":false,"inputTokens":2700,"outputTokens":1,"latencyMs":586.3510839999653},{"questionId":"q185","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":2537,"outputTokens":1,"latencyMs":549.4824999999255},{"questionId":"q185","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":7469,"outputTokens":1,"latencyMs":706.1140000000596},{"questionId":"q185","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":5169,"outputTokens":1,"latencyMs":509.1063749999739},{"questionId":"q186","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":889.8342909999192},{"questionId":"q186","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7301,"outputTokens":1,"latencyMs":836.0652499999851},{"questionId":"q186","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7619,"outputTokens":1,"laten
cyMs":764.9607909996994},{"questionId":"q186","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":12946,"outputTokens":1,"latencyMs":843.1492909998633},{"questionId":"q186","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":8964,"outputTokens":1,"latencyMs":867.6933340001851},{"questionId":"q187","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":11534,"outputTokens":15,"latencyMs":869.5551669998094},{"questionId":"q187","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":7308,"outputTokens":15,"latencyMs":618.4852499999106},{"questionId":"q187","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":7626,"outputTokens":15,"latencyMs":876.8569999998435},{"questionId":"q187","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":12953,"outputTokens":15,"latencyMs":1181.0373749998398},{"questionId":"q187","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8971,"outputTokens":15,"latencyMs":767.7519590002485},{"questionId":"q188","format":"json-pretty","model":"grok-4-1-fast-non
-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":818.1288749999367},{"questionId":"q188","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":910.3320000004023},{"questionId":"q188","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":584.8427499998361},{"questionId":"q188","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":774.0600000000559},{"questionId":"q188","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":861.5179579998367},{"questionId":"q189","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":11535,"outputTokens":6,"latencyMs":803.0108340000734},{"questionId":"q189","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":7309,"outputTokens":6,"latencyMs":700.7432919996791},{"questionId":"q189","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":7627,"outputTokens":6,"latencyMs":598.2462919997051},{"questionId":"q189","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":12954,"outputTokens":6,"latencyMs":726.3003329997882},{"questionId":"q189","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8
972,"outputTokens":6,"latencyMs":865.8394590001553},{"questionId":"q190","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":11531,"outputTokens":2,"latencyMs":854.4052920001559},{"questionId":"q190","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7305,"outputTokens":2,"latencyMs":612.8385410001501},{"questionId":"q190","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7623,"outputTokens":2,"latencyMs":533.2371249999851},{"questionId":"q190","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12950,"outputTokens":2,"latencyMs":958.8608749997802},{"questionId":"q190","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8968,"outputTokens":2,"latencyMs":928.7744160001166},{"questionId":"q191","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":11536,"outputTokens":5,"latencyMs":699.7848749998957},{"questionId":"q191","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":7310,"outputTokens":5,"latencyMs":767.8369579999708},{"questionId":"q191","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":7628,"outputTokens":5,"latencyMs":809.0390840000473},{"questionId":"q191","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":12955,"outputTokens":5,"latencyMs":722.1277910000645},{"questio
nId":"q191","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8973,"outputTokens":5,"latencyMs":742.7240829998627},{"questionId":"q192","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"62","isCorrect":false,"inputTokens":3873,"outputTokens":1,"latencyMs":580.0553330001421},{"questionId":"q192","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"62","isCorrect":false,"inputTokens":2544,"outputTokens":1,"latencyMs":611.8277079998516},{"questionId":"q192","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1803,"outputTokens":1,"latencyMs":582.2866250001825},{"questionId":"q192","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1662,"outputTokens":1,"latencyMs":609.2173339999281},{"questionId":"q192","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"67","isCorrect":false,"inputTokens":4519,"outputTokens":1,"latencyMs":661.6859169998206},{"questionId":"q192","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"62","isCorrect":false,"inputTokens":3203,"outputTokens":1,"latencyMs":589.9222920001484},{"questionId":"q193","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3877,"outputTokens":14,"latencyMs":522.1874170000665},{"questionId":"q193","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2548,"outputTokens":14,"latencyMs":549.2259579999372},{"questionId":"q193","format":"toon","model":"grok-4-1-f
ast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1807,"outputTokens":14,"latencyMs":560.921875},{"questionId":"q193","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1666,"outputTokens":14,"latencyMs":681.8551659998484},{"questionId":"q193","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4523,"outputTokens":14,"latencyMs":610.0809579999186},{"questionId":"q193","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3207,"outputTokens":14,"latencyMs":941.1139580002055},{"questionId":"q194","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"conversions","isCorrect":false,"inputTokens":3876,"outputTokens":2,"latencyMs":595.4821250000969},{"questionId":"q194","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2547,"outputTokens":1,"latencyMs":410.87079200008884},{"questionId":"q194","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1806,"outputTokens":1,"latencyMs":527.0672500003129},{"questionId":"q194","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3490,"outputTokens":1,"latencyMs":1611.3186250003055},{"questionId":"q194","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"conversions","i
sCorrect":false,"inputTokens":4522,"outputTokens":2,"latencyMs":541.6719589997083},{"questionId":"q194","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3206,"outputTokens":1,"latencyMs":443.4391669998877},{"questionId":"q195","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3877,"outputTokens":6,"latencyMs":646.6973330001347},{"questionId":"q195","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2548,"outputTokens":6,"latencyMs":460.30249999975786},{"questionId":"q195","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1807,"outputTokens":6,"latencyMs":462.1170000000857},{"questionId":"q195","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1666,"outputTokens":6,"latencyMs":681.4101669997908},{"questionId":"q195","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4523,"outputTokens":6,"latencyMs":554.3486249996349},{"questionId":"q195","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3207,"outputTokens":6,"latencyMs":659.1484580002725},{"questionId":"q196","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":3873,"outputTokens":1,"latencyMs":371.490291999653},{"questionId":"q196","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":2544,"outputTokens":1,"latencyMs":539.5985829997808},{"questionId":"q196","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":
"6","actual":"6","isCorrect":true,"inputTokens":1803,"outputTokens":1,"latencyMs":515.8493749997579},{"questionId":"q196","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":1662,"outputTokens":1,"latencyMs":477.6968330000527},{"questionId":"q196","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":4519,"outputTokens":1,"latencyMs":632.060792000033},{"questionId":"q196","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":3203,"outputTokens":1,"latencyMs":475.396583000198},{"questionId":"q197","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":15285,"outputTokens":1,"latencyMs":1091.5453749997541},{"questionId":"q197","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":11589,"outputTokens":1,"latencyMs":772.7851670002565},{"questionId":"q197","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8912,"outputTokens":1,"latencyMs":879.1698340000585},{"questionId":"q197","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8662,"outputTokens":1,"latencyMs":802.6152500002645},{"questionId":"q197","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"68","isCorrect":false,"inputTokens":17197,"outputTokens":1,"latencyMs":1066.6569999996573},{"questionId":"q197","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":13283,"outputTokens":1,"latencyMs":815.2937500001863},{"questionId":"q198","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pus
hedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":15290,"outputTokens":24,"latencyMs":993.7714999997988},{"questionId":"q198","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":11594,"outputTokens":24,"latencyMs":801.9431670000777},{"questionId":"q198","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8917,"outputTokens":24,"latencyMs":784.6215000003576},{"questionId":"q198","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8667,"outputTokens":24,"latencyMs":862.8154170000926},{"questionId":"q198","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17202,"outputTokens":24,"latencyMs":1196.5535840000957},{"questionId":"q198","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":13288,"outputTokens":24,"latencyMs":
790.2083340003155},{"questionId":"q199","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":15290,"outputTokens":1,"latencyMs":820.4306660001166},{"questionId":"q199","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":11594,"outputTokens":1,"latencyMs":1137.4689589999616},{"questionId":"q199","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":8917,"outputTokens":3,"latencyMs":434.1880419999361},{"questionId":"q199","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"defaultBranch","isCorrect":false,"inputTokens":8667,"outputTokens":2,"latencyMs":723.8139579999261},{"questionId":"q199","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":17202,"outputTokens":1,"latencyMs":1043.3001669999212},{"questionId":"q199","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":13288,"outputTokens":1,"latencyMs":663.120625000447},{"questionId":"q200","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":15289,"outputTokens":2,"latencyMs":864.5532499998808},{"questionId":"q200","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":11593,"outputTokens":2,"latencyMs":821.558999999892},{"questionId":"q200","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":8916,"outputTokens":2,"latencyMs":840.106124999933},{"questionId":"q200","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"
inputTokens":8666,"outputTokens":2,"latencyMs":885.9072079998441},{"questionId":"q200","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17201,"outputTokens":2,"latencyMs":1052.6682919999585},{"questionId":"q200","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":13287,"outputTokens":2,"latencyMs":924.2444159998558},{"questionId":"q201","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":15286,"outputTokens":1,"latencyMs":844.3746249997057},{"questionId":"q201","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":11590,"outputTokens":1,"latencyMs":948.0078329998069},{"questionId":"q201","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":8913,"outputTokens":1,"latencyMs":642.2395419999957},{"questionId":"q201","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":8663,"outputTokens":1,"latencyMs":635.705959000159},{"questionId":"q201","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"10","isCorrect":false,"inputTokens":17198,"outputTokens":1,"latencyMs":4565.840375000145},{"questionId":"q201","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":13284,"outputTokens":1,"latencyMs":741.9597499999218},{"questionId":"q202","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"80","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":776.4865409997292},{"questionId":"q202","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"{\"count\":100}","isCorrect":false,"inputTokens":4971,"outpu
tTokens":5,"latencyMs":509.64670799998567},{"questionId":"q202","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"75","isCorrect":true,"inputTokens":6060,"outputTokens":1,"latencyMs":748.3888749997132},{"questionId":"q202","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"65","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":633.1895829997957},{"questionId":"q202","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"67","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":579.6138330004178},{"questionId":"q203","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7068,"outputTokens":13,"latencyMs":600.4942499999888},{"questionId":"q203","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":4980,"outputTokens":13,"latencyMs":655.0449999999255},{"questionId":"q203","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6069,"outputTokens":13,"latencyMs":636.632458999753},{"questionId":"q203","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7978,"outputTokens":13,"latencyMs":663.1943330001086},{"questionId":"q203","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,e
rror","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6095,"outputTokens":13,"latencyMs":835.4048330001533},{"questionId":"q204","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7063,"outputTokens":1,"latencyMs":483.99374999990687},{"questionId":"q204","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":4975,"outputTokens":1,"latencyMs":437.3198329997249},{"questionId":"q204","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6064,"outputTokens":1,"latencyMs":1106.938957999926},{"questionId":"q204","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7973,"outputTokens":1,"latencyMs":584.410000000149},{"questionId":"q204","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6090,"outputTokens":1,"latencyMs":390.6225839997642},{"questionId":"q205","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1525,"outputTokens":1,"latencyMs":772.7192919999361},{"questionId":"q205","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1036,"outputTokens":1,"latencyMs":672.0561250001192},{"questionId":"q205","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":788,"outputTokens":1,"latencyMs":674.134958000388},{"questionId":"q205","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":737,"outputTokens":1,"latencyMs":759.3277080003172},{"questionId":"q205","format":"xml","model":"grok-4-1-fast-non-reasoning","expecte
d":"YES","actual":"NO","isCorrect":false,"inputTokens":1708,"outputTokens":1,"latencyMs":616.3170829997398},{"questionId":"q205","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1243,"outputTokens":1,"latencyMs":761.9437910001725},{"questionId":"q206","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1337,"outputTokens":1,"latencyMs":440.14870799984783},{"questionId":"q206","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":920,"outputTokens":1,"latencyMs":861.7682080003433},{"questionId":"q206","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":716,"outputTokens":1,"latencyMs":613.3492499999702},{"questionId":"q206","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":668,"outputTokens":1,"latencyMs":620.8054160000756},{"questionId":"q206","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":3144,"outputTokens":1,"latencyMs":2278.847166999709},{"questionId":"q206","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1095,"outputTokens":1,"latencyMs":534.0277920002118},{"questionId":"q207","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1710,"outputTokens":1,"latencyMs":522.3714589998126},{"questionId":"q207","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1149,"outputTokens":1,"latencyMs":523.8722079996951},{"questionId":"q207","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":857,"outputTokens":
1,"latencyMs":1199.117249999661},{"questionId":"q207","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":803,"outputTokens":1,"latencyMs":552.5905840001069},{"questionId":"q207","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1921,"outputTokens":1,"latencyMs":349.7741249999963},{"questionId":"q207","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1387,"outputTokens":1,"latencyMs":648.5670839999802},{"questionId":"q208","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1517,"outputTokens":1,"latencyMs":471.8094999999739},{"questionId":"q208","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1031,"outputTokens":1,"latencyMs":569.4215420000255},{"questionId":"q208","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1261,"outputTokens":1,"latencyMs":645.5075420001522},{"questionId":"q208","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":734,"outputTokens":1,"latencyMs":608.0681250002235},{"questionId":"q208","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1699,"outputTokens":1,"latencyMs":655.7050419999287},{"questionId":"q208","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1236,"outputTokens":1,"latencyMs":626.2335000000894},{"questionId":"q209","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1477,"outputTokens":1,"latencyMs":647.2873749998398},{"questionId":"q209","format":"json-compact","model":"grok-4-
1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1000,"outputTokens":1,"latencyMs":616.4202079996467},{"questionId":"q209","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1227,"outputTokens":1,"latencyMs":674.6432080003433},{"questionId":"q209","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":595,"outputTokens":1,"latencyMs":797.3570420001633},{"questionId":"q209","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1656,"outputTokens":1,"latencyMs":605.5889579998329},{"questionId":"q209","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1202,"outputTokens":1,"latencyMs":780.1308329999447}] ================================================ FILE: benchmarks/results/retrieval-accuracy.md ================================================ Benchmarks test LLM comprehension across different input formats using 209 data retrieval questions on 4 models. 
Show Dataset Catalog #### Dataset Catalog | Dataset | Rows | Structure | CSV Support | Eligibility | | ------- | ---- | --------- | ----------- | ----------- | | Uniform employee records | 100 | uniform | ✓ | 100% | | E-commerce orders with nested structures | 50 | nested | ✗ | 33% | | Time-series analytics data | 60 | uniform | ✓ | 100% | | Top 100 GitHub repositories | 100 | uniform | ✓ | 100% | | Semi-uniform event logs | 75 | semi-uniform | ✗ | 50% | | Deeply nested configuration | 11 | deep | ✗ | 0% | | Valid complete dataset (control) | 20 | uniform | ✓ | 100% | | Array truncated: 3 rows removed from end | 17 | uniform | ✓ | 100% | | Extra rows added beyond declared length | 23 | uniform | ✓ | 100% | | Inconsistent field count (missing salary in row 10) | 20 | uniform | ✓ | 100% | | Missing required fields (no email in multiple rows) | 20 | uniform | ✓ | 100% | **Structure classes:** - **uniform**: All objects have identical fields with primitive values - **semi-uniform**: Mix of uniform and non-uniform structures - **nested**: Objects with nested structures (nested objects or arrays) - **deep**: Highly nested with minimal tabular eligibility **CSV Support:** ✓ (supported), ✗ (not supported – would require lossy flattening) **Eligibility:** Percentage of arrays that qualify for TOON's tabular format (uniform objects with primitive values) #### Efficiency Ranking (Accuracy per 1K Tokens) Each format ranked by efficiency (accuracy percentage per 1,000 tokens): ``` TOON ████████████████████ 27.7 acc%/1K tok │ 76.4% acc │ 2,759 tokens JSON compact █████████████████░░░ 23.7 acc%/1K tok │ 73.7% acc │ 3,104 tokens YAML ██████████████░░░░░░ 19.9 acc%/1K tok │ 74.5% acc │ 3,749 tokens JSON ████████████░░░░░░░░ 16.4 acc%/1K tok │ 75.0% acc │ 4,587 tokens XML ██████████░░░░░░░░░░ 13.8 acc%/1K tok │ 72.1% acc │ 5,221 tokens ``` *Efficiency score = (Accuracy % ÷ Tokens) × 1,000. 
Higher is better.* > [!TIP] > TOON achieves **76.4%** accuracy (vs JSON's 75.0%) while using **39.9% fewer tokens**. **Note on CSV:** Excluded from ranking as it only supports 109 of 209 questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle. #### Per-Model Accuracy Accuracy across 4 LLMs on 209 data retrieval questions: ``` claude-haiku-4-5-20251001 → TOON ████████████░░░░░░░░ 59.8% (125/209) JSON ███████████░░░░░░░░░ 57.4% (120/209) YAML ███████████░░░░░░░░░ 56.0% (117/209) XML ███████████░░░░░░░░░ 55.5% (116/209) JSON compact ███████████░░░░░░░░░ 55.0% (115/209) CSV ██████████░░░░░░░░░░ 50.5% (55/109) gemini-3-flash-preview XML ████████████████████ 98.1% (205/209) JSON ███████████████████░ 97.1% (203/209) YAML ███████████████████░ 97.1% (203/209) → TOON ███████████████████░ 96.7% (202/209) JSON compact ███████████████████░ 96.7% (202/209) CSV ███████████████████░ 96.3% (105/109) gpt-5-nano → TOON ██████████████████░░ 90.9% (190/209) JSON compact ██████████████████░░ 90.9% (190/209) JSON ██████████████████░░ 89.0% (186/209) CSV ██████████████████░░ 89.0% (97/109) YAML █████████████████░░░ 87.1% (182/209) XML ████████████████░░░░ 80.9% (169/209) grok-4-1-fast-non-reasoning → TOON ████████████░░░░░░░░ 58.4% (122/209) YAML ████████████░░░░░░░░ 57.9% (121/209) JSON ███████████░░░░░░░░░ 56.5% (118/209) XML ███████████░░░░░░░░░ 54.1% (113/209) JSON compact ██████████░░░░░░░░░░ 52.2% (109/209) CSV ██████████░░░░░░░░░░ 51.4% (56/109) ``` > [!TIP] > TOON achieves **76.4% accuracy** (vs JSON's 75.0%) while using **39.9% fewer tokens** on these datasets. 
Performance by dataset, model, and question type #### Performance by Question Type | Question Type | TOON | JSON | YAML | JSON compact | XML | CSV | | ------------- | ---- | ---- | ---- | ---- | ---- | ---- | | Field Retrieval | 99.6% | 99.3% | 98.5% | 98.5% | 98.9% | 100.0% | | Aggregation | 61.9% | 61.9% | 59.9% | 58.3% | 54.4% | 50.9% | | Filtering | 56.8% | 53.1% | 56.3% | 55.2% | 51.6% | 50.9% | | Structure Awareness | 89.0% | 87.0% | 84.0% | 84.0% | 81.0% | 85.9% | | Structural Validation | 70.0% | 60.0% | 60.0% | 55.0% | 85.0% | 80.0% | #### Performance by Dataset ##### Uniform employee records | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 73.2% | 2,334 | 120/164 | | `toon` | 73.2% | 2,498 | 120/164 | | `json-compact` | 73.8% | 3,924 | 121/164 | | `yaml` | 73.8% | 4,959 | 121/164 | | `json-pretty` | 73.8% | 6,331 | 121/164 | | `xml` | 74.4% | 7,296 | 122/164 | ##### E-commerce orders with nested structures | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 82.3% | 7,458 | 135/164 | | `json-compact` | 78.7% | 7,110 | 129/164 | | `yaml` | 79.9% | 8,755 | 131/164 | | `json-pretty` | 79.3% | 11,234 | 130/164 | | `xml` | 77.4% | 12,649 | 127/164 | ##### Time-series analytics data | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 75.0% | 1,411 | 90/120 | | `toon` | 78.3% | 1,553 | 94/120 | | `json-compact` | 74.2% | 2,354 | 89/120 | | `yaml` | 75.8% | 2,954 | 91/120 | | `json-pretty` | 75.0% | 3,681 | 90/120 | | `xml` | 72.5% | 4,389 | 87/120 | ##### Top 100 GitHub repositories | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 65.9% | 8,527 | 87/132 | | `toon` | 66.7% | 8,779 | 88/132 | | `yaml` | 65.2% | 13,141 | 86/132 | | `json-compact` | 59.8% | 11,464 | 79/132 | | `json-pretty` | 63.6% | 15,157 | 84/132 | | `xml` | 56.1% | 17,105 | 74/132 | 
##### Semi-uniform event logs | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 68.3% | 4,839 | 82/120 | | `toon` | 65.0% | 5,819 | 78/120 | | `json-pretty` | 69.2% | 6,817 | 83/120 | | `yaml` | 61.7% | 5,847 | 74/120 | | `xml` | 58.3% | 7,729 | 70/120 | ##### Deeply nested configuration | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 90.5% | 568 | 105/116 | | `toon` | 94.8% | 655 | 110/116 | | `yaml` | 93.1% | 675 | 108/116 | | `json-pretty` | 92.2% | 924 | 107/116 | | `xml` | 91.4% | 1,013 | 106/116 | ##### Valid complete dataset (control) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 100.0% | 535 | 4/4 | | `json-compact` | 100.0% | 787 | 4/4 | | `yaml` | 100.0% | 992 | 4/4 | | `json-pretty` | 100.0% | 1,274 | 4/4 | | `xml` | 25.0% | 1,462 | 1/4 | | `csv` | 0.0% | 483 | 0/4 | ##### Array truncated: 3 rows removed from end | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 413 | 4/4 | | `xml` | 100.0% | 1,243 | 4/4 | | `toon` | 0.0% | 462 | 0/4 | | `json-pretty` | 0.0% | 1,085 | 0/4 | | `yaml` | 0.0% | 843 | 0/4 | | `json-compact` | 0.0% | 670 | 0/4 | ##### Extra rows added beyond declared length | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 550 | 4/4 | | `toon` | 75.0% | 605 | 3/4 | | `json-compact` | 75.0% | 901 | 3/4 | | `xml` | 100.0% | 1,678 | 4/4 | | `yaml` | 75.0% | 1,138 | 3/4 | | `json-pretty` | 50.0% | 1,460 | 2/4 | ##### Inconsistent field count (missing salary in row 10) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 480 | 4/4 | | `json-compact` | 100.0% | 782 | 4/4 | | `yaml` | 100.0% | 985 | 4/4 | | `toon` | 100.0% | 1,008 | 4/4 | | `json-pretty` | 100.0% | 1,266 | 4/4 | | 
`xml` | 100.0% | 1,453 | 4/4 | ##### Missing required fields (no email in multiple rows) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 340 | 4/4 | | `xml` | 100.0% | 1,409 | 4/4 | | `toon` | 75.0% | 974 | 3/4 | | `json-pretty` | 50.0% | 1,225 | 2/4 | | `yaml` | 25.0% | 951 | 1/4 | | `json-compact` | 0.0% | 750 | 0/4 | #### Performance by Model ##### claude-haiku-4-5-20251001 | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 59.8% | 125/209 | | `json-pretty` | 57.4% | 120/209 | | `yaml` | 56.0% | 117/209 | | `xml` | 55.5% | 116/209 | | `json-compact` | 55.0% | 115/209 | | `csv` | 50.5% | 55/109 | ##### gemini-3-flash-preview | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `xml` | 98.1% | 205/209 | | `json-pretty` | 97.1% | 203/209 | | `yaml` | 97.1% | 203/209 | | `toon` | 96.7% | 202/209 | | `json-compact` | 96.7% | 202/209 | | `csv` | 96.3% | 105/109 | ##### gpt-5-nano | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 90.9% | 190/209 | | `json-compact` | 90.9% | 190/209 | | `json-pretty` | 89.0% | 186/209 | | `csv` | 89.0% | 97/109 | | `yaml` | 87.1% | 182/209 | | `xml` | 80.9% | 169/209 | ##### grok-4-1-fast-non-reasoning | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 58.4% | 122/209 | | `yaml` | 57.9% | 121/209 | | `json-pretty` | 56.5% | 118/209 | | `xml` | 54.1% | 113/209 | | `json-compact` | 52.2% | 109/209 | | `csv` | 51.4% | 56/109 | #### What's Being Measured This benchmark tests **LLM comprehension and data retrieval accuracy** across different input formats. Each LLM receives formatted data and must answer questions about it. This does **not** test the model's ability to generate TOON output – only to read and understand it. 
#### Datasets Tested Eleven datasets designed to test different structural patterns and validation capabilities: **Primary datasets:** 1. **Tabular** (100 employee records): Uniform objects with identical fields – optimal for TOON's tabular format. 2. **Nested** (50 e-commerce orders): Complex structures with nested customer objects and item arrays. 3. **Analytics** (60 days of metrics): Time-series data with dates and numeric values. 4. **GitHub** (100 repositories): Real-world data from top GitHub repos by stars. 5. **Event Logs** (75 logs): Semi-uniform data with ~50% flat logs and ~50% with nested error objects. 6. **Nested Config** (1 configuration): Deeply nested configuration with minimal tabular eligibility. **Structural validation datasets:** 7. **Control**: Valid complete dataset (baseline for validation) 8. **Truncated**: Array with 3 rows removed from end (tests `[N]` length detection) 9. **Extra rows**: Array with 3 additional rows beyond declared length 10. **Width mismatch**: Inconsistent field count (missing salary in row 10) 11. **Missing fields**: Systematic field omissions (no email in multiple rows) #### Question Types 209 questions are generated dynamically across five categories: - **Field retrieval (33%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths) - Example: "What is Alice's salary?" → `75000` - Example: "How many items are in order ORD-0042?" → `3` - Example: "What is the customer name for order ORD-0042?" → `John Doe` - **Aggregation (30%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons) - Example: "How many employees work in Engineering?" → `17` - Example: "What is the total revenue across all orders?" → `45123.50` - Example: "How many employees have salary > 80000?" 
→ `23` - **Filtering (23%)**: Multi-condition queries requiring compound logic (AND constraints across fields) - Example: "How many employees in Sales have salary > 80000?" → `5` - Example: "How many active employees have more than 10 years of experience?" → `8` - **Structure awareness (12%)**: Tests format-native structural affordances (TOON's `[N]` count and `{fields}`, CSV's header row) - Example: "How many employees are in the dataset?" → `100` - Example: "List the field names for employees" → `id, name, email, department, salary, yearsExperience, active` - Example: "What is the department of the last employee?" → `Sales` - **Structural validation (2%)**: Tests ability to detect incomplete, truncated, or corrupted data using structural metadata - Example: "Is this data complete and valid?" → `YES` (control dataset) or `NO` (corrupted datasets) - Tests TOON's `[N]` length validation and `{fields}` consistency checking - Demonstrates CSV's lack of structural validation capabilities #### Evaluation Process 1. **Format conversion**: Each dataset is converted to all 6 formats (TOON, JSON, YAML, JSON compact, XML, CSV). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate deterministically**: Answers are validated using type-aware comparison (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`) without requiring an LLM judge. 
#### Models & Configuration - **Models tested**: `claude-haiku-4-5-20251001`, `gemini-3-flash-preview`, `gpt-5-nano`, `grok-4-1-fast-non-reasoning` - **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding (GPT-5 tokenizer) - **Temperature**: Not set (models use their defaults) - **Total evaluations**: (209 questions × 5 formats + 109 CSV-eligible questions) × 4 models = 4,616 LLM calls (CSV is skipped for datasets it cannot represent, so the 209 × 6 × 4 = 5,016 figure is only an upper bound) ================================================ FILE: benchmarks/results/token-efficiency.md ================================================ #### Mixed-Structure Track Datasets with nested or semi-uniform structures. CSV excluded as it cannot properly represent these structures. ``` 🛒 E-commerce orders with nested structures ┊ Tabular: 33% │ TOON █████████████░░░░░░░ 73,126 tokens ├─ vs JSON (−33.3%) 109,599 tokens ├─ vs JSON compact (+5.3%) 69,459 tokens ├─ vs YAML (−14.4%) 85,415 tokens └─ vs XML (−40.7%) 123,344 tokens 🧾 Semi-uniform event logs ┊ Tabular: 50% │ TOON █████████████████░░░ 154,084 tokens ├─ vs JSON (−15.0%) 181,201 tokens ├─ vs JSON compact (+19.9%) 128,529 tokens ├─ vs YAML (−0.8%) 155,397 tokens └─ vs XML (−25.2%) 205,859 tokens 🧩 Deeply nested configuration ┊ Tabular: 0% │ TOON ██████████████░░░░░░ 620 tokens ├─ vs JSON (−31.9%) 911 tokens ├─ vs JSON compact (+11.1%) 558 tokens ├─ vs YAML (−6.3%) 662 tokens └─ vs XML (−38.2%) 1,003 tokens ──────────────────────────────────── Total ──────────────────────────────────── TOON ████████████████░░░░ 227,830 tokens ├─ vs JSON (−21.9%) 291,711 tokens ├─ vs JSON compact (+14.7%) 198,546 tokens ├─ vs YAML (−5.7%) 241,474 tokens └─ vs XML (−31.0%) 330,206 tokens ``` #### Flat-Only Track Datasets with flat tabular structures where CSV is applicable. 
``` 👥 Uniform employee records ┊ Tabular: 100% │ CSV ███████████████████░ 47,102 tokens TOON ████████████████████ 49,919 tokens (+6.0% vs CSV) ├─ vs JSON (−60.7%) 127,063 tokens ├─ vs JSON compact (−36.9%) 79,059 tokens ├─ vs YAML (−50.1%) 100,011 tokens └─ vs XML (−65.9%) 146,579 tokens 📈 Time-series analytics data ┊ Tabular: 100% │ CSV ██████████████████░░ 8,383 tokens TOON ████████████████████ 9,115 tokens (+8.7% vs CSV) ├─ vs JSON (−59.0%) 22,245 tokens ├─ vs JSON compact (−35.9%) 14,211 tokens ├─ vs YAML (−49.0%) 17,858 tokens └─ vs XML (−65.8%) 26,616 tokens ⭐ Top 100 GitHub repositories ┊ Tabular: 100% │ CSV ███████████████████░ 8,512 tokens TOON ████████████████████ 8,744 tokens (+2.7% vs CSV) ├─ vs JSON (−42.3%) 15,144 tokens ├─ vs JSON compact (−23.7%) 11,454 tokens ├─ vs YAML (−33.4%) 13,128 tokens └─ vs XML (−48.9%) 17,095 tokens ──────────────────────────────────── Total ──────────────────────────────────── CSV ███████████████████░ 63,997 tokens TOON ████████████████████ 67,778 tokens (+5.9% vs CSV) ├─ vs JSON (−58.8%) 164,452 tokens ├─ vs JSON compact (−35.3%) 104,724 tokens ├─ vs YAML (−48.3%) 130,997 tokens └─ vs XML (−64.4%) 190,290 tokens ``` Show detailed examples #### 📈 Time-series analytics data **Savings:** 13,130 tokens (59.0% reduction vs JSON) **JSON** (22,245 tokens): ```json { "metrics": [ { "date": "2025-01-01", "views": 6138, "clicks": 174, "conversions": 12, "revenue": 2712.49, "bounceRate": 0.35 }, { "date": "2025-01-02", "views": 4616, "clicks": 274, "conversions": 34, "revenue": 9156.29, "bounceRate": 0.56 }, { "date": "2025-01-03", "views": 4460, "clicks": 143, "conversions": 8, "revenue": 1317.98, "bounceRate": 0.59 }, { "date": "2025-01-04", "views": 4740, "clicks": 125, "conversions": 13, "revenue": 2934.77, "bounceRate": 0.37 }, { "date": "2025-01-05", "views": 6428, "clicks": 369, "conversions": 19, "revenue": 1317.24, "bounceRate": 0.3 } ] } ``` **TOON** (9,115 tokens): ``` 
metrics[5]{date,views,clicks,conversions,revenue,bounceRate}: 2025-01-01,6138,174,12,2712.49,0.35 2025-01-02,4616,274,34,9156.29,0.56 2025-01-03,4460,143,8,1317.98,0.59 2025-01-04,4740,125,13,2934.77,0.37 2025-01-05,6428,369,19,1317.24,0.3 ``` --- #### ⭐ Top 100 GitHub repositories **Savings:** 6,400 tokens (42.3% reduction vs JSON) **JSON** (15,144 tokens): ```json { "repositories": [ { "id": 28457823, "name": "freeCodeCamp", "repo": "freeCodeCamp/freeCodeCamp", "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…", "createdAt": "2014-12-24T17:49:19Z", "updatedAt": "2025-10-28T11:58:08Z", "pushedAt": "2025-10-28T10:17:16Z", "stars": 430886, "watchers": 8583, "forks": 42146, "defaultBranch": "main" }, { "id": 132750724, "name": "build-your-own-x", "repo": "codecrafters-io/build-your-own-x", "description": "Master programming by recreating your favorite technologies from scratch.", "createdAt": "2018-05-09T12:03:18Z", "updatedAt": "2025-10-28T12:37:11Z", "pushedAt": "2025-10-10T18:45:01Z", "stars": 430877, "watchers": 6332, "forks": 40453, "defaultBranch": "master" }, { "id": 21737465, "name": "awesome", "repo": "sindresorhus/awesome", "description": "😎 Awesome lists about all kinds of interesting topics", "createdAt": "2014-07-11T13:42:37Z", "updatedAt": "2025-10-28T12:40:21Z", "pushedAt": "2025-10-27T17:57:31Z", "stars": 410052, "watchers": 8017, "forks": 32029, "defaultBranch": "main" } ] } ``` **TOON** (8,744 tokens): ``` repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}: 28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. 
Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-28T11:58:08Z","2025-10-28T10:17:16Z",430886,8583,42146,main 132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-28T12:37:11Z","2025-10-10T18:45:01Z",430877,6332,40453,master 21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-28T12:40:21Z","2025-10-27T17:57:31Z",410052,8017,32029,main ``` ================================================ FILE: benchmarks/scripts/accuracy-benchmark.ts ================================================ import type { Question } from '../src/types.ts' import * as fsp from 'node:fs/promises' import * as path from 'node:path' import process from 'node:process' import * as prompts from '@clack/prompts' import PQueue from 'p-queue' import { BENCHMARKS_DIR, DEFAULT_CONCURRENCY, DRY_RUN, DRY_RUN_LIMITS, MODEL_RPM_LIMITS, ROOT_DIR } from '../src/constants.ts' import { ACCURACY_DATASETS } from '../src/datasets.ts' import { evaluateQuestion, models } from '../src/evaluate.ts' import { formatters, supportsCSV } from '../src/formatters.ts' import { generateQuestions } from '../src/questions/index.ts' import { calculateFormatResults, calculateTokenCounts, generateAccuracyReport } from '../src/report.ts' import { getAllModelResults, hasModelResults, saveModelResults } from '../src/storage.ts' import { ensureDir } from '../src/utils.ts' // Constants const PROGRESS_UPDATE_INTERVAL = 10 const RATE_LIMIT_INTERVAL_MS = 60_000 prompts.intro('Retrieval Accuracy Benchmark') /** * Generate evaluation tasks for a model */ function generateEvaluationTasks(questions: Question[]): { question: Question, formatName: string }[] { const tasks: { question: Question, formatName: string }[] = [] for (const question of questions) { for (const [formatName] of Object.entries(formatters)) { // Skip CSV for datasets that don't support 
it const dataset = ACCURACY_DATASETS.find(d => d.name === question.dataset) if (formatName === 'csv' && dataset && !supportsCSV(dataset)) continue tasks.push({ question, formatName }) } } return tasks } /** * Check which models already have saved results */ async function checkExistingResults(activeModels: typeof models) { const existingModelResults: Record = {} for (const model of activeModels) { const existingResult = await hasModelResults(model.modelId) if (existingResult) existingModelResults[model.modelId] = existingResult } return existingModelResults } /** * Create a progress updater function */ function createProgressUpdater(spinner: ReturnType, total: number) { let completed = 0 return () => { completed++ if (completed % PROGRESS_UPDATE_INTERVAL === 0 || completed === total) { const percent = ((completed / total) * 100).toFixed(1) spinner.message(`Progress: ${completed}/${total} (${percent}%)`) } } } /** * Create a rate-limited queue for model evaluation */ function createEvaluationQueue(modelId: string) { const rpmLimit = MODEL_RPM_LIMITS[modelId] return new PQueue({ concurrency: DEFAULT_CONCURRENCY, intervalCap: rpmLimit ?? Infinity, interval: rpmLimit ? 
RATE_LIMIT_INTERVAL_MS : 0, }) } // Prompt user to select which models to benchmark const modelChoices = models.map(({ modelId }) => ({ value: modelId, label: modelId, })) const selectedModels = await prompts.multiselect({ message: 'Select models to benchmark (Space to select, Enter to confirm)', options: modelChoices, required: true, }) if (prompts.isCancel(selectedModels)) { prompts.cancel('Benchmark cancelled') process.exit(0) } const activeModels = models.filter(m => selectedModels.includes(m.modelId)) prompts.log.info(`Selected ${activeModels.length} model(s): ${activeModels.map(m => m.modelId).join(', ')}`) // Check which models already have results const existingModelResults = await checkExistingResults(activeModels) if (Object.keys(existingModelResults).length > 0) { prompts.log.info(`Found existing results for ${Object.keys(existingModelResults).length} model(s)`) } if (DRY_RUN) { prompts.log.info('Limiting questions and models for dry run') } let questions = generateQuestions() // Apply dry run limits if enabled if (DRY_RUN && DRY_RUN_LIMITS.maxQuestions) { questions = questions.slice(0, DRY_RUN_LIMITS.maxQuestions) } prompts.log.info(`Evaluating ${questions.length} questions`) prompts.log.info(`Testing ${Object.keys(formatters).length} formats`) // Evaluate each model separately and save results incrementally for (const model of activeModels) { const modelId = model.modelId // Skip if results already exist if (existingModelResults[modelId]) { prompts.log.info(`Skipping ${modelId} (results already exist)`) continue } prompts.log.step(`Running benchmark for ${modelId}`) // Generate evaluation tasks for this model const tasks = generateEvaluationTasks(questions) const total = tasks.length const rpmLimit = MODEL_RPM_LIMITS[modelId] const queue = createEvaluationQueue(modelId) const evalSpinner = prompts.spinner() evalSpinner.start(`Running ${total} evaluations (concurrency: ${DEFAULT_CONCURRENCY}, RPM limit: ${rpmLimit ?? 
'unlimited'})`)
  const updateProgress = createProgressUpdater(evalSpinner, total)

  // Queue all tasks; the queue (not this map) enforces concurrency and rate limits
  const modelResultPromises = tasks.map(task =>
    queue.add(async () => {
      // Format data on-demand so only in-flight tasks hold a formatted copy
      const dataset = ACCURACY_DATASETS.find(d => d.name === task.question.dataset)!
      const formatter = formatters[task.formatName]!
      const formattedData = formatter(dataset.data)

      const result = await evaluateQuestion({
        question: task.question,
        formatName: task.formatName,
        formattedData,
        model,
      })

      // Progress update after task completes
      updateProgress()

      return result
    }),
  )

  // Wait for all tasks to complete. `Promise.all` rejects as soon as one
  // evaluation fails, so stop the spinner before propagating the error;
  // otherwise the terminal is left with a running spinner and no message.
  let modelResults: Awaited<(typeof modelResultPromises)[number]>[]
  try {
    modelResults = await Promise.all(modelResultPromises)
  }
  catch (error) {
    evalSpinner.stop(`Evaluation failed for ${modelId}`)
    throw error
  }

  evalSpinner.stop(`Evaluation complete for ${modelId}`)

  // Save results immediately for this model
  await saveModelResults(modelId, modelResults)
  prompts.log.success(`Saved results for ${modelId}`)
}

// Generate/regenerate markdown report from all available model results
const reportSpinner = prompts.spinner()
reportSpinner.start('Generating report from all model results')

// Load all available model results (including any that were skipped)
const allModelResults = await getAllModelResults()
const allResults = Object.values(allModelResults).flat()

if (allResults.length === 0) {
  // Stop the spinner before exiting so it does not stay active on the early-exit path
  reportSpinner.stop('Report generation skipped')
  prompts.log.warn('No results available to generate report')
  process.exit(0)
}

const tokenCounts = calculateTokenCounts(formatters)
const formatResults = calculateFormatResults(allResults, tokenCounts)
const accuracyReport = generateAccuracyReport(allResults, formatResults, tokenCounts)

const resultsDir = path.join(BENCHMARKS_DIR, 'results')
await ensureDir(resultsDir)

const outputFilePath = path.join(resultsDir, 'retrieval-accuracy.md')
await fsp.writeFile(outputFilePath, accuracyReport)

reportSpinner.stop('Report generation complete!')
prompts.log.info(`Report saved to: \`${path.relative(ROOT_DIR, outputFilePath)}\``)

================================================
FILE: benchmarks/scripts/fetch-github-repos.ts
================================================ import * as fsp from 'node:fs/promises' import * as path from 'node:path' import process from 'node:process' import * as prompts from '@clack/prompts' import { ofetch } from 'ofetch' import pMap from 'p-map' import { BENCHMARKS_DIR } from '../src/constants.ts' import { ensureDir } from '../src/utils.ts' prompts.intro('GitHub Repositories Fetcher') try { // Fetch top 100 repos from GitHub const repoList = await searchTop100Repos() const repos = await fetchRepoDetails(repoList) if (repos.length === 0) { prompts.log.error('No repositories fetched. Exiting.') process.exit(1) } // Sort by stars descending repos.sort((a, b) => b.stars - a.stars) await saveRepos(repos) prompts.log.success('Done!') } catch (error) { prompts.log.error(String(error)) process.exit(1) } async function searchTop100Repos(): Promise { const s = prompts.spinner() s.start('Fetching top 100 starred repositories') const response = await ofetch<{ items: { full_name: string }[] }>( 'https://api.github.com/search/repositories', { query: { q: 'stars:>1', sort: 'stars', order: 'desc', per_page: 100, }, headers: { 'Accept': 'application/vnd.github+json', 'X-GitHub-Api-Version': '2022-11-28', }, }, ) s.stop('Fetched top 100 repositories') return response.items.map(item => item.full_name) } async function fetchRepoDetails(repoList: string[]): Promise[]> { const s = prompts.spinner() s.start(`Fetching ${repoList.length} GitHub repositories`) const repos = await pMap( repoList, async (repoPath, index) => { s.message(`[${index + 1}/${repoList.length}] Fetching ${repoPath}`) const { repo } = await ofetch(`https://ungh.cc/repos/${repoPath}`) return repo }, { concurrency: 5 }, ) s.stop(`Successfully fetched ${repos.length}/${repoList.length} repositories`) return repos } async function saveRepos(repos: Record[]): Promise { const outputDir = path.join(BENCHMARKS_DIR, 'data') const outputFile = path.join(outputDir, 'github-repos.json') await ensureDir(outputDir) const 
jsonOutput = JSON.stringify(repos, undefined, 2) await fsp.writeFile(outputFile, `${jsonOutput}\n`, 'utf-8') const relativePath = path.relative(BENCHMARKS_DIR, outputFile) prompts.log.info(`Result saved to \`${relativePath}\``) } ================================================ FILE: benchmarks/scripts/token-efficiency-benchmark.ts ================================================ import type { Dataset } from '../src/types.ts' import * as fsp from 'node:fs/promises' import * as path from 'node:path' import * as prompts from '@clack/prompts' import { encode } from '../../packages/toon/src/index.ts' import { BENCHMARKS_DIR, FORMATTER_DISPLAY_NAMES, ROOT_DIR } from '../src/constants.ts' import { TOKEN_EFFICIENCY_DATASETS } from '../src/datasets.ts' import { formatters, supportsCSV } from '../src/formatters.ts' import { createProgressBar, ensureDir, tokenize } from '../src/utils.ts' interface FormatMetrics { name: string tokens: number savings: number savingsPercent: number } interface BenchmarkResult { dataset: Dataset formats: FormatMetrics[] } // Constants const DATASET_ICONS: Record = { 'tabular': '👥', 'nested': '🛒', 'analytics': '📈', 'github': '⭐', 'event-logs': '🧾', 'nested-config': '🧩', } const COMPARISON_FORMAT_ORDER = ['json-pretty', 'json-compact', 'yaml', 'xml'] as const const PROGRESS_BAR_WIDTH = 20 const TOKEN_PADDING = 7 const DEFAULT_DATASET_ICON = '📊' const DETAILED_EXAMPLE_DATASETS = ['github', 'analytics'] as const const GITHUB_REPO_LIMIT = 3 const GITHUB_DESC_LIMIT = 80 const ANALYTICS_METRICS_LIMIT = 5 prompts.intro('Token Efficiency Benchmark') /** * Format a comparison line showing savings vs TOON */ function formatComparisonLine(format: FormatMetrics, isLast: boolean = false): string { const label = FORMATTER_DISPLAY_NAMES[format.name] || format.name.toUpperCase() const signedPercent = format.savingsPercent >= 0 ? `−${format.savingsPercent.toFixed(1)}%` : `+${Math.abs(format.savingsPercent).toFixed(1)}%` const connector = isLast ? 
'└─' : '├─'
  const tokenStr = format.tokens.toLocaleString('en-US').padStart(TOKEN_PADDING)
  return `${connector} vs ${label.padEnd(13)} ${`(${signedPercent})`.padEnd(20)} ${tokenStr} tokens`
}

/**
 * Calculate total tokens and savings for a set of datasets
 *
 * @param datasets - Per-dataset benchmark results; each is expected to contain a `toon` entry
 * @param formatNames - Formats to total and compare against TOON
 * @returns The summed TOON tokens plus, per format, the summed tokens and the
 * percentage saved by TOON relative to that format (0 when the format has no tokens)
 */
function calculateTotalMetrics(datasets: BenchmarkResult[], formatNames: readonly string[]) {
  const totalToonTokens = datasets.reduce((sum, r) => {
    const toon = r.formats.find(f => f.name === 'toon')!
    return sum + toon.tokens
  }, 0)

  const totals = formatNames.map((formatName) => {
    // Datasets that lack this format contribute nothing to its total
    const totalTokens = datasets.reduce((sum, r) => {
      const format = r.formats.find(f => f.name === formatName)
      return sum + (format?.tokens ?? 0)
    }, 0)
    const savings = totalTokens - totalToonTokens
    // An empty track (or a format absent everywhere) totals 0 tokens; report 0%
    // instead of letting the division produce NaN/Infinity in the rendered chart.
    const savingsPercent = totalTokens > 0 ? (savings / totalTokens) * 100 : 0
    return { name: formatName, tokens: totalTokens, savingsPercent }
  })

  return { totalToonTokens, totals }
}

/**
 * Generate total lines for a track
 *
 * @param totalToonTokens - Summed TOON tokens for the track
 * @param totals - Per-format totals, rendered as comparison rows
 * @param baselineFormat - When given (CSV on the flat-only track), the baseline is
 * rendered first and TOON is shown with its overhead relative to that baseline
 * @returns The rendered lines joined with newlines
 */
function generateTotalLines(
  totalToonTokens: number,
  totals: { name: string, tokens: number, savingsPercent: number }[],
  baselineFormat?: { name: string, tokens: number },
) {
  const separatorHalf = '─'.repeat(36)
  const lines = [`${separatorHalf} Total ${separatorHalf}`]

  if (baselineFormat) {
    // Flat-only track with CSV baseline
    const csvPercentage = totalToonTokens > 0
      ? Math.min(100, (baselineFormat.tokens / totalToonTokens) * 100)
      : 0
    const csvBar = createProgressBar(csvPercentage, 100, PROGRESS_BAR_WIDTH)
    const csvStr = baselineFormat.tokens.toLocaleString('en-US').padStart(TOKEN_PADDING)
    lines.push(` CSV ${csvBar} ${csvStr} tokens`)

    const overheadPercent = baselineFormat.tokens > 0
      ? ((totalToonTokens - baselineFormat.tokens) / baselineFormat.tokens) * 100
      : 0
    // Only prefix "+" for non-negative overhead; a negative value already carries
    // its own sign (a hard-coded "+" would render "+-1.2%"). This mirrors the
    // per-dataset flat-track rendering.
    const overheadSign = overheadPercent >= 0 ? '+' : ''
    const toonBar = createProgressBar(100, 100, PROGRESS_BAR_WIDTH)
    const toonStr = totalToonTokens.toLocaleString('en-US').padStart(TOKEN_PADDING)
    lines.push(` TOON ${toonBar} ${toonStr} tokens (${overheadSign}${overheadPercent.toFixed(1)}% vs CSV)`)
  }
  else {
    // Mixed-structure track
    const totalPercentage = Math.min(100, (totalToonTokens / totals[0]!.tokens) *
100)
    const totalBar = createProgressBar(totalPercentage, 100, PROGRESS_BAR_WIDTH)
    const toonStr = totalToonTokens.toLocaleString('en-US').padStart(TOKEN_PADDING)
    lines.push(` TOON ${totalBar} ${toonStr} tokens`)
  }

  // Add comparison lines
  for (let i = 0; i < totals.length; i++) {
    const format = totals[i]!
    const isLast = i === totals.length - 1
    lines.push(` ${formatComparisonLine({
      name: format.name,
      tokens: format.tokens,
      savings: 0, // Not used in this context
      savingsPercent: format.savingsPercent,
    }, isLast)}`)
  }

  return lines.join('\n')
}

/**
 * Generate bar chart for a dataset
 *
 * @param result - Benchmark result for one dataset; must contain `toon` and
 * `json-pretty` entries (both are looked up with non-null assertions)
 * @returns A multi-line chart: header, TOON bar, then one comparison row per
 * format of `COMPARISON_FORMAT_ORDER` that is present in the result
 */
function generateDatasetChart(result: BenchmarkResult): string {
  const { dataset, formats } = result

  const toon = formats.find(f => f.name === 'toon')!
  const jsonPretty = formats.find(f => f.name === 'json-pretty')!

  const emoji = DATASET_ICONS[dataset.name] || DEFAULT_DATASET_ICON
  const eligibility = dataset.metadata.tabularEligibility
  const name = dataset.description

  // TOON bar length is TOON's size relative to pretty-printed JSON
  const percentage = Math.min(100, 100 - jsonPretty.savingsPercent)
  const bar = createProgressBar(percentage, 100, PROGRESS_BAR_WIDTH)
  const toonStr = toon.tokens.toLocaleString('en-US')

  const line1 = `${emoji} ${name} ┊ Tabular: ${eligibility}%`
  const line2 = ` │`
  const line3 = ` TOON ${bar} ${toonStr.padStart(TOKEN_PADDING)} tokens`

  // Resolve the formats that actually exist first, so the closing connector is
  // placed on the last rendered row even when a trailing format is missing.
  const presentFormats = COMPARISON_FORMAT_ORDER
    .map(formatName => formats.find(f => f.name === formatName))
    .filter((format): format is FormatMetrics => format !== undefined)

  const comparisonLines = presentFormats.map(
    (format, index) => ` ${formatComparisonLine(format, index === presentFormats.length - 1)}`,
  )

  return [line1, line2, line3, ...comparisonLines].join('\n')
}

const results: BenchmarkResult[] = []

// Calculate token counts for all datasets
for (const dataset of TOKEN_EFFICIENCY_DATASETS) {
  const formatMetrics: FormatMetrics[] = []
  const tokensByFormat: Record<string, number> = {}

  // Calculate tokens for each format
  for (const [formatName, formatter] of Object.entries(formatters)) {
    // Skip CSV for datasets that don't support it
    if (formatName === 'csv'
&& !supportsCSV(dataset)) continue const formattedData = formatter(dataset.data) const tokens = tokenize(formattedData) tokensByFormat[formatName] = tokens } // Calculate savings vs TOON const toonTokens = tokensByFormat.toon! for (const [formatName, tokens] of Object.entries(tokensByFormat)) { const savings = tokens - toonTokens formatMetrics.push({ name: formatName, tokens, savings, savingsPercent: formatName === 'toon' ? 0 : (savings / tokens) * 100, }) } results.push({ dataset, formats: formatMetrics, }) } // Separate datasets by CSV support const mixedStructureDatasets = results.filter(r => !supportsCSV(r.dataset)) const flatOnlyDatasets = results.filter(r => supportsCSV(r.dataset)) // Mixed-Structure Track (no CSV) const mixedCharts = mixedStructureDatasets .map(result => generateDatasetChart(result)) .join('\n\n') // Flat-Only Track (with CSV) const flatCharts = flatOnlyDatasets .map((result) => { const csv = result.formats.find(f => f.name === 'csv') const toon = result.formats.find(f => f.name === 'toon')! if (!csv) return generateDatasetChart(result) // Special handling to show CSV first with TOON overhead const { dataset } = result const emoji = DATASET_ICONS[dataset.name] || DEFAULT_DATASET_ICON const eligibility = dataset.metadata.tabularEligibility const name = dataset.description // CSV line const csvPercentage = Math.min(100, (csv.tokens / toon.tokens) * 100) const csvBar = createProgressBar(csvPercentage, 100, PROGRESS_BAR_WIDTH) const csvStr = csv.tokens.toLocaleString('en-US') const line1 = `${emoji} ${name} ┊ Tabular: ${eligibility}%` const line2 = ` │` const line3 = ` CSV ${csvBar} ${csvStr.padStart(TOKEN_PADDING)} tokens` const toonOverhead = toon.tokens - csv.tokens const toonOverheadPercent = (toonOverhead / csv.tokens) * 100 const toonBar = createProgressBar(100, 100, PROGRESS_BAR_WIDTH) const toonStr = toon.tokens.toLocaleString('en-US') const toonVsCSV = toonOverheadPercent >= 0 ? 
`(+${toonOverheadPercent.toFixed(1)}% vs CSV)` : `(${toonOverheadPercent.toFixed(1)}% vs CSV)` const toonLine = ` TOON ${toonBar} ${toonStr.padStart(TOKEN_PADDING)} tokens ${toonVsCSV}` // Other format comparisons (vs TOON) const comparisonLines = COMPARISON_FORMAT_ORDER.map((formatName, index, array) => { const format = result.formats.find(f => f.name === formatName) if (!format) return undefined return ` ${formatComparisonLine(format, index === array.length - 1)}` }).filter(Boolean) return [line1, line2, line3, toonLine, ...comparisonLines].join('\n') }) .join('\n\n') // Calculate totals for mixed structure const { totalToonTokens: totalToonTokensMixed, totals: mixedTotals } = calculateTotalMetrics(mixedStructureDatasets, COMPARISON_FORMAT_ORDER) const mixedTotalLines = generateTotalLines(totalToonTokensMixed, mixedTotals) // Calculate totals for flat-only const { totalToonTokens: totalToonTokensFlat, totals: flatTotals } = calculateTotalMetrics(flatOnlyDatasets, COMPARISON_FORMAT_ORDER) const totalCSVTokensFlat = flatOnlyDatasets.reduce((sum, r) => { const csv = r.formats.find(f => f.name === 'csv') return sum + (csv?.tokens || 0) }, 0) const flatTotalLines = generateTotalLines(totalToonTokensFlat, flatTotals, { name: 'csv', tokens: totalCSVTokensFlat }) const barChartSection = ` #### Mixed-Structure Track Datasets with nested or semi-uniform structures. CSV excluded as it cannot properly represent these structures. \`\`\` ${mixedCharts} ${mixedTotalLines} \`\`\` #### Flat-Only Track Datasets with flat tabular structures where CSV is applicable. 
\`\`\` ${flatCharts} ${flatTotalLines} \`\`\` `.trim()

// Generate detailed examples (optional: show a few examples)
// NOTE(review): line breaks inside the template literals below look collapsed by
// extraction; the strings are kept exactly as found — confirm against upstream.
const detailedExamples = results
  .filter(r => DETAILED_EXAMPLE_DATASETS.includes(r.dataset.name as any))
  .map((result, i, filtered) => {
    let displayData = result.dataset.data

    // Truncate for display
    if (result.dataset.name === 'github') {
      displayData = {
        repositories: displayData.repositories.slice(0, GITHUB_REPO_LIMIT).map((repo: Record<string, unknown>) => {
          const { description } = repo
          // Only truncate real strings: a missing description must stay as-is
          // instead of being coerced into the literal text "undefined" by
          // string concatenation.
          if (typeof description !== 'string')
            return repo
          return {
            ...repo,
            description: description.length > GITHUB_DESC_LIMIT
              ? `${description.slice(0, GITHUB_DESC_LIMIT)}…`
              : description,
          }
        }),
      }
    }
    else if (result.dataset.name === 'analytics') {
      displayData = { metrics: displayData.metrics.slice(0, ANALYTICS_METRICS_LIMIT) }
    }

    const emoji = DATASET_ICONS[result.dataset.name] || DEFAULT_DATASET_ICON
    const json = result.formats.find(f => f.name === 'json-pretty')!
    const toon = result.formats.find(f => f.name === 'toon')!

    // Horizontal rule between examples, but not after the last one
    const separator = i < filtered.length - 1 ? '---' : ''

    return ` #### ${emoji} ${result.dataset.description} **Savings:** ${json.savings.toLocaleString('en-US')} tokens (${json.savingsPercent.toFixed(1)}% reduction vs JSON) **JSON** (${json.tokens.toLocaleString('en-US')} tokens): \`\`\`json ${JSON.stringify(displayData, undefined, 2)} \`\`\` **TOON** (${toon.tokens.toLocaleString('en-US')} tokens): \`\`\` ${encode(displayData)} \`\`\` ${separator} `.trim()
  })
  .join('\n\n')

const markdown = ` ${barChartSection} Show detailed examples ${detailedExamples} `.trimStart()

// The console only gets the bar charts; the file gets charts plus detailed examples
prompts.log.message(barChartSection)

const resultsDir = path.join(BENCHMARKS_DIR, 'results')
await ensureDir(resultsDir)

const outputFilePath = path.join(resultsDir, 'token-efficiency.md')
await fsp.writeFile(outputFilePath, markdown, 'utf-8')

prompts.log.success(`Report saved to \`${path.relative(ROOT_DIR, outputFilePath)}\``)

================================================
FILE: benchmarks/src/constants.ts
================================================
import process from 'node:process'
import * as url from 'node:url'

/** Absolute path of the directory two levels above this module */
export const ROOT_DIR: string = url.fileURLToPath(new URL('../../', import.meta.url))
/** Absolute path of the directory one level above this module */
export const BENCHMARKS_DIR: string = url.fileURLToPath(new URL('../', import.meta.url))

/**
 * Default concurrency for parallel evaluations to prevent bursting
 */
export const DEFAULT_CONCURRENCY = 10

/**
 * Enable dry run mode for quick testing with limited AI requests
 *
 * @remarks
 * Set via environment variable: `DRY_RUN=true`.
 */
export const DRY_RUN: boolean = process.env.DRY_RUN === 'true'

/**
 * Limits applied during dry run mode
 */
export const DRY_RUN_LIMITS = {
  /** Maximum number of questions to evaluate */
  maxQuestions: 10,
}

/**
 * Model-specific RPM (requests per minute) limits to handle API quotas
 *
 * @remarks
 * Set `undefined` for models without specific limits. Looking up a model that
 * is not listed also yields `undefined`.
 */
/// keep-sorted
export const MODEL_RPM_LIMITS: Record<string, number | undefined> = {
  'claude-haiku-4-5-20251001': 50,
  'gemini-3-flash-preview': 25,
  'gpt-5-nano': 50,
  'grok-4-1-fast-non-reasoning': 25,
}

/**
 * Display names for data format types
 */
export const FORMATTER_DISPLAY_NAMES: Record<string, string> = {
  'json-pretty': 'JSON',
  'json-compact': 'JSON compact',
  'toon': 'TOON',
  'csv': 'CSV',
  'xml': 'XML',
  'yaml': 'YAML',
} as const

/**
 * Question type identifiers
 */
export const QUESTION_TYPES = [
  'field-retrieval',
  'retrieval',
  'aggregation',
  'filtering',
  'structure-awareness',
  'structural-validation',
] as const

/**
 * Display names for question types
 */
export const QUESTION_TYPE_LABELS = {
  'field-retrieval': 'Field Retrieval',
  'retrieval': 'Retrieval',
  'aggregation': 'Aggregation',
  'filtering': 'Filtering',
  'structure-awareness': 'Structure Awareness',
  'structural-validation': 'Structural Validation',
} as const

/**
 * Dataset identifiers
 */
export const DATASET_NAMES = [
  'tabular',
  'nested',
  'analytics',
  'github',
  'event-logs',
  'nested-config',
  'large-uniform',
  'structural-validation-control',
  'structural-validation-truncated',
  'structural-validation-extra-rows',
  'structural-validation-width-mismatch',
  'structural-validation-missing-fields',
] as const

/**
 * Structure class identifiers
 */
export const STRUCTURE_CLASSES = [
  'uniform',
  'semi-uniform',
  'nested',
  'deep',
] as const

/**
 * Threshold values for filtering and aggregation questions
 */
export const QUESTION_THRESHOLDS = {
  tabular: {
    salaryRanges: [60000, 80000, 100000],
    experienceYears: [5, 10, 15, 20],
    departmentSalaryThreshold: 80000,
    departmentExperienceThreshold: 10,
  },
  nested: {
    highValueOrders: [200, 400, 600],
    statusValueThreshold: 300,
    itemCountThreshold: 3,
    totalThresholdsForItems: [300, 500],
  },
  analytics: {
    views: [6000],
    conversions: [20],
    viewsForFiltering: [6000, 7000],
    conversionsForFiltering: 15,
    revenueThresholds: [1000, 1500, 2000],
    viewsThresholdForRevenue: 6000,
    clicksForFiltering: [250, 400],
    conversionsForClickFiltering: 15,
    revenueForBounceRate: [1000, 1500],
    bounceRateThreshold: 0.5,
  },
  github: {
    stars: [100000, 150000, 200000],
    forks: [20000, 35000],
    watchers: [8000],
    starForkCombinations: [
      { stars: 75000, forks: 15000 },
      { stars: 100000, forks: 20000 },
      { stars: 150000, forks: 30000 },
      { stars: 200000, forks: 45000 },
    ],
    starWatcherCombinations: [
      { stars: 100000, watchers: 7000 },
      { stars: 150000, watchers: 9000 },
    ],
  },
} as const

/**
 * Question generation configuration
 */
export const QUESTION_LIMITS = {
  tabular: {
    fieldRetrieval: 12,
    aggregationDepartments: 3,
    filteringMultiConditionDepartments: 5,
    filteringExperience: 3,
    filteringDepartmentExp: 3,
    filteringDepartmentActive: 2,
  },
  nested: {
    fieldRetrievalOrders: 8,
    fieldRetrievalCustomers: 8,
    aggregationStatuses: 3,
    filteringStatusAndValue: 4,
    filteringStatusAndItems: 3,
  },
  analytics: {
    fieldRetrievalDates: 9,
  },
  github: {
    fieldRetrievalRepos: 11,
    aggregationBranches: 2,
    filteringStarsAndForks: 3,
  },
  eventLogs: {
    fieldRetrieval: 10,
    aggregationEndpoints: 2,
    filteringLevelAndStatus: 3,
    filteringEndpointAndStatus: 3,
    filteringEndpointRetryable: 2,
  },
  nestedConfig: {
    fieldRetrieval: 10,
    filteringComplex: 5,
  },
} as const
================================================ FILE: benchmarks/src/datasets.ts ================================================ import type { Dataset } from './types.ts' import { faker } from '@faker-js/faker' import githubRepos from '../data/github-repos.json' with { type: 'json' } // Seed for reproducibility faker.seed(12345) /** * Employee record structure for tabular dataset */ export interface Employee { id: number name: string email: string department: string salary: number yearsExperience: number active: boolean } /** * E-commerce order structure for nested dataset */ export interface Order { orderId: string customer: { id: number name: string email: string phone: string } items: { sku: string name: string quantity: number price: number }[] subtotal: number tax: number total: number status: string orderDate?: string createdAt?: string } /** * Analytics metric structure for time-series dataset */ export interface AnalyticsMetric { date: string views: number clicks: number conversions: number revenue: number bounceRate: number } /** * GitHub repository structure for real-world dataset */ export interface Repository { id: number name: string repo: string description: string stars: number watchers: number forks: number defaultBranch: string createdAt: string updatedAt: string pushedAt: string } /** * Event log structure for semi-uniform dataset */ export interface EventLog { timestamp: string level: 'info' | 'warn' | 'error' endpoint: string statusCode: number responseTime: number userId: number error?: { message: string stack: string retryable: boolean } } /** * Nested configuration structure for deeply nested dataset */ export interface NestedConfig { environment: string version: string database: { host: string port: number name: string pool: { min: number max: number idleTimeout: number } replicas: { host: string port: number priority: number }[] } features: Record }[] }> authentication: { providers: { name: string clientId: string scopes: string[] config: 
Record<string, unknown> // NOTE(review): type arguments were lost in extraction; reconstructed — confirm
    }[]
    session: {
      secret: string
      duration: number
      refreshThreshold: number
    }
  }
  permissions: {
    // NOTE(review): type arguments reconstructed from the values built in
    // generateNestedConfig — confirm against upstream
    roles: Record<string, { permissions: string[], inherits: string[] }>
    groups: Record<string, { members: string[], roles: string[] }>
  }
}

/**
 * Product structure for large uniform arrays
 */
export interface Product {
  sku: string
  name: string
  category: string
  price: number
  qty: number
  lastUpdated: string
}

/**
 * Internal types for structural validation pattern generation
 */
type StructuralValidationType = 'truncated' | 'extra-rows' | 'width-mismatch' | 'missing-fields'

interface StructuralValidationFixture {
  type: StructuralValidationType
  description: string
  // NOTE(review): type arguments were lost in extraction; reconstructed — confirm
  data: Record<string, unknown>
  isValid: boolean
}

/**
 * Generate analytics time-series data
 *
 * @param days - Number of consecutive daily records to generate
 * @param startDate - First day of the series; a date-only string such as
 * `'2025-01-01'` is parsed as UTC midnight
 * @returns An object whose `metrics` array holds one record per day
 *
 * @remarks
 * All date arithmetic is done in UTC. The emitted `date` comes from
 * `toISOString()` (UTC), so stepping days and detecting weekends with the
 * local-time accessors would make the weekend multiplier — and therefore the
 * seeded output — depend on the host timezone.
 */
export function generateAnalyticsData(days: number, startDate = '2025-01-01'): { metrics: AnalyticsMetric[] } {
  const date = new Date(startDate)

  return {
    metrics: Array.from({ length: days }, (_, i) => {
      const currentDate = new Date(date)
      currentDate.setUTCDate(currentDate.getUTCDate() + i)

      // Simulate realistic web traffic with some variation
      const baseViews = 5000
      const weekendMultiplier = currentDate.getUTCDay() === 0 || currentDate.getUTCDay() === 6 ?
0.7 : 1.0 const views = Math.round(baseViews * weekendMultiplier + faker.number.int({ min: -1000, max: 3000 })) const clicks = Math.round(views * faker.number.float({ min: 0.02, max: 0.08 })) const conversions = Math.round(clicks * faker.number.float({ min: 0.05, max: 0.15 })) const avgOrderValue = faker.number.float({ min: 49.99, max: 299.99 }) const revenue = Number((conversions * avgOrderValue).toFixed(2)) return { date: currentDate.toISOString().split('T')[0]!, views, clicks, conversions, revenue, bounceRate: faker.number.float({ min: 0.3, max: 0.7, fractionDigits: 2 }), } }), } } /** * Generate employee data (uniform tabular structure) */ const departments = ['Engineering', 'Sales', 'Marketing', 'HR', 'Operations', 'Finance'] as const function generateEmployees(count: number): { employees: Employee[] } { return { employees: Array.from({ length: count }, (_, i): Employee => { const yearsExp = faker.number.int({ min: 1, max: 25 }) return { id: i + 1, name: faker.person.fullName(), email: faker.internet.email().toLowerCase(), department: departments[i % departments.length]!, salary: faker.number.int({ min: 45000, max: 150000 }), yearsExperience: yearsExp, active: faker.datatype.boolean(0.8), // 80% active } }), } } /** * Tabular dataset: Uniform employee records * * @remarks * Tests TOON's tabular array format. 
*/ const tabularDataset: Dataset = { name: 'tabular', description: 'Uniform employee records', data: generateEmployees(100), metadata: { supportsCSV: true, structureClass: 'uniform', tabularEligibility: 100, // All arrays contain uniform objects with primitive values only }, } /** * Generate e-commerce orders (nested structure) */ const PRODUCT_NAMES = ['Wireless Mouse', 'USB Cable', 'Laptop Stand', 'Keyboard', 'Webcam', 'Headphones', 'Monitor', 'Desk Lamp'] as const const ORDER_STATUSES = ['pending', 'processing', 'shipped', 'delivered', 'cancelled'] as const function generateOrders(count: number): { orders: Order[] } { return { orders: Array.from({ length: count }, (_, i) => { const customerId = (i % 20) + 1 // Rotate through 20 customers const itemCount = faker.number.int({ min: 1, max: 4 }) // 1-4 items per order const items = Array.from({ length: itemCount }, (_, j) => { const price = faker.number.float({ min: 9.99, max: 199.99, fractionDigits: 2, }) const quantity = faker.number.int({ min: 1, max: 5 }) return { sku: `SKU-${faker.string.alphanumeric({ length: 6 }).toUpperCase()}`, name: PRODUCT_NAMES[j % PRODUCT_NAMES.length]!, quantity, price, } }) const subtotal = Number(items.reduce((sum, item) => sum + (item.price * item.quantity), 0).toFixed(2)) const tax = Number((subtotal * 0.08).toFixed(2)) // 8% tax rate const total = Number((subtotal + tax).toFixed(2)) return { orderId: `ORD-${String(i + 1).padStart(4, '0')}`, customer: { id: customerId, name: faker.person.fullName(), email: faker.internet.email().toLowerCase(), phone: faker.phone.number(), }, items, subtotal, tax, total, status: ORDER_STATUSES[i % ORDER_STATUSES.length]!, orderDate: faker.date.recent({ days: 90 }).toISOString().split('T')[0], } }), } } /** * Nested dataset: E-commerce orders with nested structures * * @remarks * Tests TOON's handling of complex nested objects. 
*/ const nestedDataset: Dataset = { name: 'nested', description: 'E-commerce orders with nested structures', data: generateOrders(50), metadata: { supportsCSV: false, structureClass: 'nested', tabularEligibility: 33, // Top-level orders array has nested objects (not tabular), but nested items arrays are tabular }, } /** * Analytics dataset: Time-series metrics * * @remarks * Tests TOON's handling of numeric data and date fields. */ const analyticsDataset: Dataset = { name: 'analytics', description: 'Time-series analytics data', data: generateAnalyticsData(60), metadata: { supportsCSV: true, structureClass: 'uniform', tabularEligibility: 100, // Uniform time-series records with consistent primitive fields }, } /** * Real-world dataset: Top 100 starred GitHub repositories * * @remarks * Tests TOON's tabular format with real data. */ const githubDataset: Dataset = { name: 'github', description: 'Top 100 GitHub repositories', data: { repositories: githubRepos, }, metadata: { supportsCSV: true, structureClass: 'uniform', tabularEligibility: 100, // Repository array contains uniform objects with primitive values }, } /** * Generate a single e-commerce order with nested structure * * @remarks * Used for token efficiency benchmarks. 
*/ export function generateOrderData(): Order { return { orderId: faker.string.alphanumeric({ length: 12, casing: 'upper' }), customer: { id: faker.number.int({ min: 1000, max: 9999 }), name: faker.person.fullName(), email: faker.internet.email(), phone: faker.phone.number(), }, items: Array.from({ length: faker.number.int({ min: 2, max: 5 }) }, () => ({ sku: faker.string.alphanumeric({ length: 8, casing: 'upper' }), name: faker.commerce.productName(), quantity: faker.number.int({ min: 1, max: 5 }), price: Number(faker.commerce.price({ min: 10, max: 200 })), })), subtotal: Number(faker.commerce.price({ min: 100, max: 500 })), tax: Number(faker.commerce.price({ min: 10, max: 50 })), total: Number(faker.commerce.price({ min: 110, max: 550 })), status: faker.helpers.arrayElement(['pending', 'processing', 'shipped', 'delivered']), createdAt: faker.date.recent({ days: 7 }).toISOString(), } } /** * Generate event logs (semi-uniform structure) * * @remarks * Approximately 50% of logs include nested error objects, 50% are flat. * This creates ~45% tabular eligibility. */ export function generateEventLogs(count: number): { logs: EventLog[] } { const endpoints = ['/api/users', '/api/orders', '/api/products', '/api/auth', '/api/payments'] const levels = ['info', 'warn', 'error'] as const return { logs: Array.from({ length: count }, () => { const level = faker.helpers.arrayElement(levels) const hasError = level === 'error' || (level === 'warn' && faker.datatype.boolean(0.3)) const log: EventLog = { timestamp: faker.date.recent({ days: 7 }).toISOString(), level, endpoint: faker.helpers.arrayElement(endpoints), statusCode: hasError ? 
faker.number.int({ min: 400, max: 599 }) : faker.number.int({ min: 200, max: 299 }), responseTime: faker.number.int({ min: 10, max: 5000 }), userId: faker.number.int({ min: 1000, max: 9999 }), } if (hasError) { log.error = { message: faker.helpers.arrayElement([ 'Database connection timeout', 'Invalid authentication token', 'Resource not found', 'Internal server error', 'Rate limit exceeded', ]), stack: `Error: ${faker.lorem.sentence()}\n at ${faker.lorem.word()}\n at ${faker.lorem.word()}`, retryable: faker.datatype.boolean(0.6), } } return log }), } } /** * Generate deeply nested configuration * * @remarks * Creates a complex nested structure with minimal tabular eligibility (~0%). */ export function generateNestedConfig(): NestedConfig { return { environment: faker.helpers.arrayElement(['production', 'staging', 'development']), version: faker.system.semver(), database: { host: faker.internet.domainName(), port: 5432, name: faker.database.type(), pool: { min: 2, max: faker.number.int({ min: 10, max: 50 }), idleTimeout: 30000, }, replicas: Array.from({ length: 3 }, (_, i) => ({ host: `replica-${i + 1}.${faker.internet.domainName()}`, port: 5432, priority: i + 1, })), }, features: { darkMode: { enabled: faker.datatype.boolean(), rollout: faker.number.int({ min: 0, max: 100 }), variants: [ { name: 'default', weight: 70, config: { theme: 'dark', animations: true }, }, { name: 'minimal', weight: 30, config: { theme: 'dark', animations: false }, }, ], }, analytics: { enabled: faker.datatype.boolean(), rollout: faker.number.int({ min: 0, max: 100 }), variants: [ { name: 'full', weight: 100, config: { tracking: 'all', sampling: 1.0 }, }, ], }, }, authentication: { providers: [ { name: 'oauth2', clientId: faker.string.uuid(), scopes: ['read', 'write', 'admin'], config: { authUrl: faker.internet.url(), tokenUrl: faker.internet.url(), }, }, { name: 'saml', clientId: faker.string.uuid(), scopes: ['read'], config: { entryPoint: faker.internet.url(), cert: 
faker.string.alphanumeric({ length: 64 }), }, }, ], session: { secret: faker.string.alphanumeric({ length: 32 }), duration: 86400, refreshThreshold: 3600, }, }, permissions: { roles: { admin: { permissions: ['read', 'write', 'delete', 'manage_users', 'manage_roles'], inherits: [], }, editor: { permissions: ['read', 'write'], inherits: ['viewer'], }, viewer: { permissions: ['read'], inherits: [], }, }, groups: { engineering: { members: Array.from({ length: 5 }, () => faker.internet.email()), roles: ['admin', 'editor'], }, support: { members: Array.from({ length: 3 }, () => faker.internet.email()), roles: ['viewer'], }, }, }, } } /** * Generate large uniform product array (5000+ rows) * * @remarks * Tests TOON's token efficiency and structural reliability at scale. */ export function generateProducts(count: number): { products: Product[] } { const categories = ['Electronics', 'Clothing', 'Home & Garden', 'Sports', 'Books', 'Toys'] as const return { products: Array.from({ length: count }, (_, i): Product => ({ sku: `SKU-${String(i + 1).padStart(6, '0')}`, name: faker.commerce.productName(), category: categories[i % categories.length]!, price: Number(faker.commerce.price({ min: 5, max: 500 })), qty: faker.number.int({ min: 0, max: 1000 }), lastUpdated: faker.date.recent({ days: 30 }).toISOString().split('T')[0]!, })), } } /** * Generate structural validation fixtures from employee data * * @remarks * Creates deliberately corrupted datasets to test TOON's structural validation * capabilities via [N] length declarations and {fields} headers. * Internal function used to generate structural validation datasets. 
/**
 * Derive the structural-validation fixtures from one set of employee records.
 *
 * @remarks
 * Starts from 20 generated employees and produces five variants: the untouched
 * control plus four deliberately damaged copies (truncated, extra rows, one row
 * without `salary`, every fifth row without `email`). Only the control is marked valid.
 *
 * @returns The fixtures in the order: control, truncated, extra-rows, width-mismatch, missing-fields
 */
function generateStructuralValidationFixtures(): StructuralValidationFixture[] {
  const { employees } = generateEmployees(20)
  // Generated right after the base set so the faker call sequence matches the original
  const surplusEmployees = generateEmployees(3).employees

  // Last 3 rows dropped
  const truncated = employees.slice(0, -3)

  // Row 10 (index 9) loses its salary field
  const missingSalaryInRowTen = employees.map((employee, rowIndex) => {
    if (rowIndex !== 9)
      return employee
    const { salary, ...withoutSalary } = employee
    return withoutSalary
  })

  // Rows 1, 6, 11, 16 (every 5th, starting at index 0) lose their email field
  const missingEmailEveryFifthRow = employees.map((employee, rowIndex) => {
    if (rowIndex % 5 !== 0)
      return employee
    const { email, ...withoutEmail } = employee
    return withoutEmail
  })

  return [
    // NOTE(review): the control fixture is tagged 'truncated' like the next one; confirm this is intended
    {
      type: 'truncated' as const,
      description: 'Valid complete dataset (control)',
      data: { employees },
      isValid: true,
    },
    {
      type: 'truncated' as const,
      description: 'Array truncated: 3 rows removed from end',
      data: { employees: truncated },
      isValid: false, // [N] won't match actual row count in TOON
    },
    {
      type: 'extra-rows' as const,
      description: 'Extra rows added beyond declared length',
      data: { employees: [...employees, ...surplusEmployees] },
      isValid: false, // [N] won't match actual row count in TOON
    },
    {
      type: 'width-mismatch' as const,
      description: 'Inconsistent field count (missing salary in row 10)',
      data: { employees: missingSalaryInRowTen },
      isValid: false, // Not all objects have same fields (tabular requirement)
    },
    {
      type: 'missing-fields' as const,
      description: 'Missing required fields (no email in multiple rows)',
      data: { employees: missingEmailEveryFifthRow },
      isValid: false, // Not all objects have same fields (tabular requirement)
    },
  ]
}
/**
 * Event logs dataset: semi-uniform structure.
 *
 * @remarks
 * Holds 75 generated log entries. It is flagged as not CSV-compatible, with
 * roughly half of the entries carrying a nested optional `error` object.
 */
const eventLogsDataset: Dataset = {
  name: 'event-logs',
  description: 'Semi-uniform event logs',
  data: generateEventLogs(75),
  metadata: {
    supportsCSV: false,
    structureClass: 'semi-uniform',
    tabularEligibility: 50, // Top-level logs array is tabular, but ~50% have nested optional error objects
  },
}

/**
 * Nested config dataset: deeply nested structure.
 *
 * @remarks
 * A single generated configuration object. It is flagged as not CSV-compatible
 * and as having no tabular arrays (eligibility 0), the least favourable shape for TOON.
 */
const nestedConfigDataset: Dataset = {
  name: 'nested-config',
  description: 'Deeply nested configuration',
  data: generateNestedConfig(),
  metadata: {
    supportsCSV: false,
    structureClass: 'deep',
    tabularEligibility: 0, // Deeply nested configuration with no tabular arrays
  },
}
/**
 * Dataset names for the structural validation fixtures, in fixture order.
 */
const STRUCTURAL_VALIDATION_DATASET_NAMES = [
  'structural-validation-control',
  'structural-validation-truncated',
  'structural-validation-extra-rows',
  'structural-validation-width-mismatch',
  'structural-validation-missing-fields',
] as const

/**
 * Structural validation datasets: one dataset per generated fixture.
 *
 * @remarks
 * Each fixture is paired with its name by position. CSV is enabled for all of
 * them so the benchmark can show that CSV carries no structural metadata to validate against.
 */
const structuralValidationDatasets: Dataset[] = generateStructuralValidationFixtures().map(
  ({ description, data }, position): Dataset => ({
    name: STRUCTURAL_VALIDATION_DATASET_NAMES[position]!,
    description,
    data,
    metadata: {
      supportsCSV: true, // Include CSV to show it can't validate structure
      structureClass: 'uniform',
      tabularEligibility: 100,
    },
  }),
)

/**
 * Datasets for the accuracy benchmarks (kept small so evaluation stays fast)
 */
export const ACCURACY_DATASETS: Dataset[] = [
  tabularDataset, // 100 employees
  nestedDataset, // 50 orders
  analyticsDataset, // 60 days
  githubDataset, // 100 repos
  eventLogsDataset, // 75 logs
  nestedConfigDataset, // 1 config
  ...structuralValidationDatasets, // 5 validation fixtures
]
logs: 2000 logs { name: 'event-logs', description: 'Semi-uniform event logs', data: generateEventLogs(2000), metadata: { supportsCSV: false, structureClass: 'semi-uniform', tabularEligibility: 50, // Top-level logs array is tabular, but ~50% have nested optional error objects }, }, // Nested config: 1 config (same as accuracy) nestedConfigDataset, ] ================================================ FILE: benchmarks/src/evaluate.ts ================================================ import type { LanguageModelV3 } from '@ai-sdk/provider' import type { EvaluationResult, Question } from './types.ts' import { anthropic } from '@ai-sdk/anthropic' import { google } from '@ai-sdk/google' import { openai } from '@ai-sdk/openai' import { xai } from '@ai-sdk/xai' import { generateText } from 'ai' import { compareAnswers } from './normalize.ts' /** * Models used for evaluation */ export const models: LanguageModelV3[] = [ anthropic('claude-haiku-4-5-20251001'), google('gemini-3-flash-preview'), openai('gpt-5-nano'), xai('grok-4-1-fast-non-reasoning'), ] /** * Format primers * * @remarks * Neutral descriptions to help models parse each format. */ export const PRIMERS: Record = { 'toon': 'TOON: Indentation-based. Arrays declare length and fields (e.g., items[N]{f1,f2}:). Rows use single delimiter. Values may be quoted.', 'json-pretty': 'JSON: Strict JSON objects/arrays with repeated keys per row.', 'json-compact': 'JSON (compact): Strict JSON without extra whitespace.', 'yaml': 'YAML: Indentation-based key/value and lists (- items).', 'xml': 'XML: Tag-based tree structure with nested elements.', 'csv': 'CSV: Header row, comma-separated values. 
First row contains field names.', } /** * Code fence language tags for proper syntax highlighting */ export const FENCE: Record = { 'toon': 'toon', 'json-pretty': 'json', 'json-compact': 'json', 'yaml': 'yaml', 'xml': 'xml', 'csv': 'csv', } /** * Evaluate a single question with a specific format and model */ export async function evaluateQuestion( { question, formatName, formattedData, model, }: { question: Question formatName: string formattedData: string model: LanguageModelV3 }, ): Promise { const primer = PRIMERS[formatName] ?? '' const fence = FENCE[formatName] ?? '' const prompt = ` ${primer} Given the following data in ${formatName} format: \`\`\`${fence} ${formattedData} \`\`\` Question: ${question.prompt} Answer format requirements: - Provide only the value itself, no explanation - For numbers: output digits only (no commas, currency symbols, or units) - For dates/field names: use the exact string from the data - For lists: output comma-separated values with no spaces Answer: `.trim() const startTime = performance.now() const { text, usage } = await generateText({ model, prompt }) const actual = text.trim() const latencyMs = performance.now() - startTime const comparisonResult = compareAnswers( actual, question.groundTruth, question.answerType ?? 
/**
 * Format converters registry
 *
 * @remarks
 * Maps a format name to a function that serializes a dataset payload into that
 * format. All formatters try to preserve the information in the source data;
 * CSV cannot do so for nested structures (see `toCSV`).
 *
 * Key order is kept stable because callers may iterate the registry.
 *
 * NOTE(review): the input type of the formatter functions is assumed to be
 * `unknown`, matching the `toCSV`/`toXML` signatures; confirm against the original declaration.
 */
export const formatters: Record<string, (data: unknown) => string> = {
  'json-pretty': data => JSON.stringify(data, undefined, 2),
  'json-compact': data => JSON.stringify(data),
  'toon': data => encodeToon(data),
  'csv': data => toCSV(data),
  'xml': data => toXML(data),
  'yaml': data => stringifyYAML(data),
}
/**
 * Convert data to CSV format
 *
 * @remarks
 * CSV is designed for flat tabular data only, so this formatter is lossy:
 * - A root-level array is written as a single table with a header row
 * - A top-level object contributes one `# key` section per non-empty array property;
 *   all other properties are dropped
 * - Nested arrays/objects inside rows are not represented faithfully
 *
 * For datasets with nested structures, CSV comparisons may not be fair or
 * representative of how CSV would typically be used in practice.
 *
 * @param data - Value to serialize
 * @returns CSV text, or an empty string when there is nothing tabular to emit
 */
function toCSV(data: unknown): string {
  // Root-level array: one table
  if (Array.isArray(data))
    return data.length > 0 ? stringifyCSV(data, { header: true }).trim() : ''

  // Primitives and null have no tabular representation
  if (typeof data !== 'object' || data === null)
    return ''

  // Top-level object: one labelled section per non-empty array property
  const sections: string[] = []
  for (const [key, value] of Object.entries(data)) {
    if (Array.isArray(value) && value.length > 0)
      sections.push(`# ${key}`, stringifyCSV(value, { header: true }))
  }

  return sections.join('\n').trim()
}

/**
 * Convert data to XML format
 *
 * @remarks
 * Uses `fast-xml-parser` to generate formatted XML with:
 * - 2-space indentation for readability
 * - Empty nodes suppressed
 *
 * @param data - Value to serialize
 * @returns The XML document as text
 */
function toXML(data: unknown): string {
  const builder = new XMLBuilder({
    format: true,
    // Two spaces, as documented above
    indentBy: '  ',
    suppressEmptyNode: true,
  })

  return builder.build(data)
}

/**
 * Check if a dataset supports CSV format
 *
 * @remarks
 * Reads the flag from the dataset's metadata. CSV is only suitable for flat
 * tabular data, so datasets with nested structures are expected to opt out.
 *
 * @param dataset - Dataset to inspect
 * @returns The dataset's `metadata.supportsCSV` flag
 */
export function supportsCSV(dataset: Dataset): boolean {
  return dataset.metadata.supportsCSV
}
/**
 * Extract and normalize an integer from a string
 *
 * @remarks
 * Takes the first integer-like token in the text and ignores everything else.
 * Handles: "42", "1,234", "$5,678", " -99 ", "The answer is 42."
 *
 * @param text - Raw answer text
 * @param options - Resolved normalization options; `allowCurrency` selects the pattern
 * @returns The parsed integer, or a failure result when no integer token is present
 */
function normalizeInteger(text: string, options: Required<NormalizationOptions>): NormalizedResult {
  // Extract first integer-like token
  const pattern = options.allowCurrency ? INTEGER_PATTERN_WITH_CURRENCY : INTEGER_PATTERN
  const match = text.match(pattern)
  if (!match)
    return { success: false, error: `No integer found in: "${text}"` }

  // Remove currency symbols, spaces, and thousand separators
  const normalizedValue = match[0].replace(CURRENCY_AND_FORMATTING_CHARS, '')
  const parsedNumber = Number.parseInt(normalizedValue, DECIMAL_BASE)
  if (Number.isNaN(parsedNumber))
    return { success: false, error: `Failed to parse integer: "${match[0]}"` }

  return { success: true, value: parsedNumber }
}

/**
 * Extract and normalize a floating-point number from a string
 *
 * @remarks
 * Takes the first number-like token (scientific notation included).
 * Handles: "3.14", "1,234.56", "$5,678.90", "42%", "1.5e-3", "Price: $99.99"
 *
 * A trailing `%` divides the value by 100 only when `allowPercent` is set;
 * otherwise the sign is dropped and the value is kept as written.
 *
 * @param text - Raw answer text
 * @param options - Resolved normalization options (`allowCurrency`, `allowPercent`, `decimalPlaces`)
 * @returns The parsed number (rounded when `decimalPlaces` is set), or a failure result
 */
function normalizeNumber(text: string, options: Required<NormalizationOptions>): NormalizedResult {
  // Extract first number-like token (supports scientific notation)
  const pattern = options.allowCurrency ? NUMBER_PATTERN_WITH_CURRENCY : NUMBER_PATTERN
  const match = text.match(pattern)
  if (!match)
    return { success: false, error: `No number found in: "${text}"` }

  const token = match[0]
  const hasPercentSign = options.allowPercent && token.endsWith('%')

  // Remove currency, commas, spaces, and percent sign
  const normalizedToken = token.replace(NUMBER_CLEANUP_CHARS, '')
  let parsedNumber = Number.parseFloat(normalizedToken)
  if (Number.isNaN(parsedNumber))
    return { success: false, error: `Failed to parse number: "${token}"` }

  // Convert percentage to decimal if present
  if (hasPercentSign)
    parsedNumber = parsedNumber / PERCENTAGE_DIVISOR

  // Round to specified decimal places if requested
  if (options.decimalPlaces !== undefined) {
    const factor = DECIMAL_BASE ** options.decimalPlaces
    parsedNumber = Math.round(parsedNumber * factor) / factor
  }

  return { success: true, value: parsedNumber }
}
// ISO 8601 date-only forms (YYYY, YYYY-MM, YYYY-MM-DD), which `Date` parses as UTC midnight
const ISO_CALENDAR_DATE_PATTERN = /^\d{4}(?:-\d{2}){0,2}$/
// A time of day (HH:MM) or an explicit GMT/UTC marker: the text pins an instant, not a bare calendar date
const TIME_OR_UTC_MARKER_PATTERN = /\d:\d{2}|\b(?:GMT|UTC)\b/i

/**
 * Normalize a date string to YYYY-MM-DD format
 *
 * @remarks
 * Handles: ISO dates, "Nov 1, 2025", "2025-11-01", RFC 2822, etc.
 *
 * Strings that carry a time of day or a GMT/UTC marker, and ISO date-only
 * strings, are reported as their UTC calendar date. Other date-only strings
 * (e.g. "Nov 1, 2025") are parsed by the engine as local midnight, so they are
 * read back with local getters. Reading them in UTC would shift the day in
 * time zones east of UTC.
 *
 * @param text - Raw answer text; wrapping quotes are stripped before parsing
 * @returns The normalized date string, or a failure result when the text is not a parseable date
 */
function normalizeDate(text: string): NormalizedResult {
  const cleaned = stripWrappingQuotes(text)

  // Try parsing as date
  const parsedDate = new Date(cleaned)
  if (Number.isNaN(parsedDate.getTime()))
    return { success: false, error: `Invalid date: "${text}"` }

  // Bare non-ISO calendar dates were interpreted in local time; read them back the same way
  const isLocalCalendarDate = !ISO_CALENDAR_DATE_PATTERN.test(cleaned)
    && !TIME_OR_UTC_MARKER_PATTERN.test(cleaned)

  const year = isLocalCalendarDate ? parsedDate.getFullYear() : parsedDate.getUTCFullYear()
  const monthIndex = isLocalCalendarDate ? parsedDate.getMonth() : parsedDate.getUTCMonth()
  const day = isLocalCalendarDate ? parsedDate.getDate() : parsedDate.getUTCDate()

  // Normalize to YYYY-MM-DD
  const monthPadded = String(monthIndex + MONTH_OFFSET).padStart(DATE_COMPONENT_WIDTH, DATE_PAD_CHAR)
  const dayPadded = String(day).padStart(DATE_COMPONENT_WIDTH, DATE_PAD_CHAR)
  const normalized = `${year}-${monthPadded}-${dayPadded}`

  return { success: true, value: normalized }
}
/**
 * Merge caller-supplied options over the defaults
 *
 * @remarks
 * Entries that are explicitly `undefined` are ignored, so `{ tolerance: undefined }`
 * keeps the default tolerance instead of erasing it.
 */
function resolveNormalizationOptions(options: Partial<NormalizationOptions>): Required<NormalizationOptions> {
  const definedEntries = Object.entries(options).filter(([, value]) => value !== undefined)
  return { ...DEFAULT_OPTIONS, ...Object.fromEntries(definedEntries) }
}

/**
 * Normalize a comma-separated list (ordered)
 *
 * @remarks
 * Handles: "a,b,c", "a, b, c", " a , b , c ". Wrapping quotes around the whole
 * list are stripped, items are trimmed, empty items are dropped, and items are
 * lower-cased unless `caseSensitive` is set.
 *
 * @param text - Raw answer text
 * @param options - Resolved normalization options
 * @returns A success result whose value is the array of normalized items
 */
function normalizeCsvListOrdered(text: string, options: Required<NormalizationOptions>): NormalizedResult {
  const strippedText = stripWrappingQuotes(text)
  const items = strippedText
    .split(CSV_DELIMITER)
    .map(item => item.trim())
    .filter(item => item.length > 0)

  const normalizedItems = items.map(item =>
    options.caseSensitive ? item : item.toLowerCase(),
  )

  return { success: true, value: normalizedItems }
}

/**
 * Normalize a comma-separated list (unordered, compare as sets)
 *
 * @remarks
 * Handles: "c,a,b" equals "a,b,c". Items are normalized like the ordered
 * variant and then sorted, so two lists can be compared position by position.
 *
 * @param text - Raw answer text
 * @param options - Resolved normalization options
 * @returns A success result whose value is the sorted array of normalized items
 */
function normalizeCsvListUnordered(text: string, options: Required<NormalizationOptions>): NormalizedResult {
  const result = normalizeCsvListOrdered(text, options)
  if (!result.success)
    return result

  // Type guard: ensure result.value is an array
  if (!Array.isArray(result.value))
    return { success: false, error: 'Expected array result from normalizeCsvListOrdered' }

  // Sort a copy for deterministic comparison
  const sorted = [...result.value].sort()

  return { success: true, value: sorted }
}

/**
 * Normalize a value based on its expected kind
 *
 * @param text - Raw answer text
 * @param kind - Expected answer type; selects the normalizer
 * @param options - Optional overrides for the default normalization options
 * @returns The normalizer's result, or a failure result for an unknown kind
 */
export function normalizeAnswer(
  text: string,
  kind: AnswerType,
  options: Partial<NormalizationOptions> = {},
): NormalizedResult {
  const resolvedOptions = resolveNormalizationOptions(options)

  switch (kind) {
    case 'integer':
      return normalizeInteger(text, resolvedOptions)
    case 'number':
      return normalizeNumber(text, resolvedOptions)
    case 'boolean':
      return normalizeBoolean(text)
    case 'date':
      return normalizeDate(text)
    case 'string':
      return normalizeString(text, resolvedOptions)
    case 'csv-list-ordered':
      return normalizeCsvListOrdered(text, resolvedOptions)
    case 'csv-list-unordered':
      return normalizeCsvListUnordered(text, resolvedOptions)
    default:
      return { success: false, error: `Unknown answer kind: ${kind}` }
  }
}

/**
 * Compare two normalized values based on answer kind
 *
 * @param actual - Normalized value of the answer under test
 * @param expected - Normalized value of the ground truth
 * @param kind - Answer type both values were normalized as
 * @param options - Resolved options; `decimalPlaces` and `tolerance` apply to numbers only
 * @returns Whether the two values are considered equal
 */
function compareValues(
  actual: unknown,
  expected: unknown,
  kind: AnswerType,
  options: Required<NormalizationOptions>,
): boolean {
  switch (kind) {
    case 'integer':
    case 'boolean':
    case 'date':
    case 'string':
      return actual === expected

    case 'number':
      if (typeof actual !== 'number' || typeof expected !== 'number')
        return false

      // Values were already rounded during normalization when decimalPlaces is set
      if (options.decimalPlaces !== undefined)
        return actual === expected

      return Math.abs(actual - expected) <= options.tolerance

    // Unordered lists were sorted during normalization, so both kinds compare element-wise
    case 'csv-list-ordered':
    case 'csv-list-unordered':
      if (!Array.isArray(actual) || !Array.isArray(expected))
        return false
      if (actual.length !== expected.length)
        return false
      return actual.every((item, i) => item === expected[i])

    default:
      return false
  }
}

/**
 * Compare actual and expected answers with deterministic, type-aware normalization
 *
 * @param actual - Answer text under test
 * @param expected - Ground-truth text
 * @param kind - Expected answer type
 * @param options - Optional overrides for the default normalization options
 * @returns `match` tells whether the answers agree within the configured rules;
 * `details` explains a normalization failure or mismatch and is `undefined` on a match
 */
export function compareAnswers(
  actual: string,
  expected: string,
  kind: AnswerType,
  options: Partial<NormalizationOptions> = {},
): { match: boolean, details?: string } {
  const resolvedOptions = resolveNormalizationOptions(options)

  // Normalize both answers
  const actualResult = normalizeAnswer(actual, kind, resolvedOptions)
  const expectedResult = normalizeAnswer(expected, kind, resolvedOptions)

  // If either normalization failed, return false with details
  if (!actualResult.success) {
    return {
      match: false,
      details: `Failed to normalize actual answer: ${actualResult.error}`,
    }
  }

  if (!expectedResult.success) {
    return {
      match: false,
      details: `Failed to normalize expected answer: ${expectedResult.error}`,
    }
  }

  // Compare normalized values
  const match = compareValues(actualResult.value, expectedResult.value, kind, resolvedOptions)

  return {
    match,
    details: match
      ? undefined
      : `Mismatch: actual="${actualResult.value}" vs expected="${expectedResult.value}"`,
  }
}
metrics.length const totalViews = metrics.reduce((sum, m) => sum + m.views, 0) const totalConversions = metrics.reduce((sum, m) => sum + m.conversions, 0) const totalRevenue = metrics.reduce((sum, m) => sum + m.revenue, 0) const avgBounceRate = metrics.reduce((sum, m) => sum + m.bounceRate, 0) / metrics.length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many days of data are in the dataset?') .groundTruth(String(totalDays)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total number of views across all dates?') .groundTruth(String(totalViews)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total number of conversions across all dates?') .groundTruth(String(totalConversions)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total revenue across all dates?') .groundTruth(String(totalRevenue.toFixed(2))) .type('aggregation') .dataset('analytics') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), new QuestionBuilder() .id(getId()) .prompt('What is the average bounce rate?') .groundTruth(String(avgBounceRate.toFixed(2))) .type('aggregation') .dataset('analytics') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), ) // Aggregation: high views/conversions for (const threshold of QUESTION_THRESHOLDS.analytics.views) { const count = metrics.filter(m => m.views > threshold).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had more than ${threshold} views?`) .groundTruth(String(count)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), ) } for (const threshold of QUESTION_THRESHOLDS.analytics.conversions) { const count = metrics.filter(m => m.conversions > threshold).length questions.push( new QuestionBuilder() .id(getId()) 
.prompt(`How many days had more than ${threshold} conversions?`) .groundTruth(String(count)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: multi-condition (views AND revenue) for (const threshold of QUESTION_THRESHOLDS.analytics.viewsForFiltering) { const count = metrics.filter( m => m.views > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForFiltering, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had more than ${threshold} views and more than ${QUESTION_THRESHOLDS.analytics.conversionsForFiltering} conversions?`) .groundTruth(String(count)) .type('filtering') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: revenue thresholds for (const threshold of QUESTION_THRESHOLDS.analytics.revenueThresholds) { const count = metrics.filter( m => m.revenue > threshold && m.views > QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had revenue greater than ${threshold} with views above ${QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue}?`) .groundTruth(String(count)) .type('filtering') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: clicks and conversions for (const threshold of QUESTION_THRESHOLDS.analytics.clicksForFiltering) { const count = metrics.filter( m => m.clicks > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had more than ${threshold} clicks and more than ${QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering} conversions?`) .groundTruth(String(count)) .type('filtering') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: revenue and bounce rate for (const threshold of QUESTION_THRESHOLDS.analytics.revenueForBounceRate) { const count = metrics.filter( m => m.revenue > 
threshold && m.bounceRate < QUESTION_THRESHOLDS.analytics.bounceRateThreshold, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had revenue greater than ${threshold} with bounce rate below ${QUESTION_THRESHOLDS.analytics.bounceRateThreshold}?`) .groundTruth(String(count)) .type('filtering') .dataset('analytics') .answerType('integer') .build(), ) } return questions } ================================================ FILE: benchmarks/src/questions/event-logs.ts ================================================ import type { EventLog } from '../datasets.ts' import type { Question } from '../types.ts' import { QUESTION_LIMITS } from '../constants.ts' import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils.ts' /** * Generate event log questions */ export function generateEventLogsQuestions(logs: EventLog[], getId: () => string): Question[] { const questions: Question[] = [] // Field retrieval: log metadata const logFieldGenerators: Array<(log: EventLog, getId: () => string) => Question> = [ (log, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the level of the log at ${log.timestamp}?`) .groundTruth(log.level) .type('field-retrieval') .dataset('event-logs') .answerType('string') .build(), (log, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the endpoint for the log at ${log.timestamp}?`) .groundTruth(log.endpoint) .type('field-retrieval') .dataset('event-logs') .answerType('string') .build(), (log, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the status code for the log at ${log.timestamp}?`) .groundTruth(String(log.statusCode)) .type('field-retrieval') .dataset('event-logs') .answerType('integer') .build(), (log, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the response time for the log at ${log.timestamp}?`) .groundTruth(String(log.responseTime)) .type('field-retrieval') .dataset('event-logs') .answerType('integer') .build(), ] 
questions.push(...rotateQuestions( logs, logFieldGenerators, QUESTION_LIMITS.eventLogs.fieldRetrieval, SAMPLE_STRIDES.EVENT_LOG_FIELD, getId, )) // Aggregation: basic statistics const totalLogs = logs.length const avgResponseTime = logs.reduce((sum, l) => sum + l.responseTime, 0) / logs.length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many log entries are in the dataset?') .groundTruth(String(totalLogs)) .type('aggregation') .dataset('event-logs') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the average response time across all logs?') .groundTruth(String(avgResponseTime.toFixed(2))) .type('aggregation') .dataset('event-logs') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), ) // Aggregation: by level const levels = [...new Set(logs.map(l => l.level))] for (const level of levels) { const count = logs.filter(l => l.level === level).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many log entries have level "${level}"?`) .groundTruth(String(count)) .type('aggregation') .dataset('event-logs') .answerType('integer') .build(), ) } // Aggregation: by endpoint const endpoints = [...new Set(logs.map(l => l.endpoint))] for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.aggregationEndpoints)) { const count = logs.filter(l => l.endpoint === endpoint).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many log entries are for endpoint "${endpoint}"?`) .groundTruth(String(count)) .type('aggregation') .dataset('event-logs') .answerType('integer') .build(), ) } // Aggregation: by status code range const errorCount = logs.filter(l => l.statusCode >= 400).length const successCount = logs.filter(l => l.statusCode >= 200 && l.statusCode < 300).length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many log entries have a status code indicating an error (>= 400)?') .groundTruth(String(errorCount)) .type('aggregation') 
.dataset('event-logs') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many log entries have a successful status code (200-299)?') .groundTruth(String(successCount)) .type('aggregation') .dataset('event-logs') .answerType('integer') .build(), ) // Aggregation: retryable errors const retryableErrorCount = logs.filter(l => l.error?.retryable === true).length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many log entries have a retryable error?') .groundTruth(String(retryableErrorCount)) .type('aggregation') .dataset('event-logs') .answerType('integer') .build(), ) // Filtering: multi-condition (level AND status) for (const level of levels.slice(0, QUESTION_LIMITS.eventLogs.filteringLevelAndStatus)) { // Skip `info` level as it never has status >= 400 by design if (level === 'info') continue const count = logs.filter(l => l.level === level && l.statusCode >= 400).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many log entries have level "${level}" and status code >= 400?`) .groundTruth(String(count)) .type('filtering') .dataset('event-logs') .answerType('integer') .build(), ) } // Filtering: endpoint AND status for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.filteringEndpointAndStatus)) { const count = logs.filter(l => l.endpoint === endpoint && l.statusCode >= 500).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many log entries are for endpoint "${endpoint}" with status code >= 500?`) .groundTruth(String(count)) .type('filtering') .dataset('event-logs') .answerType('integer') .build(), ) } // Filtering: endpoint AND retryable error for (const endpoint of endpoints.slice(0, QUESTION_LIMITS.eventLogs.filteringEndpointRetryable)) { const count = logs.filter(l => l.endpoint === endpoint && l.error?.retryable === true).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many log entries for endpoint "${endpoint}" have a retryable 
error?`) .groundTruth(String(count)) .type('filtering') .dataset('event-logs') .answerType('integer') .build(), ) } return questions } ================================================ FILE: benchmarks/src/questions/github.ts ================================================
import type { Repository } from '../datasets.ts'
import type { Question } from '../types.ts'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants.ts'
import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils.ts'

/**
 * Generate GitHub repository questions
 *
 * @remarks
 * Questions are emitted in a fixed order: field retrieval (selected through
 * `rotateQuestions`), dataset-wide totals, per-branch counts, single-threshold
 * counts (stars, forks, watchers) and finally two-condition filtering counts.
 * `getId` is invoked exactly once per generated question, in emission order.
 *
 * The "average stars" question is skipped when `repos` is empty, because the
 * average of zero repositories is `NaN` and would yield an unanswerable question.
 *
 * @param repos - Repositories the prompts and ground truths are derived from
 * @param getId - Returns the id for the next question
 * @returns The generated questions
 */
export function generateGithubQuestions(repos: Repository[], getId: () => string): Question[] {
  const questions: Question[] = []

  // All integer-valued GitHub questions share one builder chain; only the id source,
  // prompt, value and question type vary
  const integerQuestion = (
    nextId: () => string,
    prompt: string,
    value: number,
    type: 'field-retrieval' | 'aggregation' | 'filtering',
  ): Question =>
    new QuestionBuilder()
      .id(nextId())
      .prompt(prompt)
      .groundTruth(String(value))
      .type(type)
      .dataset('github')
      .answerType('integer')
      .build()

  // Field retrieval: repository metadata
  // Each generator uses the id source handed to it by `rotateQuestions`
  const repoFieldGenerators: Array<(repo: Repository, getId: () => string) => Question> = [
    (repo, nextId) => integerQuestion(nextId, `How many stars does ${repo.repo} have?`, repo.stars, 'field-retrieval'),
    (repo, nextId) => integerQuestion(nextId, `How many forks does ${repo.repo} have?`, repo.forks, 'field-retrieval'),
    (repo, nextId) => integerQuestion(nextId, `How many watchers does ${repo.repo} have?`, repo.watchers, 'field-retrieval'),
    (repo, nextId) => new QuestionBuilder()
      .id(nextId())
      .prompt(`What is the main branch of ${repo.repo}?`)
      .groundTruth(repo.defaultBranch)
      .type('field-retrieval')
      .dataset('github')
      .answerType('string')
      // Branch names are compared case-sensitively
      .normalize({ caseSensitive: true })
      .build(),
  ]

  questions.push(...rotateQuestions(
    repos,
    repoFieldGenerators,
    QUESTION_LIMITS.github.fieldRetrievalRepos,
    SAMPLE_STRIDES.REPO_FIELD,
    getId,
  ))

  // Aggregation: basic statistics
  const totalRepos = repos.length
  const totalStars = repos.reduce((sum, r) => sum + r.stars, 0)
  const totalForks = repos.reduce((sum, r) => sum + r.forks, 0)

  questions.push(
    integerQuestion(getId, 'How many repositories are in the dataset?', totalRepos, 'aggregation'),
    integerQuestion(getId, 'What is the total number of stars across all repositories?', totalStars, 'aggregation'),
    integerQuestion(getId, 'What is the total number of forks across all repositories?', totalForks, 'aggregation'),
  )

  // Dividing by zero repositories would produce a "NaN" ground truth, so only ask when there is data
  if (totalRepos > 0) {
    questions.push(
      integerQuestion(
        getId,
        'What is the average number of stars per repository?',
        Math.round(totalStars / totalRepos),
        'aggregation',
      ),
    )
  }

  // Aggregation: by default branch
  const branches = [...new Set(repos.map(r => r.defaultBranch))]
  for (const branch of branches.slice(0, QUESTION_LIMITS.github.aggregationBranches)) {
    const count = repos.filter(r => r.defaultBranch === branch).length
    questions.push(
      integerQuestion(getId, `How many repositories use "${branch}" as their default branch?`, count, 'aggregation'),
    )
  }

  // Aggregation: high star / fork / watcher counts (single-condition filters)
  const singleThresholdMetrics = [
    { noun: 'stars', thresholds: QUESTION_THRESHOLDS.github.stars, read: (r: Repository) => r.stars },
    { noun: 'forks', thresholds: QUESTION_THRESHOLDS.github.forks, read: (r: Repository) => r.forks },
    { noun: 'watchers', thresholds: QUESTION_THRESHOLDS.github.watchers, read: (r: Repository) => r.watchers },
  ]
  for (const { noun, thresholds, read } of singleThresholdMetrics) {
    for (const threshold of thresholds) {
      const count = repos.filter(r => read(r) > threshold).length
      questions.push(
        integerQuestion(getId, `How many repositories have more than ${threshold} ${noun}?`, count, 'aggregation'),
      )
    }
  }

  // Filtering: multi-condition (stars AND forks)
  for (const combo of QUESTION_THRESHOLDS.github.starForkCombinations.slice(0, QUESTION_LIMITS.github.filteringStarsAndForks)) {
    const count = repos.filter(r => r.stars > combo.stars && r.forks > combo.forks).length
    questions.push(
      integerQuestion(
        getId,
        `How many repositories have more than ${combo.stars} stars and more than ${combo.forks} forks?`,
        count,
        'filtering',
      ),
    )
  }

  // Filtering: stars AND watchers (every configured combination is used)
  for (const combo of QUESTION_THRESHOLDS.github.starWatcherCombinations) {
    const count = repos.filter(r => r.stars > combo.stars && r.watchers > combo.watchers).length
    questions.push(
      integerQuestion(
        getId,
        `How many repositories have more than ${combo.stars} stars and more than ${combo.watchers} watchers?`,
        count,
        'filtering',
      ),
    )
  }

  return questions
}

================================================ FILE: benchmarks/src/questions/index.ts ================================================
import type { AnalyticsMetric, Employee, EventLog, NestedConfig, Order, Repository } from '../datasets.ts'
import type { Question } from '../types.ts'
import { ACCURACY_DATASETS } from '../datasets.ts'
import { generateAnalyticsQuestions } from './analytics.ts'
import {
generateEventLogsQuestions } from './event-logs.ts'
import { generateGithubQuestions } from './github.ts'
import { generateNestedConfigQuestions } from './nested-config.ts'
import { generateNestedQuestions } from './nested.ts'
import { generateStructuralValidationQuestions } from './structural-validation.ts'
import { generateStructureQuestions } from './structure.ts'
import { generateTabularQuestions } from './tabular.ts'
import { createIdGenerator } from './utils.ts'

/**
 * Generate questions from all datasets
 *
 * @remarks
 * Question categories:
 * - Field Retrieval: Direct field access with no computation
 *   Examples: "What is X's salary?", "What is the status of order Y?"
 * - Aggregation: Counts, sums, averages, min/max operations (including single-condition filters)
 *   Examples: "How many X?", "What is the total/average?", "How many X > threshold?"
 * - Filtering: Multi-condition queries requiring complex logical operations
 *   Examples: "How many X WHERE condition1 AND condition2?"
 * - Structure Awareness: Tests format-native structural affordances (TOON's [N] and {fields}, CSV's header)
 *   Examples: "How many records?", "List the field names", "What is the last record's field?"
 *
 * A single id generator is shared by every per-dataset generator, and the
 * generators run in the order listed in the returned array.
 *
 * @returns All generated questions, grouped by generator in call order
 */
export function generateQuestions(): Question[] {
  const idGen = createIdGenerator()
  const getId = () => idGen.next().value

  // Look up a dataset entry by name; yields undefined when no entry matches
  const findDataset = (name: string) => ACCURACY_DATASETS.find(entry => entry.name === name)

  // Get datasets with proper typing; list-shaped datasets fall back to an empty array
  const tabular = (findDataset('tabular')?.data.employees as Employee[]) ?? []
  const nested = (findDataset('nested')?.data.orders as Order[]) ?? []
  const analytics = (findDataset('analytics')?.data.metrics as AnalyticsMetric[]) ?? []
  const github = (findDataset('github')?.data.repositories as Repository[]) ?? []
  const eventLogs = (findDataset('event-logs')?.data.logs as EventLog[]) ?? []
  const nestedConfig = findDataset('nested-config')?.data as NestedConfig | undefined

  // Array-literal spreads are evaluated left to right, so ids are handed out in this order
  return [
    // Per-dataset questions
    ...generateTabularQuestions(tabular, getId),
    ...generateNestedQuestions(nested, getId),
    ...generateAnalyticsQuestions(analytics, getId),
    ...generateGithubQuestions(github, getId),
    ...generateEventLogsQuestions(eventLogs, getId),
    ...generateNestedConfigQuestions(nestedConfig, getId),
    // Structure-awareness questions (tests format-native affordances)
    ...generateStructureQuestions(tabular, nested, analytics, github, eventLogs, getId),
    // Structural-validation questions (tests ability to detect corrupted data)
    ...generateStructuralValidationQuestions(getId),
  ]
}

================================================ FILE: benchmarks/src/questions/nested-config.ts ================================================
import type { NestedConfig } from '../datasets.ts'
import type { Question } from '../types.ts'
import { QUESTION_LIMITS } from '../constants.ts'
import { QuestionBuilder } from './utils.ts'

/**
 * Generate nested configuration questions
 *
 * @remarks
 * Emits field-retrieval questions for individual config values, aggregation
 * questions that count nested structures, and multi-condition filtering
 * questions. The field-retrieval and filtering lists are truncated according to
 * `QUESTION_LIMITS.nestedConfig`.
 *
 * @param config - The nested configuration, or `undefined` when the dataset is unavailable
 * @param getId - Returns the id for the next question; called once per question
 * @returns The generated questions, or an empty array when `config` is falsy
 */
export function generateNestedConfigQuestions(config: NestedConfig | undefined, getId: () => string): Question[] {
  const questions: Question[] = []

  if (!config)
    return questions

  // Append one question for the nested-config dataset
  const ask = (
    prompt: string,
    groundTruth: string,
    type: 'field-retrieval' | 'aggregation' | 'filtering',
    answerType: 'string' | 'integer',
  ): void => {
    questions.push(
      new QuestionBuilder()
        .id(getId())
        .prompt(prompt)
        .groundTruth(groundTruth)
        .type(type)
        .dataset('nested-config')
        .answerType(answerType)
        .build(),
    )
  }

  // Shorthand for the many integer-valued aggregation questions
  const askCount = (prompt: string, count: number): void => ask(prompt, String(count), 'aggregation', 'integer')

  // Field retrieval: individual config values as [prompt, ground truth, answer type]
  const retrievalSpecs: Array<[string, string, 'string' | 'integer']> = [
    ['What is the environment in the configuration?', config.environment, 'string'],
    ['What is the database host?', config.database.host, 'string'],
    ['What is the database port?', String(config.database.port), 'integer'],
    ['What is the maximum connection pool size?', String(config.database.pool.max), 'integer'],
    ['What is the session duration?', String(config.authentication.session.duration), 'integer'],
    ['What is the minimum connection pool size?', String(config.database.pool.min), 'integer'],
    ['What is the connection pool idle timeout?', String(config.database.pool.idleTimeout), 'integer'],
    ['What is the database name?', config.database.name, 'string'],
    ['What is the session refresh threshold?', String(config.authentication.session.refreshThreshold), 'integer'],
    ['What is the version in the configuration?', config.version, 'string'],
  ]

  for (const [prompt, groundTruth, answerType] of retrievalSpecs.slice(0, QUESTION_LIMITS.nestedConfig.fieldRetrieval))
    ask(prompt, groundTruth, 'field-retrieval', answerType)

  // Collections reused by the aggregation and filtering questions below
  const roles = Object.values(config.permissions.roles)
  const groups = Object.values(config.permissions.groups)
  const providers = config.authentication.providers
  const featureFlags = Object.values(config.features)
  const replicas = config.database.replicas

  // Aggregation: counts of nested structures
  askCount('How many roles are defined in permissions?', roles.length)
  askCount('How many groups are defined in permissions?', groups.length)
  askCount('How many authentication providers are configured?', providers.length)
  askCount('How many feature flags are defined?', featureFlags.length)
  askCount('How many database replicas are configured?', replicas.length)

  // Aggregation: providers with admin scope
  askCount(
    'How many authentication providers include the "admin" scope?',
    providers.filter(p => p.scopes.includes('admin')).length,
  )

  // Aggregation: feature flag details
  askCount('How many feature flags are enabled?', featureFlags.filter(f => f.enabled).length)

  // Aggregation: role permissions (0 when there is no admin role)
  askCount(
    'How many permissions does the admin role have?',
    config.permissions.roles.admin?.permissions.length ?? 0,
  )

  // Aggregation: additional nested counts
  askCount(
    'What is the total number of permissions across all roles?',
    roles.reduce((sum, role) => sum + role.permissions.length, 0),
  )
  askCount(
    'How many distinct permissions are defined across all roles?',
    new Set(roles.flatMap(r => r.permissions)).size,
  )
  askCount(
    'What is the total number of variants across all feature flags?',
    featureFlags.reduce((sum, f) => sum + f.variants.length, 0),
  )
  askCount(
    'How many database replicas have a priority greater than 2?',
    replicas.filter(r => r.priority > 2).length,
  )
  askCount(
    'How many feature flags have a rollout percentage greater than 50?',
    featureFlags.filter(f => f.rollout > 50).length,
  )
  askCount(
    'How many groups have more than one role assigned?',
    groups.filter(g => g.roles.length > 1).length,
  )

  // Filtering: complex multi-condition queries as [prompt, count]
  const filteringSpecs: Array<[string, number]> = [
    [
      'How many feature flags are enabled with rollout greater than 50%?',
      featureFlags.filter(f => f.enabled && f.rollout > 50).length,
    ],
    [
      'How many groups have the admin role?',
      groups.filter(g => g.roles.includes('admin')).length,
    ],
    [
      'How many database replicas have priority greater than 2 and port 5432?',
      replicas.filter(r => r.priority > 2 && r.port === 5432).length,
    ],
    [
      'How many authentication providers have more than 2 scopes?',
      providers.filter(p => p.scopes.length > 2).length,
    ],
    [
      'How many roles have at least 5 permissions?',
      roles.filter(r => r.permissions.length >= 5).length,
    ],
    [
      'How many feature flags are disabled with rollout less than 25%?',
      featureFlags.filter(f => !f.enabled && f.rollout < 25).length,
    ],
    [
      'How many enabled features have at least 2 variants?',
      featureFlags.filter(f => f.enabled && f.variants.length >= 2).length,
    ],
  ]

  for (const [prompt, count] of filteringSpecs.slice(0, QUESTION_LIMITS.nestedConfig.filteringComplex))
    ask(prompt, String(count), 'filtering', 'integer')

  return questions
}

================================================ FILE: 
benchmarks/src/questions/nested.ts ================================================ import type { Order } from '../datasets.ts' import type { Question } from '../types.ts' import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants.ts' import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils.ts' /** * Generate nested (orders) questions */ export function generateNestedQuestions(orders: Order[], getId: () => string): Question[] { const questions: Question[] = [] // Field retrieval: order totals and statuses const orderFieldGenerators: Array<(order: Order, getId: () => string) => Question> = [ (order, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the total for order ${order.orderId}?`) .groundTruth(String(order.total)) .type('field-retrieval') .dataset('nested') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), (order, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the status of order ${order.orderId}?`) .groundTruth(order.status) .type('field-retrieval') .dataset('nested') .answerType('string') .build(), ] questions.push(...rotateQuestions( orders, orderFieldGenerators, QUESTION_LIMITS.nested.fieldRetrievalOrders, SAMPLE_STRIDES.ORDER_FIELD, getId, )) // Field retrieval: customer info and order dates const customerFieldGenerators: Array<(order: Order, getId: () => string) => Question> = [ (order, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the customer name for order ${order.orderId}?`) .groundTruth(order.customer.name) .type('field-retrieval') .dataset('nested') .answerType('string') .build(), (order, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the customer email for order ${order.orderId}?`) .groundTruth(order.customer.email) .type('field-retrieval') .dataset('nested') .answerType('string') .build(), (order, getId) => new QuestionBuilder() .id(getId()) .prompt(`What is the order date for order ${order.orderId}?`) .groundTruth(order.orderDate || '') 
.type('field-retrieval') .dataset('nested') .answerType('string') .build(), (order, getId) => new QuestionBuilder() .id(getId()) .prompt(`How many items are in order ${order.orderId}?`) .groundTruth(String(order.items.length)) .type('field-retrieval') .dataset('nested') .answerType('integer') .build(), ] // Use stride + 1 for customer fields to offset from order fields const customerOrders = orders.map((_, i) => orders[i * SAMPLE_STRIDES.CUSTOMER_FIELD + 1] || orders[i]).filter(Boolean) as Order[] questions.push(...rotateQuestions( customerOrders, customerFieldGenerators, QUESTION_LIMITS.nested.fieldRetrievalCustomers, 1, getId, )) // Aggregation: totals and averages const totalRevenue = orders.reduce((sum, o) => sum + o.total, 0) const avgOrderValue = totalRevenue / orders.length const totalOrders = orders.length const maxOrderValue = Math.max(...orders.map(o => o.total)) // Count by status const statuses = [...new Set(orders.map(o => o.status))] for (const status of statuses.slice(0, QUESTION_LIMITS.nested.aggregationStatuses)) { const count = orders.filter(o => o.status === status).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many orders have status "${status}"?`) .groundTruth(String(count)) .type('aggregation') .dataset('nested') .answerType('integer') .build(), ) } questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the total revenue across all orders?') .groundTruth(String(totalRevenue.toFixed(2))) .type('aggregation') .dataset('nested') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), new QuestionBuilder() .id(getId()) .prompt('What is the average order value?') .groundTruth(String(avgOrderValue.toFixed(2))) .type('aggregation') .dataset('nested') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), new QuestionBuilder() .id(getId()) .prompt('How many orders are in the dataset?') .groundTruth(String(totalOrders)) .type('aggregation') .dataset('nested') .answerType('integer') .build(), new 
QuestionBuilder() .id(getId()) .prompt('What is the highest order total?') .groundTruth(String(maxOrderValue.toFixed(2))) .type('aggregation') .dataset('nested') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), ) // Aggregation: high-value orders (single-condition filter) for (const threshold of QUESTION_THRESHOLDS.nested.highValueOrders) { const count = orders.filter(o => o.total > threshold).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many orders have a total greater than ${threshold}?`) .groundTruth(String(count)) .type('aggregation') .dataset('nested') .answerType('integer') .build(), ) } // Filtering: multi-condition queries (status AND value) const orderStatuses = [...new Set(orders.map(o => o.status))] for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndValue)) { const count = orders.filter( o => o.status === status && o.total > QUESTION_THRESHOLDS.nested.statusValueThreshold, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many orders have status "${status}" and total greater than ${QUESTION_THRESHOLDS.nested.statusValueThreshold}?`) .groundTruth(String(count)) .type('filtering') .dataset('nested') .answerType('integer') .build(), ) } // Filtering: status AND items count (multi-condition) for (const status of orderStatuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndItems)) { const count = orders.filter( o => o.status === status && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many orders have status "${status}" and at least ${QUESTION_THRESHOLDS.nested.itemCountThreshold} items?`) .groundTruth(String(count)) .type('filtering') .dataset('nested') .answerType('integer') .build(), ) } // Filtering: total AND items count (multi-condition) for (const threshold of QUESTION_THRESHOLDS.nested.totalThresholdsForItems) { const count = orders.filter( o => o.total > 
threshold && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many orders have a total greater than ${threshold} and at least ${QUESTION_THRESHOLDS.nested.itemCountThreshold} items?`) .groundTruth(String(count)) .type('filtering') .dataset('nested') .answerType('integer') .build(), ) } return questions } ================================================ FILE: benchmarks/src/questions/structural-validation.ts ================================================ import type { Question } from '../types.ts' import { QuestionBuilder } from './utils.ts' /** * Generate structural validation questions for all incompleteness fixtures * * These questions test the ability to detect incomplete, truncated, or corrupted data * by validating structural metadata (TOON's [N] length declarations and {fields} headers). * * @remarks * - TOON's advantage: Explicit [N] and {fields} enable validation * - CSV disadvantage: No structural metadata to validate against * - JSON/YAML disadvantage: Require manual counting and schema inference */ export function generateStructuralValidationQuestions( getId: () => string, ): Question[] { const questions: Question[] = [] // Dataset names and their expected validity const validationFixtures = [ { dataset: 'structural-validation-control', isValid: true, description: 'Valid complete dataset (control)' }, { dataset: 'structural-validation-truncated', isValid: false, description: 'Array truncated: 3 rows removed from end' }, { dataset: 'structural-validation-extra-rows', isValid: false, description: 'Extra rows added beyond declared length' }, { dataset: 'structural-validation-width-mismatch', isValid: false, description: 'Inconsistent field count (missing salary in row 10)' }, { dataset: 'structural-validation-missing-fields', isValid: false, description: 'Missing required fields (no email in multiple rows)' }, ] as const // Generate one validation question per fixture 
for (const fixture of validationFixtures) { questions.push( new QuestionBuilder() .id(getId()) .prompt('Is this data complete and valid? Answer only YES or NO.') .groundTruth(fixture.isValid ? 'YES' : 'NO') .type('structural-validation') .dataset(fixture.dataset) .answerType('boolean') .build(), ) } return questions } ================================================ FILE: benchmarks/src/questions/structure.ts ================================================ import type { AnalyticsMetric, Employee, EventLog, Order, Repository } from '../datasets.ts' import type { Question } from '../types.ts' import { QuestionBuilder } from './utils.ts' /** * Generate structure-awareness questions across all datasets * * These questions test format-native structural affordances: * - TOON's explicit array length [N] and field declarations {fields} * - CSV's header row (but no explicit length) * - JSON/YAML have neither unless the model counts manually */ export function generateStructureQuestions( employees: Employee[], orders: Order[], metrics: AnalyticsMetric[], repos: Repository[], logs: EventLog[], getId: () => string, ): Question[] { const questions: Question[] = [] // ========== TABULAR DATASET (Employees) ========== // Count: Total employees (tests array length awareness) questions.push( new QuestionBuilder() .id(getId()) .prompt('How many employees are in the dataset?') .groundTruth(String(employees.length)) .type('structure-awareness') .dataset('tabular') .answerType('integer') .build(), ) // Field list: Employee fields (tests field name awareness) const employeeFields = 'id,name,email,department,salary,yearsExperience,active' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for employees (comma-separated, in order).') .groundTruth(employeeFields) .type('structure-awareness') .dataset('tabular') .answerType('csv-list-ordered') .build(), ) // Positional: Third field name for employees (tests TOON {fields} syntax) questions.push( new 
QuestionBuilder() .id(getId()) .prompt('What is the 3rd field name for employees?') .groundTruth('email') .type('structure-awareness') .dataset('tabular') .answerType('string') .build(), ) // Last row: Last employee's department (tests ability to find last row using length) const lastEmployee = employees.at(-1)! questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the department of the last employee in the dataset?') .groundTruth(lastEmployee.department) .type('structure-awareness') .dataset('tabular') .answerType('string') .build(), ) // Last row: Last employee's name questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the name of the last employee in the dataset?') .groundTruth(lastEmployee.name) .type('structure-awareness') .dataset('tabular') .answerType('string') .build(), ) // Field count: How many fields per employee (tests schema awareness) questions.push( new QuestionBuilder() .id(getId()) .prompt('How many fields does each employee record have?') .groundTruth('7') .type('structure-awareness') .dataset('tabular') .answerType('integer') .build(), ) // ========== NESTED DATASET (Orders) ========== // Count: Total orders questions.push( new QuestionBuilder() .id(getId()) .prompt('How many orders are in the dataset?') .groundTruth(String(orders.length)) .type('structure-awareness') .dataset('nested') .answerType('integer') .build(), ) // Field list: Order fields const orderFields = 'orderId,customer,items,subtotal,tax,total,status,orderDate' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the top-level field names for orders (comma-separated, in order).') .groundTruth(orderFields) .type('structure-awareness') .dataset('nested') .answerType('csv-list-ordered') .build(), ) // Nested count: Items in specific order const orderWithManyItems = orders.reduce((max, order) => order.items.length > max.items.length ? 
order : max, ) questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many items are in order ${orderWithManyItems.orderId}?`) .groundTruth(String(orderWithManyItems.items.length)) .type('structure-awareness') .dataset('nested') .answerType('integer') .build(), ) // Nested field list: Item fields const itemFields = 'sku,name,quantity,price' questions.push( new QuestionBuilder() .id(getId()) .prompt('What are the field names for items within orders (comma-separated, in order)?') .groundTruth(itemFields) .type('structure-awareness') .dataset('nested') .answerType('csv-list-ordered') .build(), ) // Last row: Last order's status const lastOrder = orders.at(-1)! questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the status of the last order in the dataset?') .groundTruth(lastOrder.status) .type('structure-awareness') .dataset('nested') .answerType('string') .build(), ) // Customer field list const customerFields = 'id,name,email,phone' questions.push( new QuestionBuilder() .id(getId()) .prompt('What are the field names for customer objects within orders (comma-separated, in order)?') .groundTruth(customerFields) .type('structure-awareness') .dataset('nested') .answerType('csv-list-ordered') .build(), ) // ========== ANALYTICS DATASET (Metrics) ========== // Count: Total metrics questions.push( new QuestionBuilder() .id(getId()) .prompt('How many metric records are in the dataset?') .groundTruth(String(metrics.length)) .type('structure-awareness') .dataset('analytics') .answerType('integer') .build(), ) // Field list: Metric fields const metricFields = 'date,views,clicks,conversions,revenue,bounceRate' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for metrics (comma-separated, in order).') .groundTruth(metricFields) .type('structure-awareness') .dataset('analytics') .answerType('csv-list-ordered') .build(), ) // Positional: Fifth field name for metrics (tests TOON {fields} syntax) questions.push( new 
QuestionBuilder() .id(getId()) .prompt('What is the 5th field name for analytics metrics?') .groundTruth('revenue') .type('structure-awareness') .dataset('analytics') .answerType('string') .build(), ) // Last row: Last metric's date const lastMetric = metrics.at(-1)! questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the date of the last metric record in the dataset?') .groundTruth(lastMetric.date) .type('structure-awareness') .dataset('analytics') .answerType('string') .build(), ) // Field count: How many fields per metric questions.push( new QuestionBuilder() .id(getId()) .prompt('How many fields does each metric record have?') .groundTruth('6') .type('structure-awareness') .dataset('analytics') .answerType('integer') .build(), ) // ========== GITHUB DATASET (Repositories) ========== // Count: Total repositories questions.push( new QuestionBuilder() .id(getId()) .prompt('How many repositories are in the dataset?') .groundTruth(String(repos.length)) .type('structure-awareness') .dataset('github') .answerType('integer') .build(), ) // Field list: Repository fields const repoFields = 'id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for repositories (comma-separated, in order).') .groundTruth(repoFields) .type('structure-awareness') .dataset('github') .answerType('csv-list-ordered') .build(), ) // Positional: Seventh field name for repos (tests TOON {fields} syntax) questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the 7th field name for GitHub repositories?') .groundTruth('forks') .type('structure-awareness') .dataset('github') .answerType('string') .build(), ) // Last row: Last repo's name const lastRepo = repos.at(-1)! 
questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the name of the last repository in the dataset?') .groundTruth(lastRepo.name) .type('structure-awareness') .dataset('github') .answerType('string') .build(), ) // Field count: How many fields per repository questions.push( new QuestionBuilder() .id(getId()) .prompt('How many fields does each repository record have?') .groundTruth('11') .type('structure-awareness') .dataset('github') .answerType('integer') .build(), ) // ========== EVENT LOGS DATASET ========== // Count: Total logs questions.push( new QuestionBuilder() .id(getId()) .prompt('How many log entries are in the dataset?') .groundTruth(String(logs.length)) .type('structure-awareness') .dataset('event-logs') .answerType('integer') .build(), ) // Field list: Base log fields (including optional error) const logFields = 'timestamp,level,endpoint,statusCode,responseTime,userId,error' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for log entries (comma-separated, any order, including optional fields).') .groundTruth(logFields) .type('structure-awareness') .dataset('event-logs') .answerType('csv-list-unordered') .build(), ) // Last row: Last log's level const lastLog = logs.at(-1)! 
questions.push(
    new QuestionBuilder()
      .id(getId())
      .prompt('What is the level of the last log entry in the dataset?')
      .groundTruth(lastLog.level)
      .type('structure-awareness')
      .dataset('event-logs')
      .answerType('string')
      .build(),
  )

  return questions
}

================================================ FILE: benchmarks/src/questions/tabular.ts ================================================
import type { Employee } from '../datasets.ts'
import type { Question } from '../types.ts'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants.ts'
import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils.ts'

/**
 * Generate tabular (employee) questions
 *
 * @remarks
 * Questions are emitted in a fixed order: rotated field-retrieval questions,
 * per-department and salary-threshold counts, dataset-level totals, then the
 * multi-condition filtering counts. IDs are pulled from `getId` in that same
 * order, so reordering the sections changes which ID each question receives.
 *
 * @param employees - Employee records the ground truths are computed from
 * @param getId - Supplies the next question ID; called once per question
 * @returns The generated questions
 */
export function generateTabularQuestions(employees: Employee[], getId: () => string): Question[] {
  const questions: Question[] = []

  // Every aggregation/filtering question below has the same shape:
  // an integer answer about the tabular dataset.
  const integerQuestion = (type: Question['type'], prompt: string, value: number): Question =>
    new QuestionBuilder()
      .id(getId())
      .prompt(prompt)
      .groundTruth(String(value))
      .type(type)
      .dataset('tabular')
      .answerType('integer')
      .build()

  // Field retrieval: specific employees
  const fieldGenerators: Array<(emp: Employee, getId: () => string) => Question> = [
    (emp, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What is the salary of ${emp.name}?`)
      .groundTruth(String(emp.salary))
      .type('field-retrieval')
      .dataset('tabular')
      .answerType('integer')
      .build(),
    (emp, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What department does ${emp.name} work in?`)
      .groundTruth(emp.department)
      .type('field-retrieval')
      .dataset('tabular')
      .answerType('string')
      .build(),
    (emp, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What is the email address of ${emp.name}?`)
      .groundTruth(emp.email)
      .type('field-retrieval')
      .dataset('tabular')
      .answerType('string')
      .build(),
    (emp, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`How many years of experience does ${emp.name} have?`)
      .groundTruth(String(emp.yearsExperience))
      .type('field-retrieval')
      .dataset('tabular')
      .answerType('integer')
      .build(),
    (emp, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`Is ${emp.name} an active employee?`)
      .groundTruth(emp.active ? 'yes' : 'no')
      .type('field-retrieval')
      .dataset('tabular')
      .answerType('boolean')
      .build(),
  ]

  questions.push(...rotateQuestions(
    employees,
    fieldGenerators,
    QUESTION_LIMITS.tabular.fieldRetrieval,
    SAMPLE_STRIDES.EMPLOYEE_FIELD,
    getId,
  ))

  // Departments in order of first appearance in the dataset
  const departments = [...new Set(employees.map(e => e.department))]
  const limits = QUESTION_LIMITS.tabular
  const thresholds = QUESTION_THRESHOLDS.tabular

  // Aggregation: count by department
  for (const dept of departments.slice(0, limits.aggregationDepartments)) {
    const count = employees.filter(e => e.department === dept).length
    questions.push(integerQuestion('aggregation', `How many employees work in ${dept}?`, count))
  }

  // Aggregation: salary ranges (single-condition filters)
  for (const threshold of thresholds.salaryRanges) {
    const count = employees.filter(e => e.salary > threshold).length
    questions.push(integerQuestion('aggregation', `How many employees have a salary greater than ${threshold}?`, count))
  }

  // Aggregation: totals and averages
  const totalEmployees = employees.length
  // Note: evaluates to NaN when `employees` is empty (0 / 0)
  const avgSalary = Math.round(employees.reduce((sum, e) => sum + e.salary, 0) / totalEmployees)
  const activeCount = employees.filter(e => e.active).length
  const inactiveCount = employees.filter(e => !e.active).length

  questions.push(
    integerQuestion('aggregation', 'How many employees are in the dataset?', totalEmployees),
    integerQuestion('aggregation', 'What is the average salary across all employees?', avgSalary),
    integerQuestion('aggregation', 'How many employees are active?', activeCount),
    integerQuestion('aggregation', 'How many employees are inactive?', inactiveCount),
  )

  // Filtering: count by department with salary filter (multi-condition)
  for (const dept of departments.slice(0, limits.filteringMultiConditionDepartments)) {
    const count = employees.filter(
      e => e.department === dept && e.salary > thresholds.departmentSalaryThreshold,
    ).length
    questions.push(integerQuestion(
      'filtering',
      `How many employees in ${dept} have a salary greater than ${thresholds.departmentSalaryThreshold}?`,
      count,
    ))
  }

  // Filtering: active employees by experience (multi-condition)
  for (const exp of thresholds.experienceYears.slice(0, limits.filteringExperience)) {
    const count = employees.filter(e => e.yearsExperience > exp && e.active).length
    questions.push(integerQuestion(
      'filtering',
      `How many active employees have more than ${exp} years of experience?`,
      count,
    ))
  }

  // Filtering: department by experience (multi-condition)
  for (const dept of departments.slice(0, limits.filteringDepartmentExp)) {
    const count = employees.filter(
      e => e.department === dept && e.yearsExperience > thresholds.departmentExperienceThreshold,
    ).length
    questions.push(integerQuestion(
      'filtering',
      `How many employees in ${dept} have more than ${thresholds.departmentExperienceThreshold} years of experience?`,
      count,
    ))
  }

  // Filtering: department by active status (multi-condition)
  for (const dept of departments.slice(0, limits.filteringDepartmentActive)) {
    const count = employees.filter(e => e.department === dept && e.active).length
    questions.push(integerQuestion('filtering', `How many active employees work in ${dept}?`, count))
  }

  return questions
}

================================================ FILE: benchmarks/src/questions/utils.ts ================================================
import type { AnswerType, NormalizationOptions } from '../normalize.ts'
import type { Question } from '../types.ts'

// Constants for sampling strides
export const SAMPLE_STRIDES = {
  EMPLOYEE_FIELD: 2,
  ORDER_FIELD: 2,
  CUSTOMER_FIELD: 2,
  ANALYTICS_FIELD: 3,
  METRIC_FIELD: 3,
  REPO_FIELD: 7,
  EVENT_LOG_FIELD: 5,
} as const

/**
 * ID Generator
 *
 * @returns An endless generator yielding `q1`, `q2`, `q3`, …
 */
export function* createIdGenerator(): Generator<string, never, unknown> {
  let id = 1
  while (true) {
    yield `q${id++}`
  }
}

/**
 * Question Builder class for fluent question creation
 *
 * @remarks
 * Each setter stores one field and returns the builder so calls can be chained.
 * `build()` hands back the accumulated object itself, so a builder instance
 * should be used for a single question.
 */
export class QuestionBuilder {
  private question: Partial<Question> = {}

  id(id: string): this {
    this.question.id = id
    return this
  }

  prompt(prompt: string): this {
    this.question.prompt = prompt
    return this
  }

  groundTruth(groundTruth: string): this {
    this.question.groundTruth = groundTruth
    return this
  }

  type(type: Question['type']): this {
    this.question.type = type
    return this
  }

  dataset(dataset: Question['dataset']): this {
    this.question.dataset = dataset
    return this
  }

  answerType(kind: AnswerType): this {
    this.question.answerType = kind
    return this
  }

  normalize(options: Partial<NormalizationOptions>): this {
    this.question.normalizationOptions = options
    return this
  }

  /**
   * Finish the question.
   *
   * @returns The accumulated question
   * @throws Error('Incomplete question') when id, prompt, groundTruth, type or
   * dataset is missing or empty; answerType and normalization options are not checked
   */
  build(): Question {
    const { id, prompt, groundTruth, type, dataset } = this.question
    if (!id || !prompt || !groundTruth || !type || !dataset) {
      throw new Error('Incomplete question')
    }

    return this.question as Question
  }
}

/**
 * Rotate through question generators
 *
 * @remarks
 * Produces at most `min(limit, items.length)` questions. Question `i` is built
 * from `items[i * stride]` (falling back to `items[i]` when that slot is
 * empty) using `generators[i % generators.length]`.
 */
export function rotateQuestions<T>(
  items: T[],
  generators: ((item: T, getId: () => string) =>
Question)[],
  limit: number,
  stride: number,
  getId: () => string,
): Question[] {
  const questions: Question[] = []

  for (let i = 0; i < Math.min(limit, items.length); i++) {
    // Strided sample first; fall back to the i-th item when the strided slot is empty
    const item = items[i * stride] || items[i]
    if (!item)
      continue

    const generatorIndex = i % generators.length
    const generator = generators[generatorIndex]
    if (generator) {
      questions.push(generator(item, getId))
    }
  }

  return questions
}

================================================ FILE: benchmarks/src/report.ts ================================================
import type { Dataset, EfficiencyRanking, EvaluationResult, FormatResult, Question } from './types.ts'
import { FORMATTER_DISPLAY_NAMES, QUESTION_TYPE_LABELS, QUESTION_TYPES } from './constants.ts'
import { ACCURACY_DATASETS } from './datasets.ts'
import { models, PRIMERS } from './evaluate.ts'
import { supportsCSV } from './formatters.ts'
import { generateQuestions } from './questions/index.ts'
import { createProgressBar, tokenize } from './utils.ts'

// Selects which renderer the efficiency ranking section uses
const EFFICIENCY_CHART_STYLE: 'vertical' | 'horizontal' = 'horizontal'

/**
 * Calculate token counts for all format+dataset combinations
 *
 * @remarks
 * Includes primer tokens for fairer comparison across formats
 *
 * @param formatters - Formatter functions keyed by format name.
 * NOTE(review): the parameter type of the formatter callbacks was unreadable
 * in this copy; `unknown` is assumed – confirm against the formatters module.
 * @returns Token counts keyed by `${formatName}-${datasetName}`; CSV entries
 * are omitted for datasets `supportsCSV` rejects
 */
export function calculateTokenCounts(
  formatters: Record<string, (data: unknown) => string>,
): Record<string, number> {
  const tokenCounts: Record<string, number> = {}

  for (const [formatName, formatter] of Object.entries(formatters)) {
    // The primer depends only on the format, so look it up once per format
    const primer = PRIMERS[formatName] ?? ''

    for (const dataset of ACCURACY_DATASETS) {
      // Skip CSV for datasets that don't support it
      if (formatName === 'csv' && !supportsCSV(dataset))
        continue

      const formattedData = formatter(dataset.data)

      // Include primer in token count for fair comparison
      const fullPrompt = primer
        ? `${primer}\n\n${formattedData}`
        : formattedData

      tokenCounts[`${formatName}-${dataset.name}`] = tokenize(fullPrompt)
    }
  }

  return tokenCounts
}

/**
 * Calculate per-format statistics from evaluation results
 *
 * @param results - Individual evaluation results (one per question/format/model)
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns One entry per format present in `results`, sorted by accuracy
 * (highest first). `totalTokens` is the rounded mean over that format's
 * token-count entries and is NaN when `tokenCounts` has none for the format.
 */
export function calculateFormatResults(
  results: EvaluationResult[],
  tokenCounts: Record<string, number>,
): FormatResult[] {
  const formatNames = [...new Set(results.map(r => r.format))]
  const tokenEntries = Object.entries(tokenCounts)

  const perFormat = formatNames.map((formatName) => {
    const formatResults = results.filter(r => r.format === formatName)
    const totalCount = formatResults.length
    const correctCount = formatResults.filter(r => r.isCorrect).length

    // Calculate average tokens across all datasets for this format
    const keyPrefix = `${formatName}-`
    const formatTokenEntries = tokenEntries.filter(([key]) => key.startsWith(keyPrefix))
    const tokenSum = formatTokenEntries.reduce((sum, [, tokens]) => sum + tokens, 0)
    const avgTokens = tokenSum / formatTokenEntries.length

    const latencySum = formatResults.reduce((sum, r) => sum + r.latencyMs, 0)
    const averageLatency = latencySum / totalCount

    return {
      format: formatName,
      accuracy: correctCount / totalCount,
      totalTokens: Math.round(avgTokens),
      averageLatency: Math.round(averageLatency),
      correctCount,
      totalCount,
    }
  })

  return perFormat.sort((a, b) => b.accuracy - a.accuracy)
}

/**
 * Generate consolidated retrieval accuracy report
 *
 * @param results - Individual evaluation results
 * @param formatResults - Per-format statistics (see `calculateFormatResults`)
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns Markdown for the accuracy section
 */
export function generateAccuracyReport(
  results: EvaluationResult[],
  formatResults: FormatResult[],
  tokenCounts: Record<string, number>,
): string {
  const questions = generateQuestions()
  // Distinct question IDs that actually appear in the results
  const totalQuestions = [...new Set(results.map(r => r.questionId))].length
  const modelIds = models.map(m => m.modelId)
  // Only models that produced at least one result, in configuration order
  const modelNames = modelIds.filter(id => results.some(r => r.model === id))

  return ` Benchmarks test LLM comprehension across different input formats using ${totalQuestions} data retrieval questions on ${modelNames.length} ${modelNames.length === 1 ? 'model' : 'models'}. 
Show Dataset Catalog ${generateDatasetCatalog(ACCURACY_DATASETS)} #### Efficiency Ranking (Accuracy per 1K Tokens) ${generateEfficiencyRankingReport(formatResults, totalQuestions, modelNames.length)} #### Per-Model Accuracy ${generateDetailedAccuracyReport(formatResults, results, questions, tokenCounts)} `.trimStart()
}

/**
 * Generate dataset catalog section
 *
 * @param datasets - Datasets to list, one table row each
 * @returns Markdown table plus a legend for the structure, CSV and eligibility columns
 */
function generateDatasetCatalog(datasets: Dataset[]): string {
  const tableLines: string[] = []

  for (const dataset of datasets) {
    const csvSupport = supportsCSV(dataset) ? '✓' : '✗'
    // Row count is the length of the first top-level collection; 1 when it has no length
    const rowCount = Object.values(dataset.data)[0]?.length ?? 1
    const structure = dataset.metadata.structureClass
    const eligibility = `${dataset.metadata.tabularEligibility}%`

    tableLines.push(`| ${dataset.description} | ${rowCount} | ${structure} | ${csvSupport} | ${eligibility} |`)
  }

  const rows = tableLines.join('\n')

  return ` #### Dataset Catalog | Dataset | Rows | Structure | CSV Support | Eligibility | | ------- | ---- | --------- | ----------- | ----------- | ${rows} **Structure classes:** - **uniform**: All objects have identical fields with primitive values - **semi-uniform**: Mix of uniform and non-uniform structures - **nested**: Objects with nested structures (nested objects or arrays) - **deep**: Highly nested with minimal tabular eligibility **CSV Support:** ✓ (supported), ✗ (not supported – would require lossy flattening) **Eligibility:** Percentage of arrays that qualify for TOON's tabular format (uniform objects with primitive values) `.trim()
}

/**
 * Generate efficiency ranking report
 *
 * @param formatResults - Per-format statistics
 * @param totalQuestions - Number of distinct questions evaluated
 * @param modelCount - Number of models evaluated; used to turn CSV's
 * evaluation count back into a question count
 * @returns Markdown with the efficiency chart, a TOON-vs-JSON summary (when
 * both formats are present) and a note about CSV (when present)
 */
function generateEfficiencyRankingReport(
  formatResults: FormatResult[],
  totalQuestions: number,
  modelCount: number,
): string {
  const findFormat = (name: string) => formatResults.find(r => r.format === name)
  const toon = findFormat('toon')
  const json = findFormat('json-pretty')
  const csv = findFormat('csv')

  // Build efficiency ranking (accuracy per 1k tokens)
  // CSV is excluded since it only supports a subset of datasets (~half the questions)
  const efficiencyRanking = formatResults
    .filter(fr => fr.format !== 'csv')
    .map(fr => ({
      format: fr.format,
      efficiency: (fr.accuracy * 100) / (fr.totalTokens / 1000),
      accuracy: fr.accuracy,
      tokens: fr.totalTokens,
    }))
    .sort((a, b) => b.efficiency - a.efficiency)

  const efficiencyChart = EFFICIENCY_CHART_STYLE === 'vertical'
    ? generateVerticalEfficiencyChart(efficiencyRanking)
    : generateHorizontalEfficiencyChart(efficiencyRanking)

  // Build summary text
  let summary = ''
  if (toon && json) {
    const toonVsJson = `**${(toon.accuracy * 100).toFixed(1)}%** accuracy (vs JSON's ${(json.accuracy * 100).toFixed(1)}%)`
    const tokenSavings = `**${((1 - toon.totalTokens / json.totalTokens) * 100).toFixed(1)}% fewer tokens**`
    summary = `TOON achieves ${toonVsJson} while using ${tokenSavings}.`
  }

  // Add CSV note if available
  let csvNote = ''
  if (csv) {
    // CSV totalCount is evaluations (questions × models), so divide by number of models to get question count
    const csvQuestionCount = csv.totalCount / modelCount
    csvNote = `**Note on CSV:** Excluded from ranking as it only supports ${csvQuestionCount} of ${totalQuestions} questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle.`
  }

  return ` Each format ranked by efficiency (accuracy percentage per 1,000 tokens): \`\`\` ${efficiencyChart} \`\`\` *Efficiency score = (Accuracy % ÷ Tokens) × 1,000. 
Higher is better.* > [!TIP] > ${summary} ${csvNote} `.trim()
}

/**
 * Generate detailed accuracy report with breakdowns and methodology
 *
 * @param formatResults - Per-format statistics
 * @param results - Individual evaluation results
 * @param questions - Generated question list, used for the per-type distribution
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns Markdown for the per-model accuracy section
 */
function generateDetailedAccuracyReport(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  questions: Question[],
  tokenCounts: Record<string, number>,
): string {
  const toon = formatResults.find(r => r.format === 'toon')
  const json = formatResults.find(r => r.format === 'json-pretty')

  const modelIds = models.map(m => m.modelId)
  const modelNames = modelIds.filter(id => results.some(r => r.model === id))

  // Generate model breakdown section
  const modelBreakdown = generateModelBreakdown(formatResults, results, modelNames)

  // Generate summary comparison
  const summaryComparison = generateSummaryComparison(toon, json)

  // Generate performance by dataset
  const datasetBreakdown = generateDatasetBreakdown(formatResults, results, questions, tokenCounts)

  // Generate performance by model
  const modelPerformance = generateModelPerformanceTable(formatResults, results, modelNames)

  // Generate question type breakdown
  const questionTypeBreakdown = generateQuestionTypeBreakdown(formatResults, results, questions)

  const totalQuestions = [...new Set(results.map(r => r.questionId))].length

  // Calculate question type distribution
  const fieldRetrievalCount = questions.filter(q => q.type === 'field-retrieval').length
  const aggregationCount = questions.filter(q => q.type === 'aggregation').length
  const filteringCount = questions.filter(q => q.type === 'filtering').length
  const structureAwarenessCount = questions.filter(q => q.type === 'structure-awareness').length
  const structuralValidationCount = questions.filter(q => q.type === 'structural-validation').length

  const fieldRetrievalPercent = ((fieldRetrievalCount / totalQuestions) * 100).toFixed(0)
  const aggregationPercent = ((aggregationCount / totalQuestions) * 100).toFixed(0)
  const filteringPercent = ((filteringCount / totalQuestions) * 100).toFixed(0)
  const structureAwarenessPercent = ((structureAwarenessCount
/ totalQuestions) * 100).toFixed(0) /* The percentages divide counts taken from the generated question list by totalQuestions, which counts distinct question IDs found in results; they only add up to 100 when results cover every generated question. */ const structuralValidationPercent = ((structuralValidationCount / totalQuestions) * 100).toFixed(0) // Calculate dataset sizes const tabularSize = ACCURACY_DATASETS.find(d => d.name === 'tabular')?.data.employees?.length || 0 const nestedSize = ACCURACY_DATASETS.find(d => d.name === 'nested')?.data.orders?.length || 0 const analyticsSize = ACCURACY_DATASETS.find(d => d.name === 'analytics')?.data.metrics?.length || 0 const githubSize = ACCURACY_DATASETS.find(d => d.name === 'github')?.data.repositories?.length || 0 const eventLogsSize = ACCURACY_DATASETS.find(d => d.name === 'event-logs')?.data.logs?.length || 0 const nestedConfigSize = 1 // Single config object // Calculate number of formats and evaluations const formatCount = formatResults.length /* NOTE(review): assumes every format is evaluated on every question; CSV appears to cover only a subset, so this product may overstate the real call count - confirm. */ const totalEvaluations = totalQuestions * formatCount * modelNames.length return ` Accuracy across ${modelNames.length} ${modelNames.length === 1 ? 'LLM' : 'LLMs'} on ${totalQuestions} data retrieval questions: \`\`\` ${modelBreakdown} \`\`\` ${summaryComparison} Performance by dataset, model, and question type #### Performance by Question Type ${questionTypeBreakdown} #### Performance by Dataset ${datasetBreakdown} #### Performance by Model ${modelPerformance} #### What's Being Measured This benchmark tests **LLM comprehension and data retrieval accuracy** across different input formats. Each LLM receives formatted data and must answer questions about it. This does **not** test the model's ability to generate TOON output – only to read and understand it. #### Datasets Tested Eleven datasets designed to test different structural patterns and validation capabilities: **Primary datasets:** 1. **Tabular** (${tabularSize} employee records): Uniform objects with identical fields – optimal for TOON's tabular format. 2. **Nested** (${nestedSize} e-commerce orders): Complex structures with nested customer objects and item arrays. 3. 
**Analytics** (${analyticsSize} days of metrics): Time-series data with dates and numeric values. 4. **GitHub** (${githubSize} repositories): Real-world data from top GitHub repos by stars. 5. **Event Logs** (${eventLogsSize} logs): Semi-uniform data with ~50% flat logs and ~50% with nested error objects. 6. **Nested Config** (${nestedConfigSize} configuration): Deeply nested configuration with minimal tabular eligibility. **Structural validation datasets:** 7. **Control**: Valid complete dataset (baseline for validation) 8. **Truncated**: Array with 3 rows removed from end (tests \`[N]\` length detection) 9. **Extra rows**: Array with 3 additional rows beyond declared length 10. **Width mismatch**: Inconsistent field count (missing salary in row 10) 11. **Missing fields**: Systematic field omissions (no email in multiple rows) #### Question Types ${totalQuestions} questions are generated dynamically across five categories: - **Field retrieval (${fieldRetrievalPercent}%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths) - Example: "What is Alice's salary?" → \`75000\` - Example: "How many items are in order ORD-0042?" → \`3\` - Example: "What is the customer name for order ORD-0042?" → \`John Doe\` - **Aggregation (${aggregationPercent}%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons) - Example: "How many employees work in Engineering?" → \`17\` - Example: "What is the total revenue across all orders?" → \`45123.50\` - Example: "How many employees have salary > 80000?" → \`23\` - **Filtering (${filteringPercent}%)**: Multi-condition queries requiring compound logic (AND constraints across fields) - Example: "How many employees in Sales have salary > 80000?" → \`5\` - Example: "How many active employees have more than 10 years of experience?" 
→ \`8\` - **Structure awareness (${structureAwarenessPercent}%)**: Tests format-native structural affordances (TOON's \`[N]\` count and \`{fields}\`, CSV's header row) - Example: "How many employees are in the dataset?" → \`100\` - Example: "List the field names for employees" → \`id, name, email, department, salary, yearsExperience, active\` - Example: "What is the department of the last employee?" → \`Sales\` - **Structural validation (${structuralValidationPercent}%)**: Tests ability to detect incomplete, truncated, or corrupted data using structural metadata - Example: "Is this data complete and valid?" → \`YES\` (control dataset) or \`NO\` (corrupted datasets) - Tests TOON's \`[N]\` length validation and \`{fields}\` consistency checking - Demonstrates CSV's lack of structural validation capabilities #### Evaluation Process 1. **Format conversion**: Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => FORMATTER_DISPLAY_NAMES[f.format] || f.format).join(', ')}). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate deterministically**: Answers are validated using type-aware comparison (e.g., \`50000\` = \`$50,000\`, \`Engineering\` = \`engineering\`, \`2025-01-01\` = \`January 1, 2025\`) without requiring an LLM judge. 
#### Models & Configuration - **Models tested**: ${modelNames.map(m => `\`${m}\``).join(', ')} - **Token counting**: Using \`gpt-tokenizer\` with \`o200k_base\` encoding (GPT-5 tokenizer) - **Temperature**: Not set (models use their defaults) - **Total evaluations**: ${totalQuestions} questions × ${formatCount} formats × ${modelNames.length} models = ${totalEvaluations.toLocaleString('en-US')} LLM calls `.trim() } /** * Generate ASCII bar chart showing per-model accuracy across formats * * One section per model: the model name followed by one line per format (display name, progress bar, accuracy percentage, correct/total), ordered by accuracy with the highest first. Formats with no results for a model are shown with 0 accuracy. */ function generateModelBreakdown( formatResults: FormatResult[], results: EvaluationResult[], modelNames: string[], ): string { const maxDisplayNameWidth = Math.max( ...Object.values(FORMATTER_DISPLAY_NAMES).map(name => name.length), ) const progressBarWidth = 20 return modelNames.map((modelName, i) => { const modelResults = formatResults.map((fr) => { const modelFormatResults = results.filter(r => r.model === modelName && r.format === fr.format) const correctCount = modelFormatResults.filter(r => r.isCorrect).length const totalCount = modelFormatResults.length const accuracy = totalCount > 0 ? correctCount / totalCount : 0 return { format: fr.format, accuracy, correctCount, totalCount, } }).sort((a, b) => b.accuracy - a.accuracy) const formatLines = modelResults.map((result) => { const bar = createProgressBar(result.accuracy, 1, progressBarWidth) const accuracyString = `${(result.accuracy * 100).toFixed(1)}%`.padStart(6) const countString = `(${result.correctCount}/${result.totalCount})` const prefix = result.format === 'toon' ? '→ ' : ' ' const displayName = FORMATTER_DISPLAY_NAMES[result.format] || result.format return `${prefix}${displayName.padEnd(maxDisplayNameWidth)} ${bar} ${accuracyString} ${countString}` }).join('\n') // Add blank line before model name, except for first model return `${i > 0 ? 
'\n' : ''}${modelName}\n${formatLines}`
  }).join('\n')
}

/**
 * Generate summary comparison between TOON and JSON formats
 *
 * @param toon - Statistics for the `toon` format, if it was evaluated
 * @param json - Statistics for the `json-pretty` format, if it was evaluated
 * @returns A tip callout comparing accuracy and token usage, or an empty
 * string when either format is missing
 */
function generateSummaryComparison(
  toon: FormatResult | undefined,
  json: FormatResult | undefined,
): string {
  if (!toon || !json)
    return ''

  const toonAccuracy = (toon.accuracy * 100).toFixed(1)
  const jsonAccuracy = (json.accuracy * 100).toFixed(1)
  const tokenSavings = ((1 - toon.totalTokens / json.totalTokens) * 100).toFixed(1)

  return ` > [!TIP] > TOON achieves **${toonAccuracy}% accuracy** (vs JSON's ${jsonAccuracy}%) while using **${tokenSavings}% fewer tokens** on these datasets. `.trim()
}

/**
 * Generate per-dataset performance breakdown tables
 *
 * @param formatResults - Per-format statistics; supplies the format list and
 * the fallback token count
 * @param results - Individual evaluation results
 * @param questions - Generated questions, used to map question IDs to datasets
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns One Markdown table per dataset that has results (at most six
 * formats each), ordered by accuracy² per 1K tokens
 */
function generateDatasetBreakdown(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  questions: Question[],
  tokenCounts: Record<string, number>,
): string {
  // Build question ID to dataset mapping for O(1) lookups
  const questionDatasetMap = new Map(questions.map(q => [q.id, q.dataset]))

  return ACCURACY_DATASETS.map((dataset) => {
    // Results for this dataset do not depend on the format, so select them once
    const resultsForDataset = results.filter(r => questionDatasetMap.get(r.questionId) === dataset.name)
    if (resultsForDataset.length === 0)
      return ''

    const datasetResults: { format: string, accuracy: number, tokens: number, correctCount: number, totalCount: number }[] = []

    for (const fr of formatResults) {
      const formatDatasetResults = resultsForDataset.filter(r => r.format === fr.format)
      // Formats without results for this dataset (e.g. CSV on nested data) get no row
      if (formatDatasetResults.length === 0)
        continue

      const correctCount = formatDatasetResults.filter(r => r.isCorrect).length
      const totalCount = formatDatasetResults.length

      // Get token count for this dataset+format, falling back to the format-wide average
      const tokenKey = `${fr.format}-${dataset.name}`
      const tokens = tokenCounts[tokenKey] || fr.totalTokens

      datasetResults.push({
        format: fr.format,
        accuracy: correctCount / totalCount,
        tokens,
        correctCount,
        totalCount,
      })
    }

    if (datasetResults.length === 0)
      return ''

    // Sort by efficiency (accuracy is squared, so it outweighs token count)
    datasetResults.sort((a, b) => {
      const effA = (a.accuracy ** 2) / (a.tokens / 1000)
      const effB = (b.accuracy ** 2) / (b.tokens / 1000)
      return effB - effA
    })

    const tableRows = datasetResults.slice(0, 6).map(result =>
      `| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.tokens.toLocaleString('en-US')} | ${result.correctCount}/${result.totalCount} |`,
    ).join('\n')

    return ` ##### ${dataset.description} | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | ${tableRows} `.trimStart()
  }).filter(Boolean).join('\n').trim()
}

/**
 * Generate question type breakdown table
 *
 * @param formatResults - Per-format statistics; one table column per entry
 * @param results - Individual evaluation results
 * @param questions - Generated questions, used to map question IDs to types
 * @returns Markdown table with one row per question type that has results;
 * cells show accuracy or `N/A` when a format has no results for that type
 */
function generateQuestionTypeBreakdown(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  questions: Question[],
): string {
  // Build header
  const formatNames = formatResults.map(fr => FORMATTER_DISPLAY_NAMES[fr.format] || fr.format)
  const header = `| Question Type | ${formatNames.join(' | ')} |`
  const separator = `| ------------- | ${formatNames.map(() => '----').join(' | ')} |`

  // Build rows
  const rows = QUESTION_TYPES.map((type) => {
    // Set lookup keeps the result filter linear in the number of results
    const questionIds = new Set(questions.filter(q => q.type === type).map(q => q.id))
    const typeResults = results.filter(r => questionIds.has(r.questionId))

    if (typeResults.length === 0)
      return undefined

    const accuracies = formatResults.map((fr) => {
      const formatTypeResults = typeResults.filter(r => r.format === fr.format)
      if (formatTypeResults.length === 0)
        return 'N/A'

      const correctCount = formatTypeResults.filter(r => r.isCorrect).length
      const totalCount = formatTypeResults.length
const accuracy = totalCount > 0 ? correctCount / totalCount : 0

      return `${(accuracy * 100).toFixed(1)}%`
    })

    return `| ${QUESTION_TYPE_LABELS[type]} | ${accuracies.join(' | ')} |`
  }).filter(Boolean)

  return ` ${header} ${separator} ${rows.join('\n')} `.trim()
}

/**
 * Generate per-model performance comparison tables
 *
 * @param formatResults - Per-format statistics; supplies the list of formats
 * @param results - Individual evaluation results
 * @param modelNames - Models to report on, one table each
 * @returns One Markdown table per model with a row per format, ordered by
 * accuracy (highest first). A format with no results for a model is reported
 * as 0.0% (0/0), matching `generateModelBreakdown`.
 */
function generateModelPerformanceTable(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  modelNames: string[],
): string {
  return modelNames.map((modelName) => {
    // Narrow to this model once instead of once per format
    const resultsForModel = results.filter(r => r.model === modelName)

    const modelResults = formatResults.map((fr) => {
      const modelFormatResults = resultsForModel.filter(r => r.format === fr.format)
      const correctCount = modelFormatResults.filter(r => r.isCorrect).length
      const totalCount = modelFormatResults.length

      // Guard the division: 0/0 would print "NaN%" and make the sort comparator return NaN
      const accuracy = totalCount > 0 ? correctCount / totalCount : 0

      return {
        format: fr.format,
        accuracy,
        correctCount,
        totalCount,
      }
    }).sort((a, b) => b.accuracy - a.accuracy)

    const tableRows = modelResults.map(result =>
      `| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.correctCount}/${result.totalCount} |`,
    ).join('\n')

    return ` ##### ${modelName} | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | ${tableRows} `.trimStart()
  }).join('\n').trim()
}

/**
 * Generate horizontal bar chart for efficiency ranking
 *
 * @param ranking - Formats with their efficiency, accuracy and token count,
 * rendered in the order given
 * @returns One line per format; bars are scaled relative to the highest efficiency
 */
function generateHorizontalEfficiencyChart(
  ranking: EfficiencyRanking[],
): string {
  const barWidth = 20
  const maxEfficiency = Math.max(...ranking.map(r => r.efficiency))
  const maxFormatWidth = Math.max(...ranking.map((r) => {
    const displayName = FORMATTER_DISPLAY_NAMES[r.format] || r.format
    return displayName.length
  }))

  return ranking
    .map((r) => {
      const normalizedValue = r.efficiency / maxEfficiency
      const bar = createProgressBar(normalizedValue, 1, barWidth)
      const displayName = FORMATTER_DISPLAY_NAMES[r.format] || r.format
      const formatName = displayName.padEnd(maxFormatWidth)
      const efficiency = r.efficiency.toFixed(1).padStart(4)
      const accuracy = `${(r.accuracy * 100).toFixed(1)}%`.padStart(5)
const tokens = r.tokens.toLocaleString('en-US').padStart(5) return `${formatName} ${bar} ${efficiency} acc%/1K tok │ ${accuracy} acc │ ${tokens} tokens` }) .join('\n') } /** * Generate vertical bar chart for efficiency ranking */ function generateVerticalEfficiencyChart( ranking: EfficiencyRanking[], ): string { const maxEfficiency = Math.max(...ranking.map(r => r.efficiency)) const chartHeight = 8 // Generate rows from top to bottom const rows: string[] = [] // Y-axis and bars for (let i = chartHeight; i >= 0; i--) { const threshold = (i / chartHeight) * maxEfficiency const yLabel = i === chartHeight || i === Math.floor(chartHeight / 2) || i === 0 ? Math.round(threshold).toString().padStart(4) : ' ' const bars = ranking .map((r) => { const barHeight = (r.efficiency / maxEfficiency) * chartHeight let char = ' ' if (barHeight >= i) { // Use different characters for visual distinction if (ranking.indexOf(r) === 0) char = '▓' // Top format else if (ranking.indexOf(r) <= 2) char = '▒' // Top 3 else char = '░' // Rest } return char }) .join(' ') rows.push(`${yLabel}│ ${bars}`) } // X-axis const axis = ` └──${ranking.map(() => '┴').join('────')}──` rows.push(axis) // Format labels (split long names into multiple rows) const formatRow1 = ranking .map((r) => { const parts = r.format.split('-') return (parts[0] || '').padEnd(5).substring(0, 5) }) .join('') rows.push(` ${formatRow1}`) const formatRow2 = ranking .map((r) => { const parts = r.format.split('-') return (parts[1] || '').padEnd(5).substring(0, 5) }) .join('') if (formatRow2.trim()) rows.push(` ${formatRow2}`) return rows.join('\n') } ================================================ FILE: benchmarks/src/storage.ts ================================================ import type { Storage, StorageValue } from 'unstorage' import type { EvaluationResult } from './types.ts' import * as path from 'node:path' import { createStorage } from 'unstorage' import fsDriver from 'unstorage/drivers/fs' import { BENCHMARKS_DIR } from 
/**
 * Storage instance for model results
 *
 * @remarks
 * Stores results in: `benchmarks/results/accuracy/models/`
 */
export const resultsStorage: Storage<StorageValue> = createStorage({
  driver: fsDriver({
    base: path.join(BENCHMARKS_DIR, 'results', 'accuracy', 'models'),
  }),
})

/**
 * Load the stored evaluation results for a model
 *
 * @param modelId - Storage key of the model
 * @returns The stored results, or `undefined` when the storage yields a nullish value
 */
export async function loadModelResults(modelId: string): Promise<EvaluationResult[] | undefined> {
  const data = await resultsStorage.getItem<EvaluationResult[]>(modelId)
  return data ?? undefined
}

/**
 * Store the evaluation results for a model under its ID
 *
 * @param modelId - Storage key of the model
 * @param results - Results to store
 */
export async function saveModelResults(modelId: string, results: EvaluationResult[]): Promise<void> {
  await resultsStorage.setItem(modelId, results)
}

/**
 * Load the results of every key present in the storage
 *
 * @returns Results keyed by model ID; keys whose item is falsy are omitted
 */
export async function getAllModelResults(): Promise<Record<string, EvaluationResult[]>> {
  const keys = await resultsStorage.getKeys()
  const results: Record<string, EvaluationResult[]> = {}

  // Items are fetched concurrently; each callback writes to its own key
  await Promise.all(
    keys.map(async (modelId) => {
      const data = await resultsStorage.getItem<EvaluationResult[]>(modelId)
      if (data)
        results[modelId] = data
    }),
  )

  return results
}

/**
 * Check whether the storage has an item for a model
 *
 * @param modelId - Storage key of the model
 */
export async function hasModelResults(modelId: string): Promise<boolean> {
  return await resultsStorage.hasItem(modelId)
}
/**
 * Generate visual progress bar using ASCII characters
 *
 * @remarks
 * The `value / max` ratio is clamped to `[0, 1]`, so out-of-range values render as an
 * empty or full bar instead of throwing a `RangeError` from `String.prototype.repeat`.
 * An undefined ratio (`0 / 0`, or a `NaN` argument) renders as an empty bar.
 *
 * @param value - Current value
 * @param max - Maximum value
 * @param width - Width of the bar in characters (default: 25)
 * @param chars - Characters to use for filled and empty sections
 * @param chars.filled - Character for filled portion (default: '█')
 * @param chars.empty - Character for empty portion (default: '░')
 * @returns ASCII progress bar string
 *
 * @example
 * createProgressBar(75, 100, 20) // "███████████████░░░░░"
 * createProgressBar(0.5, 1, 10) // "█████░░░░░"
 * createProgressBar(0.75, 1, 20, { filled: '▓', empty: '░' }) // "▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓░░░░░"
 * createProgressBar(150, 100, 10) // "██████████" (clamped)
 */
export function createProgressBar(
  value: number,
  max: number,
  width = 25,
  chars: { filled: string, empty: string } = { filled: '█', empty: '░' },
): string {
  const ratio = value / max
  // NaN cannot be clamped with min/max, so map it to an empty bar explicitly
  const clampedRatio = Number.isNaN(ratio) ? 0 : Math.min(Math.max(ratio, 0), 1)
  // A negative width would make repeat() throw; treat it as a zero-width bar
  const cells = Math.max(0, width)

  const filled = Math.round(clampedRatio * cells)
  const empty = Math.max(0, cells - filled)

  return chars.filled.repeat(filled) + chars.empty.repeat(empty)
}

/**
 * Count tokens in text using gpt-tokenizer (o200k_base encoding)
 *
 * @param text - Text to tokenize
 * @returns Number of tokens
 *
 * @example
 * tokenize("Hello, world!") // 4
 */
export function tokenize(text: string): number {
  return encode(text).length
}
/**
 * Rule to ensure the first letter of the commit subject is lowercase.
 *
 * @remarks
 * The check looks at the first ASCII letter found in the subject, so leading digits
 * or punctuation are ignored. A missing subject or one without any letters passes
 * this rule instead of crashing it.
 *
 * @param parsed - Parsed commit object containing commit message parts.
 * @returns A tuple where the first element is a boolean indicating
 * if the rule passed, and the second is an optional error message.
 */
const subjectLowercaseFirst: Rule = async (parsed) => {
  // `subject` may be null; optional chaining avoids a TypeError on `.match`
  const firstLetter = parsed.subject?.match(/[a-z]/i)?.[0]

  if (firstLetter !== undefined && firstLetter === firstLetter.toUpperCase()) {
    return [false, 'Subject must start with a lowercase letter']
  }

  return [true]
}
'og:title', content: name }], ['meta', { property: 'og:description', content: description }], ['meta', { property: 'og:image', content: ogImage }], ['meta', { name: 'twitter:title', content: name }], ['meta', { name: 'twitter:description', content: description }], ['meta', { name: 'twitter:image', content: twitterImage }], ['meta', { name: 'twitter:site', content: '@jschopplich' }], ['meta', { name: 'twitter:creator', content: '@jschopplich' }], ['meta', { name: 'twitter:card', content: 'summary_large_image' }], ], vite: { // @ts-expect-error – UnoCSS types are not compatible with Vite yet plugins: [UnoCSS(), llmstxt()], }, themeConfig: { logo: '/favicon.svg', nav: [ { text: 'Playground', link: '/playground', }, { text: 'Guide', activeMatch: '^/guide/', items: [ { text: 'Getting Started', link: '/guide/getting-started' }, { text: 'Format Overview', link: '/guide/format-overview' }, { text: 'Using TOON with LLMs', link: '/guide/llm-prompts' }, { text: 'Benchmarks', link: '/guide/benchmarks' }, ], }, { text: 'CLI', link: '/cli/', }, { text: 'Reference', activeMatch: '^/reference/', items: [ { text: 'API', link: '/reference/api' }, { text: 'Syntax Cheatsheet', link: '/reference/syntax-cheatsheet' }, { text: 'Specification', link: '/reference/spec' }, { text: 'Efficiency Formalization', link: '/reference/efficiency-formalization' }, ], }, { text: 'Ecosystem', activeMatch: '^/ecosystem/', items: [ { text: 'Tools & Playgrounds', link: '/ecosystem/tools-and-playgrounds' }, { text: 'Implementations', link: '/ecosystem/implementations' }, ], }, { text: `v${version}`, items: [ { text: 'Release Notes', link: releases, }, ], }, ], sidebar: { '/guide/': sidebarPrimary(), '/cli/': sidebarPrimary(), '/reference/': sidebarPrimary(), '/ecosystem/': sidebarPrimary(), }, socialLinks: [ { icon: 'github', link: github }, ], footer: { message: 'Released under the MIT License.', copyright: 'Copyright © 2025-PRESENT Johann Schopplich', }, search: { provider: 'local', }, }, markdown: { 
/**
 * Build the sidebar shared by the guide, CLI, reference, and ecosystem routes.
 *
 * @returns The four sidebar groups in display order: Guide, Tooling, Ecosystem, Reference
 */
function sidebarPrimary(): DefaultTheme.SidebarItem[] {
  const guideGroup: DefaultTheme.SidebarItem = {
    text: 'Guide',
    items: [
      { text: 'Getting Started', link: '/guide/getting-started' },
      { text: 'Format Overview', link: '/guide/format-overview' },
      { text: 'Using TOON with LLMs', link: '/guide/llm-prompts' },
      { text: 'Benchmarks', link: '/guide/benchmarks' },
    ],
  }

  const toolingGroup: DefaultTheme.SidebarItem = {
    text: 'Tooling',
    items: [
      { text: 'Playground', link: '/playground' },
      { text: 'CLI Reference', link: '/cli/' },
    ],
  }

  const ecosystemGroup: DefaultTheme.SidebarItem = {
    text: 'Ecosystem',
    items: [
      { text: 'Tools & Playgrounds', link: '/ecosystem/tools-and-playgrounds' },
      { text: 'Implementations', link: '/ecosystem/implementations' },
    ],
  }

  const referenceGroup: DefaultTheme.SidebarItem = {
    text: 'Reference',
    items: [
      { text: 'API (TypeScript)', link: '/reference/api' },
      { text: 'Syntax Cheatsheet', link: '/reference/syntax-cheatsheet' },
      { text: 'Specification', link: '/reference/spec' },
      { text: 'Efficiency Formalization', link: '/reference/efficiency-formalization' },
    ],
  }

  return [guideGroup, toolingGroup, ecosystemGroup, referenceGroup]
}
/**
 * Theme definition for the docs site.
 *
 * Extends the default theme, exposes the spec version as the `$spec` global
 * property, and registers the custom components globally.
 */
const config: Theme = {
  extends: DefaultTheme,
  enhanceApp(context) {
    const { app } = context

    // Readable from any template as `$spec.version`
    app.config.globalProperties.$spec = { version: '3.0' }

    // Global component registrations
    app.component('CopyOrDownloadAsMarkdownButtons', CopyOrDownloadAsMarkdownButtons)
    app.component('PlaygroundLayout', PlaygroundLayout)
    app.component('VPInput', VPInput)
  },
}
(min-width:960px) { .VPHomeHero .image-src { max-width: 176px; max-height: 176px; } } ================================================ FILE: docs/.vitepress/theme/vars.css ================================================ /** * Colors Theme * -------------------------------------------------------------------------- */ :root { --vp-c-brand-1: #d97c06; --vp-c-brand-2: #C57105; --vp-c-brand-3: #B16505; --vp-nav-logo-height: 20px; } /** * Component: Home * -------------------------------------------------------------------------- */ :root { --vp-home-hero-name-color: transparent; --vp-home-hero-name-background: -webkit-linear-gradient( 120deg, #fde98a 15%, #d97c06 ); --vp-home-hero-image-background-image: linear-gradient( -45deg, #d97c0660 30%, #fde98a60 ); --vp-home-hero-image-filter: blur(30px); } @media (min-width: 640px) { :root { --vp-home-hero-image-filter: blur(56px); } } @media (min-width: 960px) { :root { --vp-home-hero-image-filter: blur(72px); } } ================================================ FILE: docs/cli/index.md ================================================ # Command Line Interface The `@toon-format/cli` package provides a command-line interface for encoding JSON to TOON and decoding TOON back to JSON. Use it to analyze token savings before integrating TOON into your application, or to process JSON data through TOON in shell pipelines using stdin/stdout with tools like curl and jq. The CLI supports token statistics, streaming for large datasets, and all encoding options available in the library. The CLI is built on top of the `@toon-format/toon` TypeScript implementation and adheres to the [latest specification](/reference/spec). 
## Usage ### Without Installation Use `npx` to run the CLI without installing: ::: code-group ```bash [Encode] npx @toon-format/cli input.json -o output.toon ``` ```bash [Decode] npx @toon-format/cli data.toon -o output.json ``` ```bash [Stdin] echo '{"name": "Ada"}' | npx @toon-format/cli ``` ::: ### Global Installation Or install globally for repeated use: ::: code-group ```bash [npm] npm install -g @toon-format/cli ``` ```bash [pnpm] pnpm add -g @toon-format/cli ``` ```bash [yarn] yarn global add @toon-format/cli ``` ::: After global installation, use the `toon` command: ```bash toon input.json -o output.toon ``` ## Basic Usage ### Auto-Detection The CLI automatically detects the operation based on file extension: - `.json` files → encode (JSON to TOON) - `.toon` files → decode (TOON to JSON) When reading from stdin, use `--encode` or `--decode` flags to specify the operation (defaults to encode). ::: code-group ```bash [Encode JSON to TOON] toon input.json -o output.toon ``` ```bash [Decode TOON to JSON] toon data.toon -o output.json ``` ```bash [Output to stdout] toon input.json ``` ```bash [Pipe from stdin] cat data.json | toon echo '{"name": "Ada"}' | toon ``` ```bash [Decode from stdin] cat data.toon | toon --decode ``` ::: By convention, TOON files use the `.toon` extension and the provisional media type `text/toon` (see [spec §18.2](https://github.com/toon-format/spec/blob/main/SPEC.md#182-provisional-media-type)). ### Standard Input Omit the input argument or use `-` to read from stdin. This enables piping data directly from other commands: ```bash # No argument needed cat data.json | toon # Explicit stdin with hyphen (equivalent) cat data.json | toon - # Decode from stdin cat data.toon | toon --decode ``` ## Performance ### Streaming Output Both encoding and decoding operations use streaming output, writing incrementally without building the full output string in memory. 
This makes the CLI efficient for large datasets without requiring additional configuration. **JSON → TOON (Encode)**: - Streams TOON lines to output. - No full TOON string in memory. **TOON → JSON (Decode)**: - Uses the same event-based streaming decoder as the `decodeStream` API in `@toon-format/toon`. - Streams JSON tokens to output. - No full JSON string in memory. - When `--expandPaths safe` is enabled, falls back to non-streaming decode internally to apply deep-merge expansion before writing JSON. Process large files with minimal memory usage: ```bash # Encode large JSON file toon huge-dataset.json -o output.toon # Decode large TOON file toon huge-dataset.toon -o output.json # Process millions of records efficiently via stdin cat million-records.json | toon > output.toon cat million-records.toon | toon --decode > output.json ``` Peak memory usage scales with data depth, not total size. This allows processing arbitrarily large files as long as individual nested structures fit in memory. ::: info Token Statistics When using the `--stats` flag with encode, the CLI builds the full TOON string once to compute accurate token counts. For maximum memory efficiency on very large files, omit `--stats`. 
::: ## Options | Option | Description | | ------ | ----------- | | `-o, --output <file>` | Output file path (prints to stdout if omitted) | | `-e, --encode` | Force encode mode (overrides auto-detection) | | `-d, --decode` | Force decode mode (overrides auto-detection) | | `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) | | `--indent <number>` | Indentation size (default: `2`) | | `--stats` | Show token count estimates and savings (encode only) | | `--no-strict` | Disable strict validation when decoding | | `--keyFolding <mode>` | Key folding mode: `off`, `safe` (default: `off`) | | `--flattenDepth <number>` | Maximum segments to fold (default: `Infinity`) – requires `--keyFolding safe` | | `--expandPaths <mode>` | Path expansion mode: `off`, `safe` (default: `off`) | ## Advanced Examples ### Token Statistics Show token savings when encoding: ```bash toon data.json --stats -o output.toon ``` This helps you estimate token cost savings before sending data to LLMs. Example output: ``` ✔ Encoded data.json → output.toon ℹ Token estimates: ~15,145 (JSON) → ~8,745 (TOON) ✔ Saved ~6,400 tokens (-42.3%) ``` ### Alternative Delimiters TOON supports three delimiters: comma (default), tab, and pipe. Alternative delimiters can provide additional token savings in specific contexts. ::: code-group ```bash [Tab-separated] toon data.json --delimiter "\t" -o output.toon ``` ```bash [Pipe-separated] toon data.json --delimiter "|" -o output.toon ``` ::: **Tab delimiter example:** ::: code-group ```yaml [Tab] items[2 ]{id name qty price}: A1 Widget 2 9.99 B2 Gadget 1 14.5 ``` ```yaml [Comma (default)] items[2]{id,name,qty,price}: A1,Widget,2,9.99 B2,Gadget,1,14.5 ``` ::: > [!TIP] > Tab delimiters often tokenize more efficiently than commas and reduce the need for quote-escaping. Use `--delimiter "\t"` for maximum token savings on large tabular data. 
### Lenient Decoding Skip validation for faster processing: ```bash toon data.toon --no-strict -o output.json ``` Lenient mode (`--no-strict`) disables strict validation checks like array count matching, indentation multiples, and delimiter consistency. Use this when you trust the input and want faster decoding. ### Stdin Workflows The CLI integrates seamlessly with Unix pipes and other command-line tools: ```bash # Convert API response to TOON curl https://api.example.com/data | toon --stats # Process large dataset cat large-dataset.json | toon --delimiter "\t" > output.toon # Chain with jq jq '.results' data.json | toon > filtered.toon ``` ### Key Folding Collapse nested wrapper chains to reduce tokens (since spec v1.5): ::: code-group ```bash [Basic key folding] toon input.json --keyFolding safe -o output.toon ``` ```bash [Limit folding depth] toon input.json --keyFolding safe --flattenDepth 2 -o output.toon ``` ::: **Example:** For data like: ```json { "data": { "metadata": { "items": ["a", "b"] } } } ``` With `--keyFolding safe`, output becomes: ```yaml data.metadata.items[2]: a,b ``` Instead of: ```yaml data: metadata: items[2]: a,b ``` ### Path Expansion Reconstruct nested structure from folded keys when decoding: ```bash toon data.toon --expandPaths safe -o output.json ``` This pairs with `--keyFolding safe` for lossless round-trips. 
### Round-Trip Workflow ```bash # Encode with folding toon input.json --keyFolding safe -o compressed.toon # Decode with expansion (restores original structure) toon compressed.toon --expandPaths safe -o output.json # Verify round-trip diff input.json output.json ``` ### Combined Options Combine multiple options for maximum efficiency: ```bash # Key folding + tab delimiter + stats toon data.json --keyFolding safe --delimiter "\t" --stats -o output.toon ``` ================================================ FILE: docs/ecosystem/implementations.md ================================================ # Implementations TOON has official and community implementations across multiple programming languages. All implementations are intended to conform to the same [Specification](https://github.com/toon-format/spec) to ensure compatibility and interoperability. The code examples throughout this documentation site use the TypeScript implementation by default, but the format and concepts apply equally to all languages. > [!NOTE] > When implementing TOON in other languages, please follow the [spec](https://github.com/toon-format/spec/blob/main/SPEC.md) to ensure compatibility across implementations. The [conformance tests](https://github.com/toon-format/spec/tree/main/tests) provide language-agnostic test fixtures that validate your implementation. ## Official Implementations These implementations are actively being developed by dedicated teams. Contributions are welcome! Join the effort by opening issues, submitting PRs, or discussing implementation details in the respective repositories. 
| Language | Repository | Status | Maintainer | |----------|------------|--------|------------| | **.NET** | [toon-dotnet](https://github.com/toon-format/toon-dotnet) | In Development | Official Team | | **Dart** | [toon-dart](https://github.com/toon-format/toon-dart) | In Development | Official Team | | **Go** | [toon-go](https://github.com/toon-format/toon-go) | In Development | Official Team | | **Java** | [toon-java](https://github.com/toon-format/toon-java) | ✅ Stable | Official Team | | **Julia** | [ToonFormat.jl](https://github.com/toon-format/ToonFormat.jl) | ✅ Stable | Official Team | | **Python** | [toon-python](https://github.com/toon-format/toon-python) | ✅ Stable | Official Team | | **Rust** | [toon-rust](https://github.com/toon-format/toon-rust) | ✅ Stable | Official Team | | **Swift** | [toon-swift](https://github.com/toon-format/toon-swift) | ✅ Stable | Official Team | | **TypeScript/JavaScript** | [toon](https://github.com/toon-format/toon/tree/main/packages/toon) | ✅ Stable | Official Team | ## Community Implementations Community members have created implementations in additional languages: | Language | Repository | Maintainer | |----------|------------|------------| | **Apex** | [ApexToon](https://github.com/Eacaw/ApexToon) | [@Eacaw](https://github.com/Eacaw) | | **C** | [TOONc](https://github.com/UsboKirishima/TOONc) | [@UsboKirishima](https://github.com/UsboKirishima) | | **C++** | [ctoon](https://github.com/mohammadraziei/ctoon) | [@mohammadraziei](https://github.com/mohammadraziei) | | **C#** | [ToonEncoder](https://github.com/Cysharp/ToonEncoder) | [@Cysharp](https://github.com/Cysharp) | | **Clojure** | [toon](https://github.com/vadelabs/toon) | [@vadelabs](https://github.com/vadelabs) | | **Crystal** | [toon-crystal](https://github.com/mamantoha/toon-crystal) | [@mamantoha](https://github.com/mamantoha) | | **Elixir** | [toon_ex](https://github.com/kentaro/toon_ex) | [@kentaro](https://github.com/kentaro) | | **Gleam** | 
[toon_codec](https://github.com/axelbellec/toon_codec) | [@axelbellec](https://github.com/axelbellec) | | **Go** | [gotoon](https://github.com/alpkeskin/gotoon) | [@alpkeskin](https://github.com/alpkeskin) | | **Java** | [json-io](https://github.com/jdereg/json-io) | [@jdereg](https://github.com/jdereg) | | **Kotlin** | [ktoon](https://github.com/lukelast/ktoon)| [@lukelast](https://github.com/lukelast) | | **Laravel Framework** | [laravel-toon](https://github.com/mischasigtermans/laravel-toon) | [@mischasigtermans](https://github.com/mischasigtermans) | | **Lua/Neovim** | [toon.nvim](https://github.com/thalesgelinger/toon.nvim) | [@thalesgelinger](https://github.com/thalesgelinger) | | **OCaml** | [ocaml-toon](https://github.com/davesnx/ocaml-toon) | [@davesnx](https://github.com/davesnx) | | **Perl** | [Data::TOON](https://github.com/ytnobody/p5-Data-TOON) | [@ytnobody](https://github.com/ytnobody) | | **PHP** | [toon-php](https://github.com/HelgeSverre/toon-php) | [@HelgeSverre](https://github.com/HelgeSverre) | | **R** | [toon](https://github.com/laresbernardo/toon) | [@laresbernardo](https://github.com/laresbernardo) | | **Ruby** | [toon-ruby](https://github.com/andrepcg/toon-ruby) | [@andrepcg](https://github.com/andrepcg) | | **Scala** | [toon4s](https://github.com/vim89/toon4s) | [@vim89](https://github.com/vim89) | | **Python** (Rust backend) | [toons](https://github.com/alesanfra/toons) | [@alesanfra](https://github.com/alesanfra) | ## Contributing an Implementation Building a TOON implementation for a new language? Great! Here are some steps to get started: 1. **Follow the spec**: Implement the [latest specification](https://github.com/toon-format/spec/blob/main/SPEC.md). 2. **Add tests**: Run the [reference test suite](https://github.com/toon-format/spec/tree/main/tests). 3. **Document usage**: Provide clear README with installation and usage examples. 4. 
**Share it**: Open a PR to add your implementation to the README at [github.com/toon-format/toon](https://github.com/toon-format/toon). ================================================ FILE: docs/ecosystem/tools-and-playgrounds.md ================================================ # Tools & Playgrounds Experiment with TOON format interactively using these tools for token comparison, format conversion, and validation. ## Playgrounds ### Official Playground The [TOON Playground](/playground) lets you convert JSON to TOON in real-time, compare token counts, and share your experiments via URL. ### Community Playgrounds - [Format Tokenization Playground](https://www.curiouslychase.com/playground/format-tokenization-exploration) - [TOON Tools](https://toontools.vercel.app/) ## CLI Tool The official TOON CLI provides command-line conversion, token statistics, and all encoding/decoding features. See the [CLI reference](/cli/) for full documentation. ```bash npx @toon-format/cli input.json --stats -o output.toon ``` ## Editor Support ### VS Code [TOON Language Support](https://marketplace.visualstudio.com/items?itemName=vishalraut.vscode-toon) - Syntax highlighting, validation, conversion, and token analysis. Install from the [VS Code Marketplace](https://marketplace.visualstudio.com/items?itemName=vishalraut.vscode-toon) or via command line: ```bash code --install-extension vishalraut.vscode-toon ``` ### Tree-sitter Grammar [tree-sitter-toon](https://github.com/3swordman/tree-sitter-toon) - Grammar for Tree-sitter-compatible editors (Neovim, Helix, Emacs, Zed). ### Neovim [toon.nvim](https://github.com/thalesgelinger/toon.nvim) - Lua-based plugin for Neovim. ### Other Editors Use YAML syntax highlighting as a close approximation. Most editors allow associating `.toon` files with YAML language mode. ## Databases ### ToonStore [ToonStore](https://github.com/Kalama-Tech/toonstoredb) - Redis-compatible embedded database (Rust) that stores data in TOON format. 
## ORMs ### TORM [TORM](https://github.com/Kalama-Tech/torm) - ORM that works with the ToonStore database, with SDKs for Node.js, Python, Go, and PHP. ## Web APIs If you're building web applications that work with TOON, you can use the TypeScript library in the browser: ```ts import { decode, encode } from '@toon-format/toon' // Works in browsers, Node.js, Deno, and Bun const toon = encode(data) const decoded = decode(toon) ``` See the [API Reference](/reference/api) for details. ================================================ FILE: docs/guide/benchmarks.md ================================================ # Benchmarks The benchmarks on this page measure TOON's performance across two key dimensions: - **Retrieval Accuracy**: How well LLMs understand and extract information from different input formats. - **Token Efficiency**: How many tokens each format requires to represent the same data. Benchmarks are organized into two tracks to ensure fair comparisons: - **Mixed-Structure Track**: Datasets with nested or semi-uniform structures (TOON vs JSON, YAML, XML). CSV is excluded because it cannot properly represent these structures. - **Flat-Only Track**: Datasets with flat tabular structures where CSV is applicable (CSV vs TOON vs JSON, YAML, XML). ## Retrieval Accuracy Benchmarks test LLM comprehension across different input formats using 209 data retrieval questions on 4 models. 
Show Dataset Catalog #### Dataset Catalog | Dataset | Rows | Structure | CSV Support | Eligibility | | ------- | ---- | --------- | ----------- | ----------- | | Uniform employee records | 100 | uniform | ✓ | 100% | | E-commerce orders with nested structures | 50 | nested | ✗ | 33% | | Time-series analytics data | 60 | uniform | ✓ | 100% | | Top 100 GitHub repositories | 100 | uniform | ✓ | 100% | | Semi-uniform event logs | 75 | semi-uniform | ✗ | 50% | | Deeply nested configuration | 11 | deep | ✗ | 0% | | Valid complete dataset (control) | 20 | uniform | ✓ | 100% | | Array truncated: 3 rows removed from end | 17 | uniform | ✓ | 100% | | Extra rows added beyond declared length | 23 | uniform | ✓ | 100% | | Inconsistent field count (missing salary in row 10) | 20 | uniform | ✓ | 100% | | Missing required fields (no email in multiple rows) | 20 | uniform | ✓ | 100% | **Structure classes:** - **uniform**: All objects have identical fields with primitive values - **semi-uniform**: Mix of uniform and non-uniform structures - **nested**: Objects with nested structures (nested objects or arrays) - **deep**: Highly nested with minimal tabular eligibility **CSV Support:** ✓ (supported), ✗ (not supported – would require lossy flattening) **Eligibility:** Percentage of arrays that qualify for TOON's tabular format (uniform objects with primitive values) #### Efficiency Ranking (Accuracy per 1K Tokens) Each format ranked by efficiency (accuracy percentage per 1,000 tokens): ``` TOON ████████████████████ 27.7 acc%/1K tok │ 76.4% acc │ 2,759 tokens JSON compact █████████████████░░░ 23.7 acc%/1K tok │ 73.7% acc │ 3,104 tokens YAML ██████████████░░░░░░ 19.9 acc%/1K tok │ 74.5% acc │ 3,749 tokens JSON ████████████░░░░░░░░ 16.4 acc%/1K tok │ 75.0% acc │ 4,587 tokens XML ██████████░░░░░░░░░░ 13.8 acc%/1K tok │ 72.1% acc │ 5,221 tokens ``` *Efficiency score = (Accuracy % ÷ Tokens) × 1,000. 
Higher is better.* > [!TIP] > TOON achieves **76.4%** accuracy (vs JSON's 75.0%) while using **39.9% fewer tokens**. **Note on CSV:** Excluded from ranking as it only supports 109 of 209 questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle. #### Per-Model Accuracy Accuracy across 4 LLMs on 209 data retrieval questions: ``` claude-haiku-4-5-20251001 → TOON ████████████░░░░░░░░ 59.8% (125/209) JSON ███████████░░░░░░░░░ 57.4% (120/209) YAML ███████████░░░░░░░░░ 56.0% (117/209) XML ███████████░░░░░░░░░ 55.5% (116/209) JSON compact ███████████░░░░░░░░░ 55.0% (115/209) CSV ██████████░░░░░░░░░░ 50.5% (55/109) gemini-3-flash-preview XML ████████████████████ 98.1% (205/209) JSON ███████████████████░ 97.1% (203/209) YAML ███████████████████░ 97.1% (203/209) → TOON ███████████████████░ 96.7% (202/209) JSON compact ███████████████████░ 96.7% (202/209) CSV ███████████████████░ 96.3% (105/109) gpt-5-nano → TOON ██████████████████░░ 90.9% (190/209) JSON compact ██████████████████░░ 90.9% (190/209) JSON ██████████████████░░ 89.0% (186/209) CSV ██████████████████░░ 89.0% (97/109) YAML █████████████████░░░ 87.1% (182/209) XML ████████████████░░░░ 80.9% (169/209) grok-4-1-fast-non-reasoning → TOON ████████████░░░░░░░░ 58.4% (122/209) YAML ████████████░░░░░░░░ 57.9% (121/209) JSON ███████████░░░░░░░░░ 56.5% (118/209) XML ███████████░░░░░░░░░ 54.1% (113/209) JSON compact ██████████░░░░░░░░░░ 52.2% (109/209) CSV ██████████░░░░░░░░░░ 51.4% (56/109) ``` > [!TIP] > TOON achieves **76.4% accuracy** (vs JSON's 75.0%) while using **39.9% fewer tokens** on these datasets. 
Performance by dataset, model, and question type #### Performance by Question Type | Question Type | TOON | JSON | YAML | JSON compact | XML | CSV | | ------------- | ---- | ---- | ---- | ---- | ---- | ---- | | Field Retrieval | 99.6% | 99.3% | 98.5% | 98.5% | 98.9% | 100.0% | | Aggregation | 61.9% | 61.9% | 59.9% | 58.3% | 54.4% | 50.9% | | Filtering | 56.8% | 53.1% | 56.3% | 55.2% | 51.6% | 50.9% | | Structure Awareness | 89.0% | 87.0% | 84.0% | 84.0% | 81.0% | 85.9% | | Structural Validation | 70.0% | 60.0% | 60.0% | 55.0% | 85.0% | 80.0% | #### Performance by Dataset ##### Uniform employee records | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 73.2% | 2,334 | 120/164 | | `toon` | 73.2% | 2,498 | 120/164 | | `json-compact` | 73.8% | 3,924 | 121/164 | | `yaml` | 73.8% | 4,959 | 121/164 | | `json-pretty` | 73.8% | 6,331 | 121/164 | | `xml` | 74.4% | 7,296 | 122/164 | ##### E-commerce orders with nested structures | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 82.3% | 7,458 | 135/164 | | `json-compact` | 78.7% | 7,110 | 129/164 | | `yaml` | 79.9% | 8,755 | 131/164 | | `json-pretty` | 79.3% | 11,234 | 130/164 | | `xml` | 77.4% | 12,649 | 127/164 | ##### Time-series analytics data | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 75.0% | 1,411 | 90/120 | | `toon` | 78.3% | 1,553 | 94/120 | | `json-compact` | 74.2% | 2,354 | 89/120 | | `yaml` | 75.8% | 2,954 | 91/120 | | `json-pretty` | 75.0% | 3,681 | 90/120 | | `xml` | 72.5% | 4,389 | 87/120 | ##### Top 100 GitHub repositories | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 65.9% | 8,527 | 87/132 | | `toon` | 66.7% | 8,779 | 88/132 | | `yaml` | 65.2% | 13,141 | 86/132 | | `json-compact` | 59.8% | 11,464 | 79/132 | | `json-pretty` | 63.6% | 15,157 | 84/132 | | `xml` | 56.1% | 17,105 | 74/132 | 
##### Semi-uniform event logs | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 68.3% | 4,839 | 82/120 | | `toon` | 65.0% | 5,819 | 78/120 | | `json-pretty` | 69.2% | 6,817 | 83/120 | | `yaml` | 61.7% | 5,847 | 74/120 | | `xml` | 58.3% | 7,729 | 70/120 | ##### Deeply nested configuration | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 90.5% | 568 | 105/116 | | `toon` | 94.8% | 655 | 110/116 | | `yaml` | 93.1% | 675 | 108/116 | | `json-pretty` | 92.2% | 924 | 107/116 | | `xml` | 91.4% | 1,013 | 106/116 | ##### Valid complete dataset (control) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 100.0% | 535 | 4/4 | | `json-compact` | 100.0% | 787 | 4/4 | | `yaml` | 100.0% | 992 | 4/4 | | `json-pretty` | 100.0% | 1,274 | 4/4 | | `xml` | 25.0% | 1,462 | 1/4 | | `csv` | 0.0% | 483 | 0/4 | ##### Array truncated: 3 rows removed from end | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 413 | 4/4 | | `xml` | 100.0% | 1,243 | 4/4 | | `toon` | 0.0% | 462 | 0/4 | | `json-pretty` | 0.0% | 1,085 | 0/4 | | `yaml` | 0.0% | 843 | 0/4 | | `json-compact` | 0.0% | 670 | 0/4 | ##### Extra rows added beyond declared length | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 550 | 4/4 | | `toon` | 75.0% | 605 | 3/4 | | `json-compact` | 75.0% | 901 | 3/4 | | `xml` | 100.0% | 1,678 | 4/4 | | `yaml` | 75.0% | 1,138 | 3/4 | | `json-pretty` | 50.0% | 1,460 | 2/4 | ##### Inconsistent field count (missing salary in row 10) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 480 | 4/4 | | `json-compact` | 100.0% | 782 | 4/4 | | `yaml` | 100.0% | 985 | 4/4 | | `toon` | 100.0% | 1,008 | 4/4 | | `json-pretty` | 100.0% | 1,266 | 4/4 | | 
`xml` | 100.0% | 1,453 | 4/4 | ##### Missing required fields (no email in multiple rows) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 340 | 4/4 | | `xml` | 100.0% | 1,409 | 4/4 | | `toon` | 75.0% | 974 | 3/4 | | `json-pretty` | 50.0% | 1,225 | 2/4 | | `yaml` | 25.0% | 951 | 1/4 | | `json-compact` | 0.0% | 750 | 0/4 | #### Performance by Model ##### claude-haiku-4-5-20251001 | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 59.8% | 125/209 | | `json-pretty` | 57.4% | 120/209 | | `yaml` | 56.0% | 117/209 | | `xml` | 55.5% | 116/209 | | `json-compact` | 55.0% | 115/209 | | `csv` | 50.5% | 55/109 | ##### gemini-3-flash-preview | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `xml` | 98.1% | 205/209 | | `json-pretty` | 97.1% | 203/209 | | `yaml` | 97.1% | 203/209 | | `toon` | 96.7% | 202/209 | | `json-compact` | 96.7% | 202/209 | | `csv` | 96.3% | 105/109 | ##### gpt-5-nano | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 90.9% | 190/209 | | `json-compact` | 90.9% | 190/209 | | `json-pretty` | 89.0% | 186/209 | | `csv` | 89.0% | 97/109 | | `yaml` | 87.1% | 182/209 | | `xml` | 80.9% | 169/209 | ##### grok-4-1-fast-non-reasoning | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 58.4% | 122/209 | | `yaml` | 57.9% | 121/209 | | `json-pretty` | 56.5% | 118/209 | | `xml` | 54.1% | 113/209 | | `json-compact` | 52.2% | 109/209 | | `csv` | 51.4% | 56/109 | #### What's Being Measured This benchmark tests **LLM comprehension and data retrieval accuracy** across different input formats. Each LLM receives formatted data and must answer questions about it. This does **not** test the model's ability to generate TOON output – only to read and understand it. 
#### Datasets Tested Eleven datasets designed to test different structural patterns and validation capabilities: **Primary datasets:** 1. **Tabular** (100 employee records): Uniform objects with identical fields – optimal for TOON's tabular format. 2. **Nested** (50 e-commerce orders): Complex structures with nested customer objects and item arrays. 3. **Analytics** (60 days of metrics): Time-series data with dates and numeric values. 4. **GitHub** (100 repositories): Real-world data from top GitHub repos by stars. 5. **Event Logs** (75 logs): Semi-uniform data with ~50% flat logs and ~50% with nested error objects. 6. **Nested Config** (1 configuration): Deeply nested configuration with minimal tabular eligibility. **Structural validation datasets:** 7. **Control**: Valid complete dataset (baseline for validation) 8. **Truncated**: Array with 3 rows removed from end (tests `[N]` length detection) 9. **Extra rows**: Array with 3 additional rows beyond declared length 10. **Width mismatch**: Inconsistent field count (missing salary in row 10) 11. **Missing fields**: Systematic field omissions (no email in multiple rows) #### Question Types 209 questions are generated dynamically across five categories: - **Field retrieval (33%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths) - Example: "What is Alice's salary?" → `75000` - Example: "How many items are in order ORD-0042?" → `3` - Example: "What is the customer name for order ORD-0042?" → `John Doe` - **Aggregation (30%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons) - Example: "How many employees work in Engineering?" → `17` - Example: "What is the total revenue across all orders?" → `45123.50` - Example: "How many employees have salary > 80000?" 
→ `23` - **Filtering (23%)**: Multi-condition queries requiring compound logic (AND constraints across fields) - Example: "How many employees in Sales have salary > 80000?" → `5` - Example: "How many active employees have more than 10 years of experience?" → `8` - **Structure awareness (12%)**: Tests format-native structural affordances (TOON's `[N]` count and `{fields}`, CSV's header row) - Example: "How many employees are in the dataset?" → `100` - Example: "List the field names for employees" → `id, name, email, department, salary, yearsExperience, active` - Example: "What is the department of the last employee?" → `Sales` - **Structural validation (2%)**: Tests ability to detect incomplete, truncated, or corrupted data using structural metadata - Example: "Is this data complete and valid?" → `YES` (control dataset) or `NO` (corrupted datasets) - Tests TOON's `[N]` length validation and `{fields}` consistency checking - Demonstrates CSV's lack of structural validation capabilities #### Evaluation Process 1. **Format conversion**: Each dataset is converted to all 6 formats (TOON, JSON, YAML, JSON compact, XML, CSV). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate deterministically**: Answers are validated using type-aware comparison (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`) without requiring an LLM judge. #### Models & Configuration - **Models tested**: `claude-haiku-4-5-20251001`, `gemini-3-flash-preview`, `gpt-5-nano`, `grok-4-1-fast-non-reasoning` - **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding (GPT-5 tokenizer) - **Temperature**: Not set (models use their defaults) - **Total evaluations**: 209 questions × 6 formats × 4 models = 5,016 LLM calls ## Token Efficiency Token counts are measured using the GPT-5 `o200k_base` tokenizer via [`gpt-tokenizer`](https://github.com/niieani/gpt-tokenizer). 
Savings are calculated against formatted JSON (2-space indentation) as the primary baseline, with additional comparisons to compact JSON (minified), YAML, and XML. Actual savings vary by model and tokenizer. The benchmarks test datasets across different structural patterns (uniform, semi-uniform, nested, deeply nested) to show where TOON excels and where other formats may be better. #### Mixed-Structure Track Datasets with nested or semi-uniform structures. CSV excluded as it cannot properly represent these structures. ``` 🛒 E-commerce orders with nested structures ┊ Tabular: 33% │ TOON █████████████░░░░░░░ 73,126 tokens ├─ vs JSON (−33.3%) 109,599 tokens ├─ vs JSON compact (+5.3%) 69,459 tokens ├─ vs YAML (−14.4%) 85,415 tokens └─ vs XML (−40.7%) 123,344 tokens 🧾 Semi-uniform event logs ┊ Tabular: 50% │ TOON █████████████████░░░ 154,084 tokens ├─ vs JSON (−15.0%) 181,201 tokens ├─ vs JSON compact (+19.9%) 128,529 tokens ├─ vs YAML (−0.8%) 155,397 tokens └─ vs XML (−25.2%) 205,859 tokens 🧩 Deeply nested configuration ┊ Tabular: 0% │ TOON ██████████████░░░░░░ 620 tokens ├─ vs JSON (−31.9%) 911 tokens ├─ vs JSON compact (+11.1%) 558 tokens ├─ vs YAML (−6.3%) 662 tokens └─ vs XML (−38.2%) 1,003 tokens ──────────────────────────────────── Total ──────────────────────────────────── TOON ████████████████░░░░ 227,830 tokens ├─ vs JSON (−21.9%) 291,711 tokens ├─ vs JSON compact (+14.7%) 198,546 tokens ├─ vs YAML (−5.7%) 241,474 tokens └─ vs XML (−31.0%) 330,206 tokens ``` #### Flat-Only Track Datasets with flat tabular structures where CSV is applicable. 
``` 👥 Uniform employee records ┊ Tabular: 100% │ CSV ███████████████████░ 47,102 tokens TOON ████████████████████ 49,919 tokens (+6.0% vs CSV) ├─ vs JSON (−60.7%) 127,063 tokens ├─ vs JSON compact (−36.9%) 79,059 tokens ├─ vs YAML (−50.1%) 100,011 tokens └─ vs XML (−65.9%) 146,579 tokens 📈 Time-series analytics data ┊ Tabular: 100% │ CSV ██████████████████░░ 8,383 tokens TOON ████████████████████ 9,115 tokens (+8.7% vs CSV) ├─ vs JSON (−59.0%) 22,245 tokens ├─ vs JSON compact (−35.9%) 14,211 tokens ├─ vs YAML (−49.0%) 17,858 tokens └─ vs XML (−65.8%) 26,616 tokens ⭐ Top 100 GitHub repositories ┊ Tabular: 100% │ CSV ███████████████████░ 8,512 tokens TOON ████████████████████ 8,744 tokens (+2.7% vs CSV) ├─ vs JSON (−42.3%) 15,144 tokens ├─ vs JSON compact (−23.7%) 11,454 tokens ├─ vs YAML (−33.4%) 13,128 tokens └─ vs XML (−48.9%) 17,095 tokens ──────────────────────────────────── Total ──────────────────────────────────── CSV ███████████████████░ 63,997 tokens TOON ████████████████████ 67,778 tokens (+5.9% vs CSV) ├─ vs JSON (−58.8%) 164,452 tokens ├─ vs JSON compact (−35.3%) 104,724 tokens ├─ vs YAML (−48.3%) 130,997 tokens └─ vs XML (−64.4%) 190,290 tokens ``` Show detailed examples #### 📈 Time-series analytics data **Savings:** 13,130 tokens (59.0% reduction vs JSON) **JSON** (22,245 tokens): ```json { "metrics": [ { "date": "2025-01-01", "views": 6138, "clicks": 174, "conversions": 12, "revenue": 2712.49, "bounceRate": 0.35 }, { "date": "2025-01-02", "views": 4616, "clicks": 274, "conversions": 34, "revenue": 9156.29, "bounceRate": 0.56 }, { "date": "2025-01-03", "views": 4460, "clicks": 143, "conversions": 8, "revenue": 1317.98, "bounceRate": 0.59 }, { "date": "2025-01-04", "views": 4740, "clicks": 125, "conversions": 13, "revenue": 2934.77, "bounceRate": 0.37 }, { "date": "2025-01-05", "views": 6428, "clicks": 369, "conversions": 19, "revenue": 1317.24, "bounceRate": 0.3 } ] } ``` **TOON** (9,115 tokens): ``` 
metrics[5]{date,views,clicks,conversions,revenue,bounceRate}: 2025-01-01,6138,174,12,2712.49,0.35 2025-01-02,4616,274,34,9156.29,0.56 2025-01-03,4460,143,8,1317.98,0.59 2025-01-04,4740,125,13,2934.77,0.37 2025-01-05,6428,369,19,1317.24,0.3 ``` --- #### ⭐ Top 100 GitHub repositories **Savings:** 6,400 tokens (42.3% reduction vs JSON) **JSON** (15,144 tokens): ```json { "repositories": [ { "id": 28457823, "name": "freeCodeCamp", "repo": "freeCodeCamp/freeCodeCamp", "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…", "createdAt": "2014-12-24T17:49:19Z", "updatedAt": "2025-10-28T11:58:08Z", "pushedAt": "2025-10-28T10:17:16Z", "stars": 430886, "watchers": 8583, "forks": 42146, "defaultBranch": "main" }, { "id": 132750724, "name": "build-your-own-x", "repo": "codecrafters-io/build-your-own-x", "description": "Master programming by recreating your favorite technologies from scratch.", "createdAt": "2018-05-09T12:03:18Z", "updatedAt": "2025-10-28T12:37:11Z", "pushedAt": "2025-10-10T18:45:01Z", "stars": 430877, "watchers": 6332, "forks": 40453, "defaultBranch": "master" }, { "id": 21737465, "name": "awesome", "repo": "sindresorhus/awesome", "description": "😎 Awesome lists about all kinds of interesting topics", "createdAt": "2014-07-11T13:42:37Z", "updatedAt": "2025-10-28T12:40:21Z", "pushedAt": "2025-10-27T17:57:31Z", "stars": 410052, "watchers": 8017, "forks": 32029, "defaultBranch": "main" } ] } ``` **TOON** (8,744 tokens): ``` repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}: 28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. 
Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-28T11:58:08Z","2025-10-28T10:17:16Z",430886,8583,42146,main 132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-28T12:37:11Z","2025-10-10T18:45:01Z",430877,6332,40453,master 21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-28T12:40:21Z","2025-10-27T17:57:31Z",410052,8017,32029,main ``` ## Related Resources - [Formal Byte-Level Model](/reference/efficiency-formalization) – Mathematical analysis of byte efficiency compared to JSON - [Specification](/reference/spec) – Formal TOON specification ================================================ FILE: docs/guide/format-overview.md ================================================ # Format Overview TOON syntax reference with concrete examples. See [Getting Started](/guide/getting-started) for introduction. ## Data Model TOON models data the same way as JSON: - **Primitives**: strings, numbers, booleans, and `null` - **Objects**: mappings from string keys to values - **Arrays**: ordered sequences of values ### Root Forms A TOON document can represent different root forms: - **Root object** (most common): Fields appear at depth 0 with no parent key - **Root array**: Begins with `[N]:` or `[N]{fields}:` at depth 0 - **Root primitive**: A single primitive value (string, number, boolean, or null) Most examples in these docs use root objects, but the format supports all three forms equally ([spec §5](https://github.com/toon-format/spec/blob/main/SPEC.md#5-concrete-syntax-and-root-form)). ## Objects ### Simple Objects Objects with primitive values use `key: value` syntax, with one field per line: ```yaml id: 123 name: Ada active: true ``` Indentation replaces braces. One space follows the colon. 
### Nested Objects Nested objects add one indentation level (default: 2 spaces): ```yaml user: id: 123 name: Ada ``` When a key ends with `:` and has no value on the same line, it opens a nested object. All lines at the next indentation level belong to that object. ### Empty Objects An empty object at the root yields an empty document (no lines). A nested empty object is `key:` alone, with no children. ## Arrays TOON detects array structure and chooses the most efficient representation. Arrays always declare their length in brackets: `[N]`. ### Primitive Arrays (Inline) Arrays of primitives (strings, numbers, booleans, null) are rendered inline: ```yaml tags[3]: admin,ops,dev ``` The delimiter (comma by default) separates values. Strings containing the active delimiter must be quoted. ### Arrays of Objects (Tabular) When all objects in an array share the same set of primitive-valued keys, TOON uses tabular format: ::: code-group ```yaml [Basic Tabular] items[2]{sku,qty,price}: A1,2,9.99 B2,1,14.5 ``` ```yaml [With Spaces in Values] users[2]{id,name,role}: 1,Alice Admin,admin 2,"Bob Smith",user ``` ::: The header `items[2]{sku,qty,price}:` declares: - **Array length**: `[2]` means 2 rows - **Field names**: `{sku,qty,price}` defines the columns - **Active delimiter**: comma (default) Each row contains values in the same order as the field list. Values are encoded as primitives (strings, numbers, booleans, null) and separated by the delimiter. > [!NOTE] > Tabular format requires identical field sets across all objects (same keys, order per object may vary) and primitive values only (no nested arrays/objects). ### Mixed and Non-Uniform Arrays Arrays that don't meet the tabular requirements use list format with hyphen markers: ```yaml items[3]: - 1 - a: 1 - text ``` Each element starts with `- ` at one indentation level deeper than the parent array header. 
### Objects as List Items When an array element is an object, it appears as a list item: ```yaml items[2]: - id: 1 name: First - id: 2 name: Second extra: true ``` When a tabular array is the first field of a list-item object, the tabular header appears on the hyphen line, with rows indented two levels deeper and other fields indented one level deeper: ```yaml items[1]: - users[2]{id,name}: 1,Ada 2,Bob status: active ``` When the object has only a single tabular field, the same pattern applies: ```yaml items[1]: - users[2]{id,name}: 1,Ada 2,Bob ``` This is the canonical encoding for list-item objects whose first field is a tabular array. ### Arrays of Arrays When you have arrays containing primitive inner arrays: ```yaml pairs[2]: - [2]: 1,2 - [2]: 3,4 ``` Each inner array gets its own header on the list-item line. ### Empty Arrays Empty arrays have special representations: ```yaml items[0]: ``` The header declares length zero, with no elements following. ## Array Headers ### Header Syntax Array headers follow this pattern: ``` key[N<delimiter?>]<{fields}>: ``` Where: - **N** is the non-negative integer length - **delimiter** (optional) explicitly declares the active delimiter: - Absent → comma (`,`) - `\t` (tab character) → tab delimiter - `|` → pipe delimiter - **fields** (optional) for tabular arrays: `{field1,field2,field3}` > [!TIP] > The array length `[N]` helps LLMs validate structure. If you ask a model to generate TOON output, explicit lengths let you detect truncation or malformed data. ### Delimiter Options TOON supports three delimiters: comma (default), tab, and pipe. The delimiter is scoped to the array header that declares it. 
::: code-group ```yaml [Comma (default)] items[2]{sku,name,qty,price}: A1,Widget,2,9.99 B2,Gadget,1,14.5 ``` ```yaml [Tab] items[2 ]{sku name qty price}: A1 Widget 2 9.99 B2 Gadget 1 14.5 ``` ```yaml [Pipe] items[2|]{sku|name|qty|price}: A1|Widget|2|9.99 B2|Gadget|1|14.5 ``` ::: Tab and pipe delimiters are explicitly encoded in the header brackets and field braces. Commas don't require quoting when tab or pipe is active, and vice versa. > [!TIP] > Tab delimiters often tokenize more efficiently than commas, especially for data with few quoted strings. Use `encode(data, { delimiter: '\t' })` for additional token savings. ## Key Folding (Optional) Key folding is an optional encoder feature (since spec v1.5) that collapses chains of single-key objects into dotted paths, reducing tokens for deeply nested data. ### Basic Folding Standard nesting: ```yaml data: metadata: items[2]: a,b ``` With key folding (`keyFolding: 'safe'`): ```yaml data.metadata.items[2]: a,b ``` The three nested objects collapse into a single dotted key `data.metadata.items`. 
### When Folding Applies A chain of objects is foldable when: - Each object in the chain has exactly one key (leading to the next object or a leaf value) - The leaf value is a primitive, array, or empty object - All segments are valid identifier segments (letters, digits, underscores only; no dots within segments) - The resulting folded key doesn't collide with existing keys ::: details Advanced Folding Rules **Segment Requirements (safe mode):** - All folded segments must match `^[A-Za-z_][A-Za-z0-9_]*$` (no dots, hyphens, or other special characters) - No segment may require quoting per §7.3 of the spec - The resulting folded key must not equal any existing sibling literal key at the same depth (collision avoidance) **Depth Limit:** - The `flattenDepth` option (default: `Infinity`) controls how many segments to fold - `flattenDepth: 2` folds only two-segment chains: `{a: {b: val}}` → `a.b: val` - Values less than 2 have no practical effect **Round-Trip with Path Expansion:** To reconstruct the original structure when decoding, use `expandPaths: 'safe'`. This splits dotted keys back into nested objects using the same safety rules ([spec §13.4](https://github.com/toon-format/spec/blob/main/SPEC.md#134-key-folding-and-path-expansion)). ::: ### Round-Trip with Path Expansion When decoding TOON that used key folding, enable path expansion to restore the nested structure: ```ts import { decode, encode } from '@toon-format/toon' const original = { data: { metadata: { items: ['a', 'b'] } } } // Encode with folding const toon = encode(original, { keyFolding: 'safe' }) // → "data.metadata.items[2]: a,b" // Decode with expansion const restored = decode(toon, { expandPaths: 'safe' }) // → { data: { metadata: { items: ['a', 'b'] } } } ``` Path expansion is off by default, so dotted keys are treated as literal keys unless explicitly enabled. ## Quoting and Types ### When Strings Need Quotes TOON quotes strings **only when necessary** to maximize token efficiency. 
A string must be quoted if: - It's empty (`""`) - It has leading or trailing whitespace - It equals `true`, `false`, or `null` (case-sensitive) - It looks like a number (e.g., `"42"`, `"-3.14"`, `"1e-6"`, or `"05"` with leading zeros) - It contains special characters: colon (`:`), quote (`"`), backslash (`\`), brackets, braces, or control characters (newline, tab, carriage return) - It contains the relevant delimiter (the active delimiter inside an array scope, or the document delimiter elsewhere) - It equals `"-"` or starts with `"-"` followed by any character Otherwise, strings can be unquoted. Unicode, emoji, and strings with internal (non-leading/trailing) spaces are safe unquoted: ```yaml message: Hello 世界 👋 note: This has inner spaces ``` ### Escape Sequences In quoted strings and keys, only five escape sequences are valid: | Character | Escape | |-----------|--------| | Backslash (`\`) | `\\` | | Double quote (`"`) | `\"` | | Newline (U+000A) | `\n` | | Carriage return (U+000D) | `\r` | | Tab (U+0009) | `\t` | All other escape sequences (e.g., `\x`, `\u`) are invalid and will cause an error in strict mode. ### Type Conversions Numbers are emitted in canonical decimal form (no exponent notation, no trailing zeros). Non-JSON types are normalized before encoding: | Input | Output | |-------|--------| | Finite number | Canonical decimal (e.g., `1e6` → `1000000`, `1.5000` → `1.5`, `-0` → `0`) | | `NaN`, `Infinity`, `-Infinity` | `null` | | `BigInt` (within safe range) | Number | | `BigInt` (out of range) | Quoted decimal string (e.g., `"9007199254740993"`) | | `Date` | ISO string in quotes (e.g., `"2025-01-01T00:00:00.000Z"`) | | `undefined`, `function`, `symbol` | `null` | Decoders accept both decimal and exponent forms on input (e.g., `42`, `-3.14`, `1e-6`), and treat tokens with forbidden leading zeros (e.g., `"05"`) as strings, not numbers. 
### Custom Serialization with toJSON Objects with a `toJSON()` method are serialized by calling the method and normalizing its result before encoding, similar to `JSON.stringify`: ```ts const obj = { data: 'example', toJSON() { return { info: this.data } } } encode(obj) // info: example ``` The `toJSON()` method: - Takes precedence over built-in normalization (Date, Array, Set, Map) - Returns a result that is itself recursively normalized - Is called for objects with `toJSON` in their prototype chain --- For complete rules on quoting, escaping, type conversions, and strict-mode decoding, see [spec §2–4 (data model), §7 (strings and keys), and §14 (strict mode)](https://github.com/toon-format/spec/blob/main/SPEC.md). ================================================ FILE: docs/guide/getting-started.md ================================================ # Getting Started ## What is TOON? **Token-Oriented Object Notation** is a compact, human-readable encoding of the JSON data model that minimizes tokens and makes structure easy for models to follow. It's intended for *LLM input* as a drop-in, lossless representation of your existing JSON. TOON combines YAML's indentation-based structure for nested objects with a CSV-style tabular layout for uniform arrays. TOON's sweet spot is uniform arrays of objects (multiple fields per row, same structure across items), achieving CSV-like compactness while adding explicit structure that helps LLMs parse and validate data reliably. Think of it as a translation layer: use JSON programmatically, and encode it as TOON for LLM input. ### Why TOON? Standard JSON is verbose and token-expensive. 
For uniform arrays of objects, JSON repeats every field name for every record: ```json { "users": [ { "id": 1, "name": "Alice", "role": "admin" }, { "id": 2, "name": "Bob", "role": "user" } ] } ``` YAML already reduces some redundancy with indentation instead of braces: ```yaml users: - id: 1 name: Alice role: admin - id: 2 name: Bob role: user ``` TOON goes further by declaring fields once and streaming data as rows: ```yaml users[2]{id,name,role}: 1,Alice,admin 2,Bob,user ``` The `[2]` declares the array length, enabling LLMs to answer dataset size questions and detect truncation. The `{id,name,role}` declares the field names. Each row is then a compact, comma-separated list of values. This is the core pattern: declare structure once, stream data compactly. The format approaches CSV's efficiency while adding explicit structure. For a more realistic example, here's how TOON handles a dataset with both nested objects and tabular arrays: ::: code-group ```json [JSON (235 tokens)] { "context": { "task": "Our favorite hikes together", "location": "Boulder", "season": "spring_2025" }, "friends": ["ana", "luis", "sam"], "hikes": [ { "id": 1, "name": "Blue Lake Trail", "distanceKm": 7.5, "elevationGain": 320, "companion": "ana", "wasSunny": true }, { "id": 2, "name": "Ridge Overlook", "distanceKm": 9.2, "elevationGain": 540, "companion": "luis", "wasSunny": false }, { "id": 3, "name": "Wildflower Loop", "distanceKm": 5.1, "elevationGain": 180, "companion": "sam", "wasSunny": true } ] } ``` ```yaml [TOON (106 tokens)] context: task: Our favorite hikes together location: Boulder season: spring_2025 friends[3]: ana,luis,sam hikes[3]{id,name,distanceKm,elevationGain,companion,wasSunny}: 1,Blue Lake Trail,7.5,320,ana,true 2,Ridge Overlook,9.2,540,luis,false 3,Wildflower Loop,5.1,180,sam,true ``` ::: Notice how TOON combines YAML's indentation for the `context` object with inline format for the primitive `friends` array and tabular format for the structured `hikes` array. 
Each format is chosen automatically based on the data structure. ### Design Goals TOON is optimized for specific use cases. It aims to: - Make uniform arrays of objects as compact as possible by declaring structure once and streaming data. - Stay fully lossless and deterministic – round-trips preserve all data and structure. - Keep parsing simple and robust for both LLMs and humans through explicit structure markers. - Provide validation guardrails (array lengths, field counts) that help detect truncation and malformed output. ## When to Use TOON TOON excels with uniform arrays of objects – data with the same structure across items. For LLM prompts, the format produces deterministic, minimally quoted text with built-in validation. Explicit array lengths (`[N]`) and field headers (`{fields}`) help detect truncation and malformed data, while the tabular structure declares fields once rather than repeating them in every row. ::: tip The TOON format is stable, but also an idea in progress. Nothing's set in stone – help shape where it goes by contributing to the [spec](https://github.com/toon-format/spec) or sharing feedback. ::: ## When Not to Use TOON TOON is not always the best choice. Consider alternatives when: - **Deeply nested or non-uniform structures** (tabular eligibility ≈ 0%): JSON-compact often uses fewer tokens. Example: complex configuration objects with many nested levels. - **Semi-uniform arrays** (~40–60% tabular eligibility): Token savings diminish. Prefer JSON if your pipelines already rely on it. - **Pure tabular data**: CSV is smaller than TOON for flat tables. TOON adds minimal overhead (~5-10%) to provide structure (array length declarations, field headers, delimiter scoping) that improves LLM reliability. - **Latency-critical applications**: Benchmark on your exact setup. Some deployments (especially local/quantized models) may process compact JSON faster despite TOON's lower token count. 
> [!NOTE] > For data-driven comparisons across different structures, see [benchmarks](/guide/benchmarks). When optimizing for latency, measure TTFT, tokens/sec, and total time for both TOON and JSON-compact and use whichever performs better in your specific environment. ## Installation ### TypeScript Library Install the library via your preferred package manager: ::: code-group ```bash [npm] npm install @toon-format/toon ``` ```bash [pnpm] pnpm add @toon-format/toon ``` ```bash [yarn] yarn add @toon-format/toon ``` ::: ### CLI The CLI can be used without installation via `npx`, or installed globally: ::: code-group ```bash [npx (no install)] npx @toon-format/cli input.json -o output.toon ``` ```bash [npm] npm install -g @toon-format/cli ``` ```bash [pnpm] pnpm add -g @toon-format/cli ``` ```bash [yarn] yarn global add @toon-format/cli ``` ::: For full CLI documentation, see the [CLI reference](/cli/). ## Media Type & File Extension TOON files conventionally use the `.toon` extension. For HTTP transmission, the provisional media type is `text/toon`, always with UTF-8 encoding. While you may specify `charset=utf-8` explicitly, it's optional – UTF-8 is the default assumption. This follows the registration process outlined in [spec §18.2](https://github.com/toon-format/spec/blob/main/SPEC.md#182-provisional-media-type). ## Your First Example The examples below use the TypeScript library for demonstration, but the same operations work in any language with a TOON implementation. 
Let's encode a simple dataset with the TypeScript library: ```ts import { encode } from '@toon-format/toon' const data = { users: [ { id: 1, name: 'Alice', role: 'admin' }, { id: 2, name: 'Bob', role: 'user' } ] } console.log(encode(data)) ``` **Output:** ```yaml users[2]{id,name,role}: 1,Alice,admin 2,Bob,user ``` ### Decoding Back to JSON Decoding is just as simple: ```ts import { decode } from '@toon-format/toon' const toon = ` users[2]{id,name,role}: 1,Alice,admin 2,Bob,user ` const data = decode(toon) console.log(JSON.stringify(data, null, 2)) ``` **Output:** ```json { "users": [ { "id": 1, "name": "Alice", "role": "admin" }, { "id": 2, "name": "Bob", "role": "user" } ] } ``` Round-tripping is lossless: `decode(encode(x))` always equals `x` (after normalization of non-JSON types like `Date`, `NaN`, etc.). ## Where to Go Next Now that you've seen your first TOON document, read the [Format Overview](/guide/format-overview) for complete syntax details (objects, arrays, quoting rules, key folding), then explore [Using TOON with LLMs](/guide/llm-prompts) to see how to use it effectively in prompts. For implementation details, check the [API Reference](/reference/api) (TypeScript) or the [Specification](/reference/spec) (language-agnostic normative rules). ================================================ FILE: docs/guide/llm-prompts.md ================================================ # Using TOON with LLMs TOON is designed for passing structured data to Large Language Models with reduced token costs and improved reliability. This guide shows how to use TOON effectively in prompts, both for input (sending data to models) and output (getting models to generate TOON). This guide is about the TOON format itself. Code examples use the TypeScript library for demonstration, but the same patterns and techniques apply regardless of which programming language you're using. 
## Why TOON for LLMs LLM tokens cost money, and JSON is verbose – repeating every field name for every record in an array. TOON minimizes tokens especially for uniform arrays by declaring fields once and streaming data as rows, typically saving 30-60% compared to formatted JSON. TOON adds structure guardrails: explicit `[N]` lengths and `{fields}` headers make it easier for models to track rows and for you to validate output. Strict mode helps detect truncation and malformed TOON when decoding model responses. ## Sending TOON as Input TOON works best when you show the format instead of describing it. The structure is self-documenting – models parse it naturally once they see the pattern. Wrap your encoded data in a fenced code block (label it ` ```toon` for clarity): ````md Data is in TOON format (2-space indent, arrays show length and fields). ```toon users[3]{id,name,role,lastLogin}: 1,Alice,admin,2025-01-15T10:30:00Z 2,Bob,user,2025-01-14T15:22:00Z 3,Charlie,user,2025-01-13T09:45:00Z ``` Task: Summarize the user roles and their last activity. ```` The indentation and headers are usually enough – models treat TOON like familiar YAML or CSV. The explicit array lengths (`[N]`) and field headers (`{fields}`) help the model track structure, especially for large tables. > [!NOTE] > Most models don't have built-in TOON syntax highlighting, so ` ```toon` or ` ```yaml` both work fine. The structure is what matters. ## Generating TOON from LLMs For output, be more explicit. When you want the model to **generate** TOON: - **Show the expected header** (e.g., `users[N]{id,name,role}:`). The model fills rows instead of repeating keys, reducing generation errors. - **State the rules**: 2-space indent, no trailing spaces, `[N]` matches row count. Here's a prompt that works for both reading and generating: ````md Data is in TOON format (2-space indent, arrays show length and fields). 
```toon users[3]{id,name,role,lastLogin}: 1,Alice,admin,2025-01-15T10:30:00Z 2,Bob,user,2025-01-14T15:22:00Z 3,Charlie,user,2025-01-13T09:45:00Z ``` Task: Return only users with role "user" as TOON. Use the same header format. Set [N] to match the row count. Output only the code block. ```` **Expected output:** ```toon users[2]{id,name,role,lastLogin}: 2,Bob,user,2025-01-14T15:22:00Z 3,Charlie,user,2025-01-13T09:45:00Z ``` The model adjusts `[N]` to `2` and generates two rows. ### Validation with Strict Mode When decoding model-generated TOON, use strict mode (default) to catch errors: ```ts import { decode } from '@toon-format/toon' try { const data = decode(modelOutput, { strict: true }) // Success – data is valid } catch (error) { // Model output was malformed (count mismatch, invalid escapes, etc.) console.error('Validation failed:', error.message) } ``` Strict mode checks counts, indentation, and escaping so you can detect truncation or malformed TOON. For complete details, see the [API Reference](/reference/api#decode). ## Delimiter Choices for Token Efficiency Use `delimiter: '\t'` for tab-separated tables if you want even fewer tokens. Tabs are single characters, often tokenize more efficiently than commas, and rarely appear in natural text (reducing quote-escaping). ```ts const toon = encode(data, { delimiter: '\t' }) ``` Tell the model "fields are tab-separated" when using tabs. For more on delimiters, see the [Format Overview](/guide/format-overview#delimiter-options). ## Streaming Large Outputs When working with large datasets (thousands of records or deeply nested structures), use `encodeLines()` to stream TOON output line-by-line instead of building the full string in memory. 
```ts import { encodeLines } from '@toon-format/toon' const largeData = await fetchThousandsOfRecords() // Stream large dataset without loading full string in memory for (const line of encodeLines(largeData, { delimiter: '\t' })) { process.stdout.write(`${line}\n`) } ``` The CLI also supports streaming for memory-efficient JSON-to-TOON conversion: ```bash toon large-dataset.json --output output.toon ``` This streaming approach prevents out-of-memory errors when preparing large context windows for LLMs. For complete details on `encodeLines()`, see the [API Reference](/reference/api#encodelines). **Consuming streaming LLM outputs:** If your LLM client exposes streaming text and you buffer by lines, you can decode TOON incrementally: ```ts import { decodeFromLines } from '@toon-format/toon' // Buffer streaming response into lines const lines: string[] = [] let buffer = '' for await (const chunk of modelStream) { buffer += chunk let index: number while ((index = buffer.indexOf('\n')) !== -1) { lines.push(buffer.slice(0, index)) buffer = buffer.slice(index + 1) } } // Decode buffered lines const data = decodeFromLines(lines) ``` For streaming decode APIs, see [`decodeFromLines()`](/reference/api#decodefromlines-lines-options) and [`decodeStream()`](/reference/api#decodestream-source-options). ## Tips and Pitfalls **Show, don't describe.** Don't explain TOON syntax in detail – just show an example. Models learn the pattern from context. A simple code block with 2-5 rows is more effective than paragraphs of explanation. **Keep examples small.** Use 2-5 rows in your examples, not hundreds. The model generalizes from the pattern. Large examples waste tokens without improving accuracy. **Always validate output.** Decode generated TOON with `strict: true` (default) to catch errors early. Don't assume model output is valid TOON without checking. ## Real-World Example Here's a complete workflow: send data to a model and validate its TOON response. 
**Prompt with TOON input:** ````md System logs in TOON format (tab-separated): ```toon events[4 ]{id level message timestamp}: 1 error Connection timeout 2025-01-15T10:00:00Z 2 warn Slow query 2025-01-15T10:05:00Z 3 info User login 2025-01-15T10:10:00Z 4 error Database error 2025-01-15T10:15:00Z ``` Task: Return only error-level events as TOON. Use the same format. ```` **Validate the response:** ```ts import { decode } from '@toon-format/toon' const modelResponse = ` events[2 ]{id level message timestamp}: 1 error Connection timeout 2025-01-15T10:00:00Z 4 error Database error 2025-01-15T10:15:00Z ` const filtered = decode(modelResponse, { strict: true }) // ✓ Validated – model correctly filtered and adjusted [N] to 2 ``` ================================================ FILE: docs/index.md ================================================ --- layout: home titleTemplate: Token-Oriented Object Notation hero: name: TOON text: Token-Oriented Object Notation tagline: A compact, human-readable encoding of the JSON data model for LLM prompts. image: dark: /logo-index-dark.svg light: /logo-index-light.svg alt: TOON Logo actions: - theme: brand text: What is TOON? link: /guide/getting-started - theme: alt text: Benchmarks link: /guide/benchmarks - theme: alt text: Playground link: /playground - theme: alt text: CLI link: /cli/ features: - title: Token-Efficient & Accurate icon: 📊 details: TOON reaches 74% accuracy (vs JSON's 70%) while using ~40% fewer tokens in mixed-structure benchmarks across 4 models. link: /guide/benchmarks - title: JSON Data Model icon: 🔁 details: Encodes the same objects, arrays, and primitives as JSON with deterministic, lossless round-trips. link: /guide/format-overview - title: LLM-Friendly Guardrails icon: 🛤️ details: Explicit [N] lengths and {fields} headers give models a clear schema to follow, improving parsing reliability. 
link: /guide/format-overview#arrays - title: Minimal Syntax icon: 📐 details: Uses indentation instead of braces and minimizes quoting, giving YAML-like readability with CSV-style compactness. link: /guide/format-overview#arrays - title: Tabular Arrays icon: 🧺 details: Uniform arrays of objects collapse into tables that declare fields once and stream row values line by line. link: /guide/format-overview#arrays - title: Multi-Language Ecosystem icon: 🌐 details: Spec-driven implementations in TypeScript, Python, Go, Rust, .NET, and other languages. link: /ecosystem/implementations --- ================================================ FILE: docs/package.json ================================================ { "name": "@toon-format/docs", "type": "module", "private": true, "scripts": { "dev": "vitepress dev", "build": "vitepress build", "preview": "vitepress preview" }, "devDependencies": { "@vueuse/core": "^14.2.1", "fflate": "^0.8.2", "gpt-tokenizer": "^3.4.0", "markdown-it-mathjax3": "^4.3.2", "uint8array-extras": "^1.5.0", "unocss": "^66.6.5", "vitepress": "^1.6.4", "vitepress-plugin-llms": "^1.11.0" } } ================================================ FILE: docs/playground.md ================================================ --- layout: PlaygroundLayout title: Playground --- ================================================ FILE: docs/reference/api.md ================================================ # API Reference TypeScript/JavaScript API documentation for the `@toon-format/toon` package. For format rules, see the [Format Overview](/guide/format-overview) or the [Specification](/reference/spec). For other languages, see [Implementations](/ecosystem/implementations). ## Installation ::: code-group ```bash [npm] npm install @toon-format/toon ``` ```bash [pnpm] pnpm add @toon-format/toon ``` ```bash [yarn] yarn add @toon-format/toon ``` ::: ## Encoding Functions ### `encode(input, options?)` Converts any JSON-serializable value to TOON format. 
```ts import { encode } from '@toon-format/toon' const toon = encode(data, { indent: 2, delimiter: ',', keyFolding: 'off', flattenDepth: Infinity }) ``` #### Parameters | Parameter | Type | Description | |-----------|------|-------------| | `input` | `unknown` | Any JSON-serializable value (object, array, primitive, or nested structure) | | `options` | `EncodeOptions?` | Optional encoding options (see [Configuration Reference](#configuration-reference)) | #### Return Value Returns a TOON-formatted string with no trailing newline or spaces. #### Type Normalization Non-JSON-serializable values are normalized before encoding: | Input | Output | |-------|--------| | `Object` with `toJSON()` method | Result of calling `toJSON()`, recursively normalized | | Finite number | Canonical decimal (no exponent, no leading/trailing zeros: `1e6` → `1000000`, `-0` → `0`) | | `NaN`, `Infinity`, `-Infinity` | `null` | | `BigInt` (within safe range) | Number | | `BigInt` (out of range) | Quoted decimal string (e.g., `"9007199254740993"`) | | `Date` | ISO string in quotes (e.g., `"2025-01-01T00:00:00.000Z"`) | | `undefined`, `function`, `symbol` | `null` | #### Example ```ts import { encode } from '@toon-format/toon' const items = [ { sku: 'A1', qty: 2, price: 9.99 }, { sku: 'B2', qty: 1, price: 14.5 } ] console.log(encode({ items })) ``` **Output:** ```yaml items[2]{sku,qty,price}: A1,2,9.99 B2,1,14.5 ``` ### `encodeLines(input, options?)` **Preferred method for streaming TOON output.** Converts any JSON-serializable value to TOON format as a sequence of lines, without building the full string in memory. Suitable for streaming large outputs to files, HTTP responses, or process stdout. 
```ts import { encodeLines } from '@toon-format/toon' // Stream to stdout (Node.js) for (const line of encodeLines(data)) { process.stdout.write(`${line}\n`) } // Write to file line-by-line const lines = encodeLines(data, { indent: 2, delimiter: '\t' }) for (const line of lines) { await writeToStream(`${line}\n`) } // Collect to array const lineArray = Array.from(encodeLines(data)) ``` #### Parameters | Parameter | Type | Description | |-----------|------|-------------| | `input` | `unknown` | Any JSON-serializable value (object, array, primitive, or nested structure) | | `options` | `EncodeOptions?` | Optional encoding options (see [Configuration Reference](#configuration-reference)) | #### Return Value Returns an `Iterable<string>` that yields TOON lines one at a time. **Each yielded string is a single line without a trailing newline character** – you must add `\n` when writing to streams or stdout. ::: info Relationship to `encode()` `encode(value, options)` is equivalent to: ```ts Array.from(encodeLines(value, options)).join('\n') ``` ::: #### Example ```ts import { createWriteStream } from 'node:fs' import { encodeLines } from '@toon-format/toon' const data = { items: Array.from({ length: 100000 }, (_, i) => ({ id: i, name: `Item ${i}`, value: Math.random() })) } // Stream large dataset to file const stream = createWriteStream('output.toon') for (const line of encodeLines(data, { delimiter: '\t' })) { stream.write(`${line}\n`) } stream.end() ``` ### Replacer Function The `replacer` option allows you to transform or filter values during encoding. It works similarly to `JSON.stringify`'s replacer parameter, but with path tracking for more precise control.
#### Type Signature ```typescript type EncodeReplacer = ( key: string, value: JsonValue, path: readonly (string | number)[] ) => unknown ``` #### Parameters | Parameter | Type | Description | |-----------|------|-------------| | `key` | `string` | Property name, array index (as string), or empty string for root | | `value` | `JsonValue` | The normalized value at this location | | `path` | `readonly (string \| number)[]` | Path from root to current value | #### Return Value - Return the value unchanged to keep it - Return a different value to replace it (will be normalized) - Return `undefined` to omit properties/array elements - For root value, `undefined` means "no change" (root cannot be omitted) #### Examples **Filtering sensitive data:** ```typescript import { encode } from '@toon-format/toon' const data = { user: { name: 'Alice', password: 'secret123', email: 'alice@example.com' } } function replacer(key, value) { if (key === 'password') return undefined return value } console.log(encode(data, { replacer })) ``` **Output:** ```yaml user: name: Alice email: alice@example.com ``` **Transforming values:** ```typescript const data = { user: 'alice', role: 'admin' } function replacer(key, value) { if (typeof value === 'string') return value.toUpperCase() return value } console.log(encode(data, { replacer })) ``` **Output:** ```yaml user: ALICE role: ADMIN ``` **Path-based transformations:** ```typescript const data = { metadata: { created: '2025-01-01' }, user: { created: '2025-01-02' } } function replacer(key, value, path) { // Add timezone info only to top-level metadata if (path.length === 1 && path[0] === 'metadata' && key === 'created') { return `${value}T00:00:00Z` } return value } console.log(encode(data, { replacer })) ``` **Output:** ```yaml metadata: created: 2025-01-01T00:00:00Z user: created: 2025-01-02 ``` ::: tip Replacer Execution Order The replacer is called in a depth-first manner: 1. Root value first (key = `''`, path = `[]`) 2. 
Then each property/element (with proper key and path) 3. Values are re-normalized after replacement 4. Children are processed after parent transformation ::: ::: warning Array Indices as Strings Following `JSON.stringify` behavior, array indices are passed as strings (`'0'`, `'1'`, `'2'`, etc.) to the replacer, not as numbers. ::: ## Decoding Functions ### `decode(input, options?)` Converts a TOON-formatted string back to JavaScript values. ```ts import { decode } from '@toon-format/toon' const data = decode(toon, { indent: 2, strict: true, expandPaths: 'off' }) ``` #### Parameters | Parameter | Type | Description | |-----------|------|-------------| | `input` | `string` | A TOON-formatted string to parse | | `options` | `DecodeOptions?` | Optional decoding options (see [Configuration Reference](#configuration-reference)) | #### Return Value Returns a JavaScript value (object, array, or primitive) representing the parsed TOON data. #### Example ```ts import { decode } from '@toon-format/toon' const toon = ` items[2]{sku,qty,price}: A1,2,9.99 B2,1,14.5 ` const data = decode(toon) console.log(data) ``` **Output:** ```json { "items": [ { "sku": "A1", "qty": 2, "price": 9.99 }, { "sku": "B2", "qty": 1, "price": 14.5 } ] } ``` ### `decodeFromLines(lines, options?)` Decodes TOON format from pre-split lines into a JavaScript value. This is a streaming-friendly wrapper around the event-based decoder that builds the full value in memory. Useful when you already have lines as an array or iterable (e.g., from file streams, readline interfaces, or network responses) and want the standard decode behavior with path expansion support. 
#### Parameters | Parameter | Type | Description | |-----------|------|-------------| | `lines` | `Iterable<string>` | Iterable of TOON lines (without trailing newlines) | | `options` | `DecodeOptions?` | Optional decoding configuration (see [Configuration Reference](#configuration-reference)) | #### Return Value Returns a `JsonValue` (the parsed JavaScript value: object, array, or primitive). #### Example **Basic usage with arrays:** ```ts import { decodeFromLines } from '@toon-format/toon' const lines = ['name: Alice', 'age: 30'] const value = decodeFromLines(lines) // { name: 'Alice', age: 30 } ``` **Streaming from Node.js readline:** ```ts import { createReadStream } from 'node:fs' import { createInterface } from 'node:readline' import { decodeFromLines } from '@toon-format/toon' const rl = createInterface({ input: createReadStream('data.toon'), crlfDelay: Infinity, }) const value = decodeFromLines(rl) console.log(value) ``` **With path expansion:** ```ts const lines = ['user.name: Alice', 'user.age: 30'] const value = decodeFromLines(lines, { expandPaths: 'safe' }) // { user: { name: 'Alice', age: 30 } } ``` ### Choosing the Right Decoder | Function | Input | Output | Async | Path Expansion | Use When | |----------|-------|--------|-------|----------------|----------| | `decode()` | String | Value | No | Yes | You have a complete TOON string | | `decodeFromLines()` | Lines | Value | No | Yes | You have lines and want the full value | | `decodeStreamSync()` | Lines | Events | No | No | You need event-by-event processing (sync) | | `decodeStream()` | Lines | Events | Yes | No | You need event-by-event processing (async) | ::: info Key Differences - **Value vs. Events**: The streaming functions (`decodeStreamSync()` and `decodeStream()`) yield events without building the full value in memory. - **Path expansion**: Only `decode()` and `decodeFromLines()` support `expandPaths: 'safe'`. - **Async support**: Only `decodeStream()` accepts async iterables (useful for file/network streams).
::: ## Streaming Decoders ### `decodeStreamSync(lines, options?)` Synchronously decodes TOON lines into a stream of JSON events. This function yields structured events that represent the JSON data model without building the full value tree. Useful for streaming processing, custom transformations, or memory-efficient parsing of large datasets where you don't need the full value in memory. ::: tip Event Streaming This is a low-level API that returns individual parse events. For most use cases, [`decodeFromLines()`](#decodefromlines-lines-options) or [`decode()`](#decode-input-options) is more convenient. Path expansion (`expandPaths: 'safe'`) is **not supported** in streaming mode since it requires the full value tree. ::: #### Parameters | Parameter | Type | Description | |-----------|------|-------------| | `lines` | `Iterable<string>` | Iterable of TOON lines (without trailing newlines) | | `options` | `DecodeStreamOptions?` | Optional streaming decoding configuration (see [Configuration Reference](#configuration-reference)) | #### Return Value Returns an `Iterable<JsonStreamEvent>` that yields structured events (see [TypeScript Types](#typescript-types) for event structure).
#### Example **Basic event streaming:** ```ts import { decodeStreamSync } from '@toon-format/toon' const lines = ['name: Alice', 'age: 30'] for (const event of decodeStreamSync(lines)) { console.log(event) } // Output: // { type: 'startObject' } // { type: 'key', key: 'name' } // { type: 'primitive', value: 'Alice' } // { type: 'key', key: 'age' } // { type: 'primitive', value: 30 } // { type: 'endObject' } ``` **Custom processing:** ```ts import { decodeStreamSync } from '@toon-format/toon' const lines = ['users[2]{id,name}:', ' 1,Alice', ' 2,Bob'] let userCount = 0 for (const event of decodeStreamSync(lines)) { if (event.type === 'endObject' && userCount < 2) { userCount++ console.log(`Processed user ${userCount}`) } } ``` ### `decodeStream(source, options?)` Asynchronously decodes TOON lines into a stream of JSON events. This is the async version of [`decodeStreamSync()`](#decodestreamsync-lines-options), supporting both synchronous and asynchronous iterables. Useful for processing file streams, network responses, or other async sources where you want to handle data incrementally as it arrives. #### Parameters | Parameter | Type | Description | |-----------|------|-------------| | `source` | `AsyncIterable<string>` \| `Iterable<string>` | Async or sync iterable of TOON lines (without trailing newlines) | | `options` | `DecodeStreamOptions?` | Optional streaming decoding configuration (see [Configuration Reference](#configuration-reference)) | #### Return Value Returns an `AsyncIterable<JsonStreamEvent>` that yields structured events asynchronously (see [TypeScript Types](#typescript-types) for event structure).
#### Example **Streaming from file:** ```ts import { createReadStream } from 'node:fs' import { createInterface } from 'node:readline' import { decodeStream } from '@toon-format/toon' const fileStream = createReadStream('data.toon', 'utf-8') const rl = createInterface({ input: fileStream, crlfDelay: Infinity }) for await (const event of decodeStream(rl)) { console.log(event) // Process events as they arrive } ``` ## Configuration Reference ### `EncodeOptions` Configuration for [`encode()`](#encode-input-options) and [`encodeLines()`](#encodelines-input-options): | Option | Type | Default | Description | |--------|------|---------|-------------| | `indent` | `number` | `2` | Number of spaces per indentation level | | `delimiter` | `','` \| `'\t'` \| `'\|'` | `','` | Delimiter for array values and tabular rows | | `keyFolding` | `'off'` \| `'safe'` | `'off'` | Enable key folding to collapse single-key wrapper chains into dotted paths | | `flattenDepth` | `number` | `Infinity` | Maximum number of segments to fold when `keyFolding` is enabled (values 0-1 have no practical effect) | | `replacer` | `EncodeReplacer` | `undefined` | Optional hook to transform or omit values before encoding (see [Replacer Function](#replacer-function)) | **Delimiter options:** ::: code-group ```ts [Comma (default)] encode(data, { delimiter: ',' }) ``` ```ts [Tab] encode(data, { delimiter: '\t' }) ``` ```ts [Pipe] encode(data, { delimiter: '|' }) ``` ::: See [Delimiter Strategies](#delimiter-strategies) for guidance on choosing delimiters. 
### `DecodeOptions` Configuration for [`decode()`](#decode-input-options) and [`decodeFromLines()`](#decodefromlines-lines-options): | Option | Type | Default | Description | |--------|------|---------|-------------| | `indent` | `number` | `2` | Expected number of spaces per indentation level | | `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) | | `expandPaths` | `'off'` \| `'safe'` | `'off'` | Enable path expansion to reconstruct dotted keys into nested objects (pairs with `keyFolding: 'safe'`) | By default (`strict: true`), the decoder validates input strictly: - **Invalid escape sequences**: Throws on `\x`, unterminated strings - **Syntax errors**: Throws on missing colons, malformed headers - **Array length mismatches**: Throws when declared length doesn't match actual count - **Delimiter mismatches**: Throws when row delimiters don't match header - **Indentation errors**: Throws when leading spaces aren't exact multiples of `indent` Set `strict: false` to skip validation for lenient parsing. See [Key Folding & Path Expansion](#key-folding-path-expansion) for more details on path expansion behavior and conflict resolution. ### `DecodeStreamOptions` Configuration for [`decodeStreamSync()`](#decodestreamsync-lines-options) and [`decodeStream()`](#decodestream-source-options): | Option | Type | Default | Description | |--------|------|---------|-------------| | `indent` | `number` | `2` | Expected number of spaces per indentation level | | `strict` | `boolean` | `true` | Enable strict validation (array counts, indentation, delimiter consistency) | ::: warning Path Expansion Not Supported Path expansion requires building the full value tree, which is incompatible with event streaming. Use [`decodeFromLines()`](#decodefromlines-lines-options) if you need path expansion. 
::: ## TypeScript Types ### `JsonStreamEvent` Events emitted by [`decodeStreamSync()`](#decodestreamsync-lines-options) and [`decodeStream()`](#decodestream-source-options): ```ts type JsonStreamEvent = | { type: 'startObject' } | { type: 'endObject' } | { type: 'startArray', length: number } | { type: 'endArray' } | { type: 'key', key: string, wasQuoted?: boolean } | { type: 'primitive', value: JsonPrimitive } type JsonPrimitive = string | number | boolean | null ``` ## Guides & Examples ### Round-Trip Compatibility TOON provides lossless round-trips after normalization: ```ts import { decode, encode } from '@toon-format/toon' const original = { users: [ { id: 1, name: 'Alice', role: 'admin' }, { id: 2, name: 'Bob', role: 'user' } ] } const toon = encode(original) const restored = decode(toon) console.log(JSON.stringify(original) === JSON.stringify(restored)) // true ``` **With Key Folding:** ```ts import { decode, encode } from '@toon-format/toon' const original = { data: { metadata: { items: ['a', 'b'] } } } // Encode with folding const toon = encode(original, { keyFolding: 'safe' }) // → "data.metadata.items[2]: a,b" // Decode with expansion const restored = decode(toon, { expandPaths: 'safe' }) // → { data: { metadata: { items: ['a', 'b'] } } } console.log(JSON.stringify(original) === JSON.stringify(restored)) // true ``` ### Key Folding & Path Expansion **Key Folding** (`keyFolding: 'safe'`) collapses single-key wrapper chains during encoding: ```ts import { encode } from '@toon-format/toon' const data = { data: { metadata: { items: ['a', 'b'] } } } // Without folding encode(data) // data: // metadata: // items[2]: a,b // With folding encode(data, { keyFolding: 'safe' }) // data.metadata.items[2]: a,b ``` **Path Expansion** (`expandPaths: 'safe'`) reverses this during decoding: ```ts import { decode } from '@toon-format/toon' const toon = 'data.metadata.items[2]: a,b' const data = decode(toon, { expandPaths: 'safe' }) console.log(data) // { data: { metadata: 
{ items: ['a', 'b'] } } } ``` **Expansion Conflict Resolution:** When multiple expanded keys construct overlapping paths, the decoder merges them recursively: - **Object + Object**: Deep merge recursively - **Object + Non-object** (array or primitive): Conflict - With `strict: true` (default): Error - With `strict: false`: Last-write-wins (LWW) ### Delimiter Strategies Tab delimiters (`\t`) often tokenize more efficiently than commas, as tabs are single characters that rarely appear in natural text. This reduces the need for quote-escaping, leading to smaller token counts in large datasets. Example: ```yaml items[2 ]{sku name qty price}: A1 Widget 2 9.99 B2 Gadget 1 14.5 ``` For maximum token savings on large tabular data, combine tab delimiters with key folding: ```ts encode(data, { delimiter: '\t', keyFolding: 'safe' }) ``` **Choosing a Delimiter:** - **Comma (`,`)**: Default, widely understood, good for simple tabular data. - **Tab (`\t`)**: Best for LLM token efficiency, excellent for large datasets. - **Pipe (`|`)**: Alternative when commas appear frequently in data. ================================================ FILE: docs/reference/efficiency-formalization.md ================================================ # TOON vs JSON: Byte-Level Efficiency Model A mathematical analysis of TOON's byte efficiency compared to JSON across different data structures. ::: info Scope of This Document This page presents a theoretical, character-based comparison between TOON and JSON. For practical benchmarks and token counts, see [Benchmarks](/guide/benchmarks). It is an **advanced, non-normative** reference: it explains TOON's design from a mathematical angle but does not change the TOON specification. ::: ## Overview Large Language Models increasingly rely on structured data for inference and function calling. However, standard formats like JSON introduce significant verbosity that inflates token usage and inference costs.
This analysis presents a formal mathematical comparison between TOON and JSON to evaluate whether TOON achieves quantifiable efficiency gains by eliminating structural redundancy. Under the assumptions described below (compact JSON, canonical TOON, ASCII keys and punctuation, shallow to moderate nesting, and mostly unquoted TOON strings), TOON's **structural overhead is lower than compact JSON** for the structure families analyzed here, except arrays of arrays. ### Key Findings - **Tabular arrays** represent TOON's optimal use case, with efficiency gains scaling linearly with both row count and field count. - **Simple objects and primitive arrays** show consistent byte reduction, with savings proportional to the number of fields or elements. - **Nested objects** benefit from reduced overhead, though efficiency decreases with depth due to indentation costs; at sufficient depth, compact JSON can become smaller. - **Arrays of arrays** are the only structure where TOON is less efficient than JSON in this analysis, due to TOON's explicit list markers and inner array headers. ## Methodology We define recursive byte-length functions $L_{\text{json}}$ and $L_{\text{toon}}$ for both formats, then derive the efficiency delta: $$ \Delta = L_{\text{json}}(\Omega) - L_{\text{toon}}(\Omega) $$ Where $\Omega$ represents the data structure under comparison. If $\Delta > 0$, TOON uses fewer bytes than JSON for that structure. ::: info Scope & Assumptions - **Compact JSON**: JSON is assumed to be compact (no spaces or newlines outside strings). Byte counts are computed on this compact form. - **Canonical TOON**: TOON is assumed to follow canonical formatting (indent = 2 spaces, exactly one space after `:`, no spaces after commas in arrays/field lists, no trailing spaces). - **Keys and strings**: All keys are "simple" ASCII identifier-style keys that: - must be quoted in JSON, and - can be left unquoted in TOON (no characters that would force quoting). 
Many examples assume values are numbers, booleans, null, or TOON-safe strings that can be unquoted in TOON but must be quoted in JSON. - **Numbers**: Both formats are assumed to use the same canonical decimal representation (no exponent notation), matching TOON's requirement. JSON could use exponent forms; we ignore that here to isolate structural differences. - **ASCII/UTF-8**: Keys and structural tokens are assumed ASCII, so byte length equals character count ($|x|_{\text{utf8}} = |x|_{\text{char}}$). Non-ASCII content affects both formats similarly and does not change the structural conclusions. - **Nesting depth**: Closed-form expressions are given for flat structures and a single level of nesting. Each additional nesting level in TOON adds 2 bytes of indentation per nested line. At sufficient depth, the braces of compact JSON can win over TOON's indentation (as seen in [When Not to Use TOON](/guide/getting-started#when-not-to-use-toon)). - **Byte vs token count**: Modern LLM tokenizers operate over UTF-8 bytes, so byte length is a good upper bound and first-order proxy for token count, even though the mapping is not exactly linear. ::: Think of this as a simplified structural model: we strip away real-world noise and ask, "if you only count structural characters, how do JSON and TOON compare?" ## Formal Notation ### Data Model Let $\omega$ be a primitive value such that $\omega \in \{\text{string, number, boolean, null}\}$. Let $\mathcal{O}$ be an object composed of $n$ key-value pairs: $$ \mathcal{O} = \{(k_1, v_1), (k_2, v_2), \dots, (k_n, v_n)\} $$ Let $\mathcal{A}$ be an array composed of $n$ elements: $$ \mathcal{A} = \{v_1, v_2, \dots, v_n\} $$ Where: - $k_i$ is a key (string) - $v_i$ can be a primitive value $\omega$, an object $\mathcal{O}$, or an array $\mathcal{A}$ Therefore: $v_i \in \{\omega, \mathcal{O}, \mathcal{A}\}$ ### String Length Let $\mathcal{S}$ be the set of valid Unicode strings. 
For any string $x \in \mathcal{S}$, we denote $|x|_{\text{utf8}}$ as the byte-length of $x$ under UTF-8 encoding. ### Integer Length Let $n \in \mathbb{Z}_{\ge 0}$ be a non-negative integer. The number of bytes required to represent $n$ in decimal format is: $$ L_{\text{num}}(n) = \begin{cases} 1 & \text{if } n = 0 \\ \lfloor \log_{10}(|n|) \rfloor + 1 & \text{if } n > 0 \end{cases} $$ ## JSON Size Functions For a flat object of $n$ keys: $$ L_{\text{json}}(\mathcal{O}) = \underbrace{2}_{\{\}} + \sum_{i=1}^{n} (L_{\text{str}}(k_i) + \underbrace{1}_{:} + L_{\text{json}}(v_i)) + \underbrace{(n-1)}_{\text{commas}} $$ Where $L_{\text{str}}(k)$ is the length of the key including its mandatory quotes: $$ L_{\text{str}}(k) = |k|_{\text{utf8}} + \underbrace{2}_{\text{quotes}} $$ ### Primitive Values in JSON When $v_i$ is a primitive data type $\omega$: | Type | Formula | |------|---------| | String | $L_{\text{str}}(v_i) = \lvert v_i\rvert_{\text{utf8}} + 2$ | | Number | $L_{\text{num}}(v_i) = \lvert v_i\rvert_{\text{utf8}}$ | | Boolean | $L_{\text{bool}}(v_i) = \lvert v_i\rvert_{\text{utf8}}$ | | Null | $L_{\text{null}}(v_i) = \lvert v_i\rvert_{\text{utf8}}$ | ### Arrays in JSON When $v_i$ is an array $\mathcal{A}$: $$ L_{\text{json}}(\mathcal{A}) = \underbrace{2}_{\text{[]}} + \sum_{i=1}^{n} L_{\text{json}}(v_i) + \underbrace{(n-1)}_{\text{commas}} $$ ## TOON Size Functions For a flat object of $n$ keys: $$ L_{\text{toon}}(\mathcal{O}) = \sum_{i=1}^{n} (L_{\text{str}}(k_i) + \underbrace{1}_{:} + \underbrace{1}_{\text{space}} + L_{\text{toon}}(v_i)) + \underbrace{(n-1)}_{\text{newlines}} $$ Where $L_{\text{str}}(k)$ is the length of the key (no quotes required for simple keys): $$ L_{\text{str}}(k) = |k|_{\text{utf8}} $$ ### Primitive Values in TOON When $v_i$ is a primitive data type $\omega$: | Type | Formula | |------|---------| | String (normal) | $L_{\text{str}}(v_i) = \lvert v_i\rvert_{\text{utf8}}$ | | String (looks like number/boolean) | $L_{\text{str}}(v_i) = 
\lvert v_i\rvert_{\text{utf8}} + 2$ | | Number | $L_{\text{num}}(v_i) = \lvert v_i\rvert_{\text{utf8}}$ | | Boolean | $L_{\text{bool}}(v_i) = \lvert v_i\rvert_{\text{utf8}}$ | | Null | $L_{\text{null}}(v_i) = \lvert v_i\rvert_{\text{utf8}}$ | ### Simple Arrays in TOON Here $L_{\text{toon}}(\mathcal{A})$ refers to the length of the whole field line `key[N]: ...`, not just the array value. When $v_i$ is a simple array $\mathcal{A}$: $$ L_{\text{toon}}(\mathcal{A}) = L_{\text{str}}(k_i) + \underbrace{1}_{\text{[}} + L_{\text{num}}(n) + \underbrace{1}_{\text{]}} + \underbrace{1}_{:} + \underbrace{1}_{\text{space}} + \sum_{i=1}^{n} L_{\text{toon}}(v_i) + \underbrace{(n-1)}_{\text{commas}} $$ ### Tabular Arrays in TOON When $v_i$ is an array of objects with $m$ fields: $$ \begin{split} L_{\text{toon}}(\mathcal{A}') = L_{\text{str}}(k_i) + \underbrace{1}_{\text{[}} + L_{\text{num}}(n) + \underbrace{1}_{\text{]}} + \underbrace{1}_{\{} + \\ \sum_{i=1}^{m} L_{\text{str}}(k_i) + \underbrace{(m-1)}_{\text{commas}} + \underbrace{1}_{\}} + \underbrace{1}_{:} + \\ \underbrace{2n}_{\text{indents}} + \sum_{i=1}^{n}\sum_{j=1}^{m} L_{\text{toon}}(v_{ij}) + \underbrace{(m-1)n}_{\text{commas}} + \underbrace{n}_{\text{newlines}} \end{split} $$ *Note: The term $2n$ assumes an indentation size of 2 spaces.* ## Efficiency Analysis by Structure Each subsection below focuses on a particular structure family, states the resulting formula, and shows a small example. Intuitively, TOON tends to win when it can: - avoid repeating keys (tabular arrays), - avoid quoting keys and many values, - and replace braces with indentation, and tends to lose when it pays a fixed overhead per element (arrays of arrays) or deep indentation (heavily nested configs). ### Simple Objects Flat objects with primitive string values are the easiest win: JSON pays for braces and quoted keys and strings, while TOON drops braces at the root, omits quotes on simple keys, and uses one line per field. 
For objects with only string primitives: $$ \Delta_{\text{obj}} = 2 + n + \sum_{i=1}^{n}(L_{\text{json}}(v_i)) - \sum_{i=1}^{n}(L_{\text{toon}}(v_i)) $$ If all values are strings that can be unquoted in TOON, this simplifies to: $$ f(n) = 2 + 3n $$ **Example:** For 1,000,000 objects, TOON saves **3,000,002 bytes ≈ 2.86 MB**. #### Empirical Validation ::: code-group ```json [JSON (21 bytes)] { "id": 1, "name": "Ada" } ``` ```yaml [TOON (15 bytes)] id: 1 name: Ada ``` ::: $$ \Delta_{\text{obj}} = 2 + \underbrace{2}_{n} + \underbrace{6}_{\sum L_{\text{json}}(v_i)} - \underbrace{4}_{\sum L_{\text{toon}}(v_i)} = 6 $$ ### Nested Objects Adding a wrapper object (one extra level of nesting) introduces extra braces for JSON and extra indentation and newlines for TOON. For a single level of nesting with primitive values, TOON still comes out ahead, but the net advantage is smaller. For a single level of nesting with primitives: $$ f(n) = 5 + n $$ **Example:** For 1,000,000 nested objects (depth 1), TOON saves **1,000,005 bytes ≈ 0.95 MB**. ::: warning Caveat This formula is for a single nesting level. Each additional nesting level adds 2 spaces of indentation per nested line; at sufficient depth, compact JSON can become smaller, especially when tabular opportunities disappear (see [When Not to Use TOON](/guide/getting-started#when-not-to-use-toon) and the "Deeply nested configuration" dataset in [Benchmarks](/guide/benchmarks)). ::: #### Empirical Validation ::: code-group ```json [JSON (30 bytes)] { "user": { "id": 1, "name": "Ada" } } ``` ```yaml [TOON (25 bytes)] user: id: 1 name: Ada ``` ::: $$ \Delta_{\text{nested}} = 5 $$ ### Primitive Arrays For arrays of string primitives, JSON writes `["foo","bar","baz"]`, quoting every string and using `[]` for the array. TOON writes `key[N]: foo,bar,baz`, paying once for the length marker but omitting most quotes. 
For arrays of $n$ string primitives: $$ \Delta_{\text{arr}} = 3 - L_{\text{num}}(n) + \sum_{i=1}^{n}(L_{\text{json}}(v_i)) - \sum_{i=1}^{n}(L_{\text{toon}}(v_i)) $$ With string values that can be unquoted in TOON, this simplifies to: $$ f(n) = 2 + 2n - \lfloor \log_{10}(|n|) \rfloor $$ **Example:** For 1,000,000 elements, TOON saves **1,999,996 bytes ≈ 1.91 MB**. #### Empirical Validation ::: code-group ```json [JSON (28 bytes)] { "tags": ["foo", "bar", "baz"] } ``` ```yaml [TOON (20 bytes)] tags[3]: foo,bar,baz ``` ::: $$ \Delta_{\text{arr}} = 3 - \underbrace{1}_{L_{\text{num}}(3)} + \underbrace{15}_{\sum L_{\text{json}}} - \underbrace{9}_{\sum L_{\text{toon}}} = 8 $$ ### Root Arrays At the root, JSON writes `["x","y","z"]`; TOON writes `[3]: x,y,z`. There is no object key cost, so the advantage mainly comes from not quoting TOON-safe strings and from replacing `[]` with `[N]:`. For root-level arrays of $n$ string primitives: $$ f(n) = -3 + 2n - \lfloor \log_{10}(|n|) \rfloor $$ **Example:** For 1,000,000 elements, TOON saves **1,999,991 bytes ≈ 1.91 MB**. #### Empirical Validation ::: code-group ```json [JSON (13 bytes)] ["x", "y", "z"] ``` ```yaml [TOON (10 bytes)] [3]: x,y,z ``` ::: $$ \Delta_{\text{root}} = \underbrace{9}_{\sum L_{\text{json}}} - 2 - \underbrace{1}_{L_{\text{num}}(3)} - \underbrace{3}_{\sum L_{\text{toon}}} = 3 $$ ### Tabular Arrays Uniform arrays of objects are TOON's sweet spot. JSON repeats every key for every row, while TOON declares the length and column names once (`key[N]{id,qty,...}:`) and streams rows as bare values. For arrays of objects with $n$ rows and $m$ fields, assuming numeric values and $|k| = 3$: $$ f(n) = 1 + nm(3 + |k|) - m(1 + |k|) - \lfloor \log_{10}(|n|) \rfloor $$ **Example:** For 1,000,000 rows with 2 fields and 3-character field names, TOON saves **11,999,987 bytes ≈ 11.44 MB**. 
This is where TOON's design (declare fields once, stream rows) pays off most strongly: savings grow linearly with both row count and field count. #### Empirical Validation ::: code-group ```json [JSON (45 bytes)] { "items": [{ "id": 1, "qty": 5 }, { "id": 2, "qty": 3 }] } ``` ```yaml [TOON (29 bytes)] items[2]{id,qty}: 1,5 2,3 ``` ::: $$ \Delta_{\text{tab}} = 2 + \underbrace{4}_{nm} - \underbrace{2}_{m} + \underbrace{22}_{\Sigma L_{\text{json}}} - \underbrace{1}_{L_{\text{num}}(n)} - \underbrace{5}_{\Sigma L_{\text{toon}}(k)} - \underbrace{4}_{\Sigma L_{\text{toon}}(v)} = 16 $$ ### Arrays of Arrays Arrays of arrays of primitives are where TOON structurally loses: each inner array becomes a list item with its own header, so TOON pays a fixed overhead per inner array (`"- "` plus `"[m]: "`), while JSON just uses commas. ::: tip Practical Note For arrays of arrays of primitives, this model predicts that JSON is more byte-efficient than TOON, because TOON pays ~6 extra bytes per inner array (2 for `"- "`, 4 for `"[m]: "`), plus the length marker. ::: For arrays of arrays with $n$ outer elements and $m$ inner elements: $$ \begin{split} \Delta_{\text{arrarr}} = 2 - 6n - \sum_{i=1}^{n} L_{\text{num}}(m) + \\ \sum_{i=1}^{n}\sum_{j=1}^{m} L_{\text{json}}(v_{ij}) - \sum_{i=1}^{n}\sum_{j=1}^{m} L_{\text{toon}}(v_{ij}) \end{split} $$ With string primitives and $m = 2$: $$ f(n) = 2 - 6n - \sum_{i=1}^{n} (\lfloor \log_{10}(|m|) \rfloor + 1) + 2nm $$ **Example:** For 1,000,000 arrays with $m = 2$, TOON **wastes 2,999,998 bytes ≈ 2.86 MB** relative to JSON under this model. 
#### Empirical Validation ::: code-group ```json [JSON (23 bytes)] { "pairs": [[1, 2], [3, 4]] } ``` ```yaml [TOON (35 bytes)] pairs[2]: - [2]: 1,2 - [2]: 3,4 ``` ::: $$ \Delta_{\text{arrarr}} = 2 - \underbrace{12}_{6n} - \underbrace{2}_{\sum L_{\text{num}}(m)} + \underbrace{4}_{\sum L_{\text{json}}} - \underbrace{4}_{\sum L_{\text{toon}}} = -12 $$ ### Strings That Look Like Literals Strings that look like numbers or booleans (e.g. `"123"`, `"true"`) must be quoted in both JSON and TOON, slightly reducing TOON's advantage because it no longer saves quotes on those values. For objects containing such strings: $$ \Delta_{\text{strlit}} = 2 + n $$ **Example:** For 1,000,000 objects, TOON saves **1,000,002 bytes ≈ 0.95 MB**. #### Empirical Validation ::: code-group ```json [JSON (34 bytes)] { "version": "123", "enabled": "true" } ``` ```yaml [TOON (30 bytes)] version: "123" enabled: "true" ``` ::: $$ \Delta_{\text{strlit}} = 2 + \underbrace{2}_{n} = 4 $$ ### Empty Structures Empty containers reveal structural differences even at minimal sizes. **Empty Object:** $$ \Delta_{\text{EmptyObject}} = 2 $$ JSON requires `{}` (2 bytes), whereas a completely empty root object in TOON is represented as an empty document (0 bytes). **Empty Array (field):** $$ \Delta_{\text{EmptyArray}} = 3 $$ For a field named `key`, JSON uses `{"key":[]}` in compact form, while TOON uses: ```yaml key[0]: ``` Under this model, that yields a constant 3-byte advantage for TOON. ## Summary Table The table below summarizes the formulas and which side wins under the modeling assumptions. | Structure | Efficiency Formula | TOON Advantage? 
| |-----------|-------------------|-----------------| | Simple Objects | $f(n) = 2 + 3n$ | ✅ Yes | | Nested Objects (1 level) | $f(n) = 5 + n$ | ✅ Yes (shrinks with depth) | | Primitive Arrays | $f(n) = 2 + 2n - \lfloor \log_{10}(n) \rfloor$ | ✅ Yes | | Root Arrays | $f(n) = -3 + 2n - \lfloor \log_{10}(n) \rfloor$ | ✅ Yes | | Tabular Arrays | $f(n) = 1 + nm(3+\lvert k\rvert) - m(1+\lvert k\rvert) - \lfloor \log_{10}(n) \rfloor$ | ✅ **Best case** | | Arrays of Arrays | $f(n) = 2 - 6n + 2nm - \text{overhead}$ | ❌ JSON wins here | | String Literals | $f(n) = 2 + n$ | ✅ Yes (smaller gain) | | Empty Structures | $\Delta = 2$ or $3$ | ✅ Yes | In short: - TOON's gains are **linear in the number of fields** for flat objects. - For arrays, gains grow **linearly in the number of elements**, and for tabular arrays **linearly in both rows and fields**. - Arrays of arrays are the main structural case where JSON is smaller. - Deep nesting and heavy quoting can erode or reverse these advantages in real data. ## Conclusion This simplified theoretical model supports TOON's design goal: structurally, it reduces overhead compared to compact JSON in many common patterns by: - avoiding repeated keys in tabular arrays, - omitting quotes on many keys and values, - and replacing braces with indentation at shallow depths. For the structure families examined here and under the stated assumptions, the structural overhead of TOON is lower than that of compact JSON except for arrays of arrays. Since UTF-8 byte length is a reasonable first-order proxy for tokens, these structural savings usually translate into lower token counts in those patterns. At the same time, this is deliberately a simplified model. In real datasets, additional factors – deeper or irregular nesting, heavily quoted strings, exponent notation in JSON, and tokenizer idiosyncrasies – can reduce or even reverse these gains. 
Our [Benchmarks](/guide/benchmarks) and [When Not to Use TOON](/guide/getting-started#when-not-to-use-toon) show that compact JSON can be more efficient for deeply nested or low-tabularity data. Use this page as intuition for *why* TOON behaves the way it does, not as a universal guarantee. ## Related Resources - [Benchmarks](/guide/benchmarks) – Empirical token count and accuracy comparisons across formats - [Specification](/reference/spec) – Formal TOON specification ## References This analysis is based on: - **Original Research**: [TOON vs. JSON: A Mathematical Evaluation of Byte Efficiency in Structured Data](https://www.researchgate.net/publication/397903673_TOON_vs_JSON_A_Mathematical_Evaluation_of_Byte_Efficiency_in_Structured_Data) - **TOON Specification**: [toon-format/spec](https://github.com/toon-format/spec) - **JSON Specification**: [RFC 8259](https://datatracker.ietf.org/doc/html/rfc8259), [ECMA-404](https://www.ecma-international.org/publications-and-standards/standards/ecma-404/) --- This page was contributed by Mateo Lafalce ([@mateolafalce](https://github.com/mateolafalce)). *Have questions or found an error in the formalization? Open an issue on [GitHub](https://github.com/toon-format/spec) or contribute improvements to this analysis.* ================================================ FILE: docs/reference/spec.md ================================================ # Specification The [TOON specification](https://github.com/toon-format/spec) is the authoritative reference for implementing encoders, decoders, and validators. It defines the concrete syntax, normative encoding/decoding behavior, and strict-mode validation rules. You don't need this page to *use* TOON. It's mainly for implementers and contributors. If you're looking to learn how to use TOON, start with the [Getting Started](/guide/getting-started) guide instead. > [!TIP] > The TOON specification is stable, but also an idea in progress. 
Nothing's set in stone – help shape where it goes by contributing to it or sharing feedback! ## Current Version **Spec v{{ $spec.version }}** (2025-11-24) is the current published Working Draft. It is stable for implementation but not yet finalized; see "Status of This Document" in the spec for details. ## Media Type & File Extension The spec defines a provisional media type and file extension in [§18.2](https://github.com/toon-format/spec/blob/main/SPEC.md#182-provisional-media-type): - **Media type:** `text/toon` (provisional, not yet IANA‑registered; UTF‑8 only) - **File extension:** `.toon` TOON documents are always UTF‑8 with LF (`\n`) line endings; the optional `charset` parameter, when present, MUST be `utf-8` per the spec. ## Guided Tour of the Spec ### Core Concepts [§1 Terminology and Conventions](https://github.com/toon-format/spec/blob/main/SPEC.md#1-terminology-and-conventions): Defines key terms like "indentation level", "active delimiter", "strict mode", and RFC2119 keywords (MUST, SHOULD, MAY). [§2 Data Model](https://github.com/toon-format/spec/blob/main/SPEC.md#2-data-model): Specifies the JSON data model (objects, arrays, primitives), array/object ordering requirements, and canonical number formatting (no exponent notation, no leading/trailing zeros). [§3 Encoding Normalization](https://github.com/toon-format/spec/blob/main/SPEC.md#3-encoding-normalization-reference-encoder): Defines how non-JSON types (Date, BigInt, NaN, Infinity, undefined, etc.) are normalized before encoding. Required reading for encoder implementers. [§4 Decoding Interpretation](https://github.com/toon-format/spec/blob/main/SPEC.md#4-decoding-interpretation-reference-decoder): Specifies how decoders map text tokens to host values (quoted strings, unquoted primitives, numeric parsing with leading-zero handling). Decoders default to strict mode (`strict = true`) in the reference implementation; strict-mode errors are enumerated in §14. 
### Syntax Rules [§5 Concrete Syntax and Root Form](https://github.com/toon-format/spec/blob/main/SPEC.md#5-concrete-syntax-and-root-form): Defines TOON's line-oriented, indentation-based notation and how to determine whether the root is an object, array, or primitive. [§6 Header Syntax](https://github.com/toon-format/spec/blob/main/SPEC.md#6-header-syntax-normative): Normative ABNF grammar for array headers: `key[N]{fields}:`. Specifies bracket segments, delimiter symbols, and field lists. [§7 Strings and Keys](https://github.com/toon-format/spec/blob/main/SPEC.md#7-strings-and-keys): Complete quoting rules (when strings MUST be quoted), escape sequences (only `\\`, `\"`, `\n`, `\r`, `\t` are valid), and key encoding requirements. [§8 Objects](https://github.com/toon-format/spec/blob/main/SPEC.md#8-objects): Object field encoding (key: value), nesting rules, key order preservation, and empty object handling. [§9 Arrays](https://github.com/toon-format/spec/blob/main/SPEC.md#9-arrays): Covers all array forms: primitive (inline), arrays of objects (tabular), mixed/non-uniform (list), and arrays of arrays. Includes tabular detection requirements. [§10 Objects as List Items](https://github.com/toon-format/spec/blob/main/SPEC.md#10-objects-as-list-items): Indentation rules for objects appearing in list items (first field on the hyphen line), including the canonical pattern when the first field is a tabular array (header on the hyphen line, rows at depth +2, sibling fields at depth +1). [§11 Delimiters](https://github.com/toon-format/spec/blob/main/SPEC.md#11-delimiters): Delimiter scoping (document vs active), delimiter-aware quoting, and parsing rules for comma/tab/pipe delimiters. [§12 Indentation and Whitespace](https://github.com/toon-format/spec/blob/main/SPEC.md#12-indentation-and-whitespace): Encoding requirements (consistent spaces, no tabs in indentation, no trailing spaces/newlines) and decoding rules (strict vs non-strict indentation handling). 
### Conformance and Validation [§13 Conformance and Options](https://github.com/toon-format/spec/blob/main/SPEC.md#13-conformance-and-options): Defines conformance classes (encoder, decoder, validator), standardized options, and conformance checklists. [§13.4 Key Folding and Path Expansion](https://github.com/toon-format/spec/blob/main/SPEC.md#134-key-folding-and-path-expansion): Optional encoder feature (key folding) and decoder feature (path expansion) for collapsing/expanding dotted paths, with deep-merge semantics and strict/non-strict conflict resolution. [§14 Strict Mode Errors and Diagnostics](https://github.com/toon-format/spec/blob/main/SPEC.md#14-strict-mode-errors-and-diagnostics-authoritative-checklist): **Authoritative checklist** of all strict-mode errors: array count mismatches, syntax errors, indentation errors, structural errors, and path expansion conflicts. ### Implementation Guidance [§15 Security Considerations](https://github.com/toon-format/spec/blob/main/SPEC.md#15-security-considerations): Injection risks, quoting rules, and strict-mode checks relevant to security. [§16 Internationalization](https://github.com/toon-format/spec/blob/main/SPEC.md#16-internationalization): Unicode handling and locale-independent number formatting. [§17 Interoperability and Mappings](https://github.com/toon-format/spec/blob/main/SPEC.md#17-interoperability-and-mappings): JSON/CSV/YAML mappings and conversion guidance. [§18 IANA Considerations](https://github.com/toon-format/spec/blob/main/SPEC.md#18-iana-considerations): Media type registration plans and provisional status. [§19 TOON Core Profile](https://github.com/toon-format/spec/blob/main/SPEC.md#19-toon-core-profile-normative-subset): Normative subset of the most common, memory-friendly rules. Useful for minimal implementations. 
[Appendix G: Host Type Normalization Examples](https://github.com/toon-format/spec/blob/main/SPEC.md#appendix-g-host-type-normalization-examples-informative): Non-normative guidance for Go, JavaScript, Python, and Rust implementations on normalizing language-specific types. [Appendix C: Test Suite and Compliance](https://github.com/toon-format/spec/blob/main/SPEC.md#appendix-c-test-suite-and-compliance-informative): Reference test suite at [github.com/toon-format/spec/tree/main/tests](https://github.com/toon-format/spec/tree/main/tests) for validating implementations. ## Spec Sections at a Glance | Section | Topic | When to Read | |---------|-------|--------------| | §1-4 | Data model, normalization, decoding | Implementing encoders/decoders | | §5-6 | Syntax, headers, root form | Implementing parsers | | §7 | Strings, keys, quoting, escaping | Implementing string handling | | §8-10 | Objects, arrays, list items | Implementing structure encoding | | §11-12 | Delimiters, indentation, whitespace | Implementing formatting and validation | | §13 | Conformance, options, key folding/path expansion | Implementing options and features | | §14 | Strict-mode errors | Implementing validators | | §15-18 | Security, i18n, interoperability, media type | Operational and ecosystem considerations | | §19 | Core profile | Minimal implementations | | §20-21 | Versioning, extensibility, IP | Long-term stability and licensing | ## Conformance Checklists The spec includes three conformance checklists: ### Encoder Checklist (§13.1) [↗ SPEC.md](https://github.com/toon-format/spec/blob/main/SPEC.md#131-encoder-conformance-checklist) Key requirements: - Produce UTF-8 with LF line endings - Use consistent indentation (default 2 spaces, no tabs) - Escape only `\\`, `\"`, `\n`, `\r`, `\t` in quoted strings; any other escape is invalid - Quote strings with active delimiter, colon, or structural characters - Emit array lengths `[N]` matching actual count - Preserve object key order - Normalize 
numbers to non-exponential decimal form - Convert `-0` to `0`, `NaN`/±Infinity to `null` - No trailing spaces or trailing newline - When `keyFolding="safe"` is enabled, folding MUST follow §13.4: - Only fold IdentifierSegment keys (letters/digits/underscores, no dots), - Do not introduce collisions with existing sibling keys, - Do not fold segments that would require quoting. - When `flattenDepth` is set, folding MUST stop at the configured number of segments (§13.4). ### Decoder Checklist (§13.2) [↗ SPEC.md](https://github.com/toon-format/spec/blob/main/SPEC.md#132-decoder-conformance-checklist) Key requirements: - Parse array headers per §6 (length, delimiter, fields) - Split inline arrays and tabular rows using active delimiter only - Unescape quoted strings with only valid escapes - Type unquoted primitives: true/false/null → booleans/null, numeric → number, else → string - Enforce strict-mode rules when `strict=true` - Preserve array order and object key order - When `expandPaths="safe"` is enabled, expand dotted keys into nested objects per §13.4: - Split on `.`, only expand when all segments are IdentifierSegments, - Deep-merge overlapping paths (object + object), - Do not perform element-wise array merges. - With `expandPaths="safe"` and `strict=true` (default), MUST error on any expansion conflict (§14.5). - With `expandPaths="safe"` and `strict=false`, MUST apply deterministic last-write-wins (LWW) conflict resolution (§13.4). 
### Validator Checklist (§13.3) [↗ SPEC.md](https://github.com/toon-format/spec/blob/main/SPEC.md#133-validator-conformance-checklist) Validators should verify: - Structural conformance (headers, indentation, list markers) - Whitespace invariants (no trailing spaces/newlines) - Delimiter consistency between headers and rows - Array length counts match declared `[N]` - All strict-mode requirements (including path-expansion conflicts when enabled) ## Versioning The spec uses semantic versioning (major.minor): - **Major version** (e.g., v{{ $spec.version }}): Breaking changes, incompatible with previous versions - **Minor version** (e.g., v1.5 → v1.6): Clarifications, additional requirements, or backward-compatible additions See [Appendix D: Document Changelog](https://github.com/toon-format/spec/blob/main/SPEC.md#appendix-d-document-changelog-informative) for detailed version history. ## Contributing to the Spec The spec is community-maintained at [github.com/toon-format/spec](https://github.com/toon-format/spec). We welcome contributions of all kinds: reporting ambiguities or errors, proposing clarifications and examples, adding test cases to the reference suite, or discussing edge cases and normative behavior. Your feedback helps shape the format. ================================================ FILE: docs/reference/syntax-cheatsheet.md ================================================ # Syntax Cheatsheet Quick reference for mapping JSON to TOON format. For rigorous, normative syntax rules and edge cases, see the [Specification](/reference/spec). 
## Objects ::: code-group ```json [JSON] { "id": 1, "name": "Ada" } ``` ```yaml [TOON] id: 1 name: Ada ``` ::: ## Nested Objects ::: code-group ```json [JSON] { "user": { "id": 1, "name": "Ada" } } ``` ```yaml [TOON] user: id: 1 name: Ada ``` ::: ## Primitive Arrays ::: code-group ```json [JSON] { "tags": ["foo", "bar", "baz"] } ``` ```yaml [TOON] tags[3]: foo,bar,baz ``` ::: ## Tabular Arrays ::: code-group ```json [JSON] { "items": [ { "id": 1, "qty": 5 }, { "id": 2, "qty": 3 } ] } ``` ```yaml [TOON] items[2]{id,qty}: 1,5 2,3 ``` ::: ## Mixed / Non-Uniform Arrays ::: code-group ```json [JSON] { "items": [1, { "a": 1 }, "x"] } ``` ```yaml [TOON] items[3]: - 1 - a: 1 - x ``` ::: > [!NOTE] > When a list-item object has a tabular array as its first field, the tabular header appears on the hyphen line. Rows are indented two levels deeper than the hyphen, and other fields are indented one level deeper. This is the canonical encoding for this pattern. ::: code-group ```yaml [Multi-field object] items[1]: - users[2]{id,name}: 1,Ada 2,Bob status: active ``` ```yaml [Single-field object] items[1]: - users[2]{id,name}: 1,Ada 2,Bob ``` ::: ## Arrays of Arrays ::: code-group ```json [JSON] { "pairs": [[1, 2], [3, 4]] } ``` ```yaml [TOON] pairs[2]: - [2]: 1,2 - [2]: 3,4 ``` ::: ## Root Arrays ::: code-group ```json [JSON] ["x", "y", "z"] ``` ```yaml [TOON] [3]: x,y,z ``` ::: ## Empty Containers ::: code-group ```json [Empty Object] {} ``` ```yaml [Empty Object] (empty output) ``` ::: ::: code-group ```json [Empty Array] { "items": [] } ``` ```yaml [Empty Array] items[0]: ``` ::: ## Quoting Special Cases ### Strings That Look Like Literals ::: code-group ```json [JSON] { "version": "123", "enabled": "true" } ``` ```yaml [TOON] version: "123" enabled: "true" ``` ::: These strings must be quoted because they look like numbers/booleans. 
### Strings with Active Delimiter ::: code-group ```json [JSON] { "note": "hello, world" } ``` ```yaml [TOON] note: "hello, world" ``` ::: Strings containing the active delimiter (comma by default) must be quoted. ### Strings with Leading/Trailing Spaces ::: code-group ```json [JSON] { "message": " padded " } ``` ```yaml [TOON] message: " padded " ``` ::: ### Empty String ::: code-group ```json [JSON] { "name": "" } ``` ```yaml [TOON] name: "" ``` ::: ## Quoting Rules Summary Strings **must** be quoted if they: - Are empty (`""`) - Have leading or trailing whitespace - Equal `true`, `false`, or `null` (case-sensitive) - Look like numbers (e.g., `"42"`, `"-3.14"`, `"1e-6"`, `"05"`) - Contain special characters: `:`, `"`, `\`, `[`, `]`, `{`, `}`, newline, tab, carriage return - Contain the active delimiter (comma by default, or tab/pipe if declared in header) - Equal `"-"` or start with `"-"` followed by any character Otherwise, strings can be unquoted. Unicode and emoji are safe: ```yaml message: Hello 世界 👋 note: This has inner spaces ``` ## Escape Sequences Only five escape sequences are valid in quoted strings: | Character | Escape | |-----------|--------| | Backslash (`\`) | `\\` | | Double quote (`"`) | `\"` | | Newline | `\n` | | Carriage return | `\r` | | Tab | `\t` | All other escapes (e.g., `\x`, `\u`) are invalid. ## Array Headers ### Basic Header ``` key[N]: ``` - `N` = array length - Default delimiter: comma ### Tabular Header ``` key[N]{field1,field2,field3}: ``` - `N` = array length - `{fields}` = column names - Default delimiter: comma ### Alternative Delimiters ::: code-group ```yaml [Tab Delimiter] items[2 ]{id name}: 1 Alice 2 Bob ``` ```yaml [Pipe Delimiter] items[2|]{id|name}: 1|Alice 2|Bob ``` ::: The delimiter symbol appears inside the brackets and braces. 
## Key Folding (Optional) Standard nesting: ```yaml data: metadata: items[2]: a,b ``` With key folding (`keyFolding: 'safe'`): ```yaml data.metadata.items[2]: a,b ``` See [Format Overview – Key Folding](/guide/format-overview#key-folding-optional) for details. ## Type Conversions | Input | Output | |-------|--------| | Finite number | Canonical decimal (no exponent, no trailing zeros) | | `NaN`, `Infinity`, `-Infinity` | `null` | | `BigInt` (safe range) | Number | | `BigInt` (out of range) | Quoted decimal string | | `Date` | ISO string (quoted) | | `undefined`, `function`, `symbol` | `null` | ================================================ FILE: docs/uno.config.ts ================================================ import type { UserConfig } from 'unocss' import { defineConfig, presetIcons, presetWind4, transformerDirectives } from 'unocss' const config: UserConfig = defineConfig({ presets: [ presetWind4(), presetIcons(), ], transformers: [ transformerDirectives(), ], }) export default config ================================================ FILE: docs/wrangler.toml ================================================ name = "toon-docs" compatibility_date = "2025-10-01" [[routes]] pattern = "toonformat.dev" custom_domain = true [assets] directory = "./.vitepress/dist/" not_found_handling = "404-page" ================================================ FILE: eslint.config.ts ================================================ import type { ConfigNames, TypedFlatConfigItem } from '@antfu/eslint-config' import type { FlatConfigComposer } from 'eslint-flat-config-utils' import antfu from '@antfu/eslint-config' const config: FlatConfigComposer = antfu({ pnpm: false, rules: { 'no-cond-assign': 'off', }, }).append({ files: ['**/README.md', 'SPEC.md', '**/benchmarks/**/*', '**/docs/**/*'], rules: { 'markdown/no-missing-link-fragments': 'off', 'markdown/fenced-code-language': 'off', 'markdown/heading-increment': 'off', 'import/no-duplicates': 'off', 'style/no-tabs': 'off', 
'yaml/quotes': 'off', }, }) export default config ================================================ FILE: package.json ================================================ { "name": "@toon-format/monorepo", "type": "module", "version": "2.1.0", "private": true, "packageManager": "pnpm@10.30.3", "scripts": { "build": "pnpm -r --filter=./packages/** run build", "automd": "automd", "docs:dev": "vitepress dev docs", "docs:build": "vitepress build docs", "docs:preview": "vitepress preview docs", "lint": "eslint .", "lint:fix": "eslint . --fix", "test": "pnpm -r test", "test:types": "tsc --noEmit", "release": "bumpp -r" }, "devDependencies": { "@antfu/eslint-config": "^7.6.1", "@commitlint/types": "^20.4.3", "@types/node": "^24.12.0", "automd": "^0.4.3", "bumpp": "^10.4.1", "eslint": "^10.0.2", "eslint-flat-config-utils": "^3.0.1", "tsdown": "^0.21.0", "typescript": "^5.9.3", "vitest": "^4.0.18" } } ================================================ FILE: packages/cli/README.md ================================================ # @toon-format/cli Command-line tool for converting JSON to TOON and back, with token analysis and streaming support. [TOON (Token-Oriented Object Notation)](https://toonformat.dev) is a compact, human-readable encoding of the JSON data model that minimizes tokens for LLM input. The CLI lets you test conversions, analyze token savings, and integrate TOON into shell pipelines with stdin/stdout support. ## Installation ```bash # npm npm install -g @toon-format/cli # pnpm pnpm add -g @toon-format/cli # yarn yarn global add @toon-format/cli ``` Or use directly with `npx`: ```bash npx @toon-format/cli [options] [input] ``` ## Usage ```bash toon [options] [input] ``` **Standard input:** Omit the input argument or use `-` to read from stdin. This enables piping data directly from other commands. **Auto-detection:** The CLI automatically detects the operation based on file extension (`.json` → encode, `.toon` → decode). 
When reading from stdin, use `--encode` or `--decode` flags to specify the operation (defaults to encode). ### Basic Examples ```bash # Encode JSON to TOON (auto-detected) toon input.json -o output.toon # Decode TOON to JSON (auto-detected) toon data.toon -o output.json # Output to stdout toon input.json # Pipe from stdin cat data.json | toon echo '{"name": "Ada"}' | toon # Decode from stdin cat data.toon | toon --decode ``` ## Options | Option | Description | | ------ | ----------- | | `-o, --output <file>` | Output file path (prints to stdout if omitted) | | `-e, --encode` | Force encode mode (overrides auto-detection) | | `-d, --decode` | Force decode mode (overrides auto-detection) | | `--delimiter <char>` | Array delimiter: `,` (comma), `\t` (tab), `\|` (pipe) | | `--indent <number>` | Indentation size (default: `2`) | | `--stats` | Show token count estimates and savings (encode only) | | `--no-strict` | Disable strict validation when decoding | | `--keyFolding <mode>` | Enable key folding: `off`, `safe` (default: `off`) | | `--flattenDepth <number>` | Maximum folded segment count when key folding is enabled (default: `Infinity`) | | `--expandPaths <mode>` | Enable path expansion: `off`, `safe` (default: `off`) | ## Advanced Examples ### Token Statistics Show token savings when encoding: ```bash toon data.json --stats -o output.toon ``` Example output: ``` ✔ Encoded data.json → output.toon ℹ Token estimates: ~15,145 (JSON) → ~8,745 (TOON) ✔ Saved ~6,400 tokens (-42.3%) ``` ### Alternative Delimiters #### Tab-separated (often more token-efficient) ```bash toon data.json --delimiter "\t" -o output.toon ``` ### Lenient Decoding Skip validation for faster processing: ```bash toon data.toon --no-strict -o output.json ``` ### Stdin Workflows ```bash # Convert API response to TOON curl https://api.example.com/data | toon --stats # Process large dataset cat large-dataset.json | toon --delimiter "\t" > output.toon # Chain with other tools jq '.results' data.json | toon > filtered.toon ``` ### Large Dataset
Processing The CLI uses streaming output for both encoding and decoding, writing incrementally without building the full output string in memory: ```bash # Encode large JSON file with minimal memory usage toon huge-dataset.json -o output.toon # Decode large TOON file with streaming JSON output toon huge-dataset.toon -o output.json # Process millions of records efficiently via stdin cat million-records.json | toon > output.toon cat million-records.toon | toon --decode > output.json ``` **Memory efficiency:** - **Encode (JSON → TOON)**: Streams TOON lines to output without full string in memory - **Decode (TOON → JSON)**: Uses the same event-based streaming decoder as the `decodeStream` API in `@toon-format/toon`, streaming JSON tokens to output without full string in memory - Peak memory usage scales with data depth, not total size - When `--expandPaths safe` is enabled, decode falls back to non-streaming mode internally to apply deep-merge expansion before writing JSON > [!NOTE] > When using `--stats` with encode, the full output string is kept in memory for token counting. Omit `--stats` for maximum memory efficiency with very large datasets. 
### Key Folding (Since v1.5) Collapse nested wrapper chains to reduce tokens: #### Basic key folding ```bash # Encode with key folding toon input.json --keyFolding safe -o output.toon ``` For data like: ```json { "data": { "metadata": { "items": ["a", "b"] } } } ``` Output becomes: ``` data.metadata.items[2]: a,b ``` Instead of: ``` data: metadata: items[2]: a,b ``` #### Limit folding depth ```bash # Fold maximum 2 levels deep toon input.json --keyFolding safe --flattenDepth 2 -o output.toon ``` #### Path expansion on decode ```bash # Reconstruct nested structure from folded keys toon data.toon --expandPaths safe -o output.json ``` #### Round-trip workflow ```bash # Encode with folding toon input.json --keyFolding safe -o compressed.toon # Decode with expansion (restores original structure) toon compressed.toon --expandPaths safe -o output.json # Verify round-trip diff input.json output.json ``` #### Combined with other options ```bash # Key folding + tab delimiter + stats toon data.json --keyFolding safe --delimiter "\t" --stats -o output.toon ``` ## Why Use the CLI? 
- **Quick conversions** between formats without writing code - **Token analysis** to see potential savings before sending to LLMs - **Pipeline integration** with existing JSON-based workflows - **Flexible formatting** with delimiter and indentation options - **Key folding** to collapse nested wrappers for additional token savings - **Memory-efficient streaming** for both encode and decode operations - process large datasets without loading entire outputs into memory ## Related - [@toon-format/toon](https://www.npmjs.com/package/@toon-format/toon) - JavaScript/TypeScript library - [Full specification](https://github.com/toon-format/spec) - Complete format documentation - [Website](https://toonformat.dev) - Interactive examples and guides ## License [MIT](https://github.com/toon-format/toon/blob/main/LICENSE) License © 2025-PRESENT [Johann Schopplich](https://github.com/johannschopplich) ================================================ FILE: packages/cli/bin/toon.mjs ================================================ #!/usr/bin/env node 'use strict' import('../dist/index.mjs') ================================================ FILE: packages/cli/package.json ================================================ { "name": "@toon-format/cli", "type": "module", "version": "2.1.0", "packageManager": "pnpm@10.30.3", "description": "CLI for JSON ↔ TOON conversion using @toon-format/toon", "author": "Johann Schopplich ", "license": "MIT", "homepage": "https://toonformat.dev", "repository": { "type": "git", "url": "git+https://github.com/toon-format/toon.git" }, "bugs": { "url": "https://github.com/toon-format/toon/issues" }, "sideEffects": false, "exports": { ".": { "types": "./dist/index.d.mts", "default": "./dist/index.mjs" } }, "types": "./dist/index.d.mts", "bin": { "toon": "bin/toon.mjs" }, "files": [ "bin", "dist" ], "scripts": { "dev": "node ./src/cli-entry.ts --help", "build": "tsdown", "test": "vitest" }, "dependencies": { "citty": "^0.2.1", "consola": "^3.4.2", "tokenx": 
/**
 * Reads JSON from `config.input`, encodes it as TOON and writes the result to
 * `config.output`, or to stdout when no output path is given.
 *
 * Without `printStats`, TOON lines are streamed to the destination one at a
 * time. With `printStats`, the complete TOON string is built in memory so the
 * token estimates of both representations can be compared and reported.
 *
 * @param config - Conversion settings
 * @param config.input - Source of the JSON text (stdin or a file)
 * @param config.output - Destination file path; stdout when omitted
 * @param config.indent - Indentation size passed to the encoder
 * @param config.delimiter - Array delimiter passed to the encoder
 * @param config.keyFolding - Key folding mode passed to the encoder
 * @param config.flattenDepth - Folding depth limit passed to the encoder
 * @param config.printStats - Whether to print token estimates after encoding
 * @throws {Error} When the input cannot be parsed as JSON
 */
export async function encodeToToon(config: {
  input: InputSource
  output?: string
  indent: NonNullable<EncodeOptions['indent']>
  delimiter: NonNullable<EncodeOptions['delimiter']>
  keyFolding?: NonNullable<EncodeOptions['keyFolding']>
  flattenDepth?: number
  printStats: boolean
}): Promise<void> {
  const jsonContent = await readInput(config.input)

  let data: unknown
  try {
    data = JSON.parse(jsonContent)
  }
  catch (error) {
    throw new Error(`Failed to parse JSON: ${error instanceof Error ? error.message : String(error)}`)
  }

  const encodeOptions: EncodeOptions = {
    delimiter: config.delimiter,
    indent: config.indent,
    keyFolding: config.keyFolding,
    flattenDepth: config.flattenDepth,
  }

  // Confirms a file-to-file conversion; nothing is logged when writing to stdout
  // so the TOON output stays clean for pipelines.
  const reportEncoded = (): void => {
    if (!config.output)
      return
    const relativeInputPath = formatInputLabel(config.input)
    const relativeOutputPath = path.relative(process.cwd(), config.output)
    consola.success(`Encoded \`${relativeInputPath}\` → \`${relativeOutputPath}\``)
  }

  if (!config.printStats) {
    await writeStreamingToon(encodeLines(data, encodeOptions), config.output)
    reportEncoded()
    return
  }

  // When printing stats, we need the full string for token counting
  const toonOutput = encode(data, encodeOptions)

  if (config.output) {
    await fsp.writeFile(config.output, toonOutput, 'utf-8')
  }
  else {
    console.log(toonOutput)
  }

  const jsonTokens = estimateTokenCount(jsonContent)
  const toonTokens = estimateTokenCount(toonOutput)
  const diff = jsonTokens - toonTokens
  // Guard the division so an empty estimate reports 0.0% instead of NaN
  const percent = jsonTokens > 0
    ? ((Math.abs(diff) / jsonTokens) * 100).toFixed(1)
    : '0.0'

  reportEncoded()

  console.log()
  consola.info(`Token estimates: ~${jsonTokens} (JSON) → ~${toonTokens} (TOON)`)

  if (diff >= 0) {
    consola.success(`Saved ~${diff} tokens (-${percent}%)`)
  }
  else {
    // TOON can be larger than compact JSON for small or irregular inputs;
    // report that as growth rather than as negative savings.
    consola.warn(`TOON is ~${-diff} tokens larger than the JSON input (+${percent}%)`)
  }
}
/**
 * Streams JSON chunks to their destination without concatenating them first.
 *
 * With an output path, the file is opened in `'w'` mode and every chunk is
 * written in order; the handle is closed even when a write or the chunk source
 * fails. Without an output path, chunks are written to stdout and followed by
 * one trailing newline.
 *
 * @param chunks - JSON string fragments, in output order
 * @param outputPath - Destination file; stdout when omitted
 */
async function writeStreamingJson(
  chunks: AsyncIterable<string> | Iterable<string>,
  outputPath?: string,
): Promise<void> {
  // No destination file: forward everything to stdout
  if (!outputPath) {
    for await (const piece of chunks)
      process.stdout.write(piece)

    // Terminate the stdout output with a newline
    process.stdout.write('\n')
    return
  }

  // Destination file: write through a handle from the fs/promises API
  let handle: FileHandle | undefined
  try {
    handle = await fsp.open(outputPath, 'w')
    for await (const piece of chunks)
      await handle.write(piece)
  }
  finally {
    await handle?.close()
  }
}
/**
 * Streams TOON lines to their destination, one write per line, separating
 * consecutive lines with `\n` instead of joining them in memory first.
 *
 * File output gets no trailing newline after the last line. Stdout output is
 * always finished with one newline.
 *
 * @param lines - TOON lines without line terminators
 * @param outputPath - Destination file; stdout when omitted
 */
async function writeStreamingToon(
  lines: Iterable<string>,
  outputPath?: string,
): Promise<void> {
  let emitted = 0

  // No destination file: forward everything to stdout
  if (!outputPath) {
    for (const line of lines) {
      if (emitted > 0)
        process.stdout.write('\n')
      process.stdout.write(line)
      emitted++
    }

    // Terminate the stdout output with a newline
    process.stdout.write('\n')
    return
  }

  // Destination file: write through a handle from the fs/promises API
  let handle: FileHandle | undefined
  try {
    handle = await fsp.open(outputPath, 'w')
    for (const line of lines) {
      if (emitted > 0)
        await handle.write('\n')
      await handle.write(line)
      emitted++
    }
  }
  finally {
    await handle?.close()
  }
}
/**
 * Root `toon` command.
 *
 * Resolves the input source and output path, validates every option, picks
 * encode or decode mode via `detectMode`, and runs the matching conversion.
 * Option validation errors are thrown. Conversion failures are logged through
 * `consola.error` and end the process with exit code 1.
 */
export const mainCommand: CommandDef = defineCommand({
  meta: {
    name,
    description: 'TOON CLI – Convert between JSON and TOON formats',
    version,
  },
  args,
  async run({ args }) {
    const input = args.input
    // A missing input or "-" means stdin; anything else is resolved as a file path
    const inputSource: InputSource = !input || input === '-'
      ? { type: 'stdin' }
      : { type: 'file', path: path.resolve(input) }
    const outputPath = args.output ? path.resolve(args.output) : undefined

    // Parse and validate indent
    const indent = Number.parseInt(args.indent || '2', 10)
    if (Number.isNaN(indent) || indent < 0) {
      throw new Error(`Invalid indent value: ${args.indent}`)
    }

    // Validate delimiter.
    // Shells pass `--delimiter "\t"` through as the two characters backslash + t,
    // which is the spelling the help text and README advertise, so accept it as
    // an alias for a real tab character.
    const requestedDelimiter = args.delimiter || DEFAULT_DELIMITER
    const delimiter = requestedDelimiter === '\\t' ? '\t' : requestedDelimiter
    if (!(Object.values(DELIMITERS)).includes(delimiter as Delimiter)) {
      throw new Error(`Invalid delimiter "${delimiter}". Valid delimiters are: comma (,), tab (\\t), pipe (|)`)
    }

    // Validate `keyFolding`
    const keyFolding = args.keyFolding || 'off'
    if (keyFolding !== 'off' && keyFolding !== 'safe') {
      throw new Error(`Invalid keyFolding value "${keyFolding}". Valid values are: off, safe`)
    }

    // Parse and validate `flattenDepth`; left undefined when the flag is absent
    let flattenDepth: number | undefined
    if (args.flattenDepth !== undefined) {
      flattenDepth = Number.parseInt(args.flattenDepth, 10)
      if (Number.isNaN(flattenDepth) || flattenDepth < 0) {
        throw new Error(`Invalid flattenDepth value: ${args.flattenDepth}`)
      }
    }

    // Validate `expandPaths`
    const expandPaths = args.expandPaths || 'off'
    if (expandPaths !== 'off' && expandPaths !== 'safe') {
      throw new Error(`Invalid expandPaths value "${expandPaths}". Valid values are: off, safe`)
    }

    const mode = detectMode(inputSource, args.encode, args.decode)

    try {
      if (mode === 'encode') {
        await encodeToToon({
          input: inputSource,
          output: outputPath,
          delimiter: delimiter as Delimiter,
          indent,
          keyFolding: keyFolding as NonNullable<EncodeOptions['keyFolding']>,
          flattenDepth,
          printStats: args.stats === true,
        })
      }
      else {
        await decodeToJson({
          input: inputSource,
          output: outputPath,
          indent,
          // Strict unless explicitly disabled with --no-strict
          strict: args.strict !== false,
          expandPaths: expandPaths as NonNullable<DecodeOptions['expandPaths']>,
        })
      }
    }
    catch (error) {
      consola.error(error)
      process.exit(1)
    }
  },
})
/**
 * Turns a stream of `JsonStreamEvent` into JSON text, emitted as small string
 * chunks in document order.
 *
 * Only the stack of currently open containers is tracked, so the value tree
 * itself is never built. Commas are written lazily: before the next array
 * item or the next object key, never after the last one.
 *
 * @param events - Async iterable of JSON stream events
 * @param indent - Number of spaces for indentation (0 = compact, >0 = pretty)
 * @returns Async iterable of JSON string chunks
 * @throws {Error} On a mismatched end event, a key outside an object, a
 *   primitive in an object without a preceding key, or containers left open
 *   when the stream ends
 *
 * @example
 * ```ts
 * const lines = readLinesFromSource(input)
 * const events = decodeStream(lines)
 * for await (const chunk of jsonStreamFromEvents(events, 2)) {
 *   process.stdout.write(chunk)
 * }
 * ```
 */
export async function* jsonStreamFromEvents(
  events: AsyncIterable<JsonStreamEvent>,
  indent: number = 2,
): AsyncIterable<string> {
  const pretty = indent > 0
  const open: JsonContext[] = []
  let level = 0

  for await (const event of events) {
    const enclosing = open.length > 0 ? open[open.length - 1] : undefined

    if (event.type === 'startObject' || event.type === 'startArray') {
      // As an array item the container needs the separator and its own line;
      // as an object field value it follows directly after the key.
      if (enclosing && enclosing.type === 'array') {
        if (enclosing.needsComma)
          yield ','
        if (pretty) {
          yield '\n'
          yield ' '.repeat(level * indent)
        }
      }

      if (event.type === 'startObject') {
        yield '{'
        open.push({ type: 'object', needsComma: false, expectValue: false })
      }
      else {
        yield '['
        open.push({ type: 'array', needsComma: false })
      }
      level++
    }
    else if (event.type === 'endObject' || event.type === 'endArray') {
      const closingObject = event.type === 'endObject'
      const finished = open.pop()
      if (!finished || finished.type !== (closingObject ? 'object' : 'array')) {
        throw new Error(closingObject ? 'Mismatched endObject event' : 'Mismatched endArray event')
      }
      level--

      // `needsComma` is only set once the container received content, so
      // empty containers close on the same line as they opened.
      if (pretty && finished.needsComma) {
        yield '\n'
        yield ' '.repeat(level * indent)
      }
      yield closingObject ? '}' : ']'

      // The closed container counts as one completed value of its parent
      const outer = open.length > 0 ? open[open.length - 1] : undefined
      if (outer) {
        if (outer.type === 'object')
          outer.expectValue = false
        outer.needsComma = true
      }
    }
    else if (event.type === 'key') {
      if (!enclosing || enclosing.type !== 'object') {
        throw new Error('Key event outside of object context')
      }

      if (enclosing.needsComma)
        yield ','
      if (pretty) {
        yield '\n'
        yield ' '.repeat(level * indent)
      }

      yield JSON.stringify(event.key)
      yield pretty ? ': ' : ':'

      enclosing.expectValue = true
      enclosing.needsComma = true
    }
    else if (event.type === 'primitive') {
      if (!enclosing) {
        // Root-level primitive: the whole document is this one value
        yield JSON.stringify(event.value)
      }
      else if (enclosing.type === 'object') {
        if (!enclosing.expectValue) {
          // This shouldn't happen in well-formed events
          throw new Error('Primitive event in object without preceding key')
        }
        yield JSON.stringify(event.value)
        // `needsComma` was already set by the key event
        enclosing.expectValue = false
      }
      else {
        if (enclosing.needsComma)
          yield ','
        if (pretty) {
          yield '\n'
          yield ' '.repeat(level * indent)
        }
        yield JSON.stringify(event.value)
        enclosing.needsComma = true
      }
    }
  }

  // Every opened container must have been closed by the end of the stream
  if (open.length !== 0) {
    throw new Error('Incomplete event stream: unclosed objects or arrays')
  }
}
/**
 * Recursive worker behind `jsonStringifyLines`: yields the JSON chunks for a
 * single value at the given nesting depth.
 *
 * Booleans, numbers and strings are delegated to `JSON.stringify`. Arrays and
 * non-null objects are handed to their dedicated generators. Everything else
 * (`null`, `undefined`, functions, symbols, ...) is emitted as `null`.
 *
 * @param value - The value to stringify
 * @param depth - Current nesting depth, used for indentation
 * @param indent - Number of spaces per level (0 = compact)
 */
function* stringifyValue(
  value: unknown,
  depth: number,
  indent: number,
): Iterable<string> {
  // Arrays report typeof 'object', so they are picked out first
  if (Array.isArray(value)) {
    yield* stringifyArray(value, depth, indent)
    return
  }

  switch (typeof value) {
    case 'boolean':
    case 'number':
    case 'string':
      yield JSON.stringify(value)
      return
    case 'object':
      if (value !== null) {
        yield* stringifyObject(value as Record<string, unknown>, depth, indent)
        return
      }
      break
  }

  // null itself, plus everything JSON cannot represent
  yield 'null'
}
/**
 * Decides whether the CLI should encode (JSON → TOON) or decode (TOON → JSON).
 *
 * Explicit flags win, with `encodeFlag` taking precedence when both are set.
 * Otherwise the mode is derived from the file extension, compared
 * case-insensitively so that e.g. `DATA.TOON` is recognized as TOON.
 * Stdin and unknown extensions default to encode.
 *
 * @param input - Resolved input source
 * @param encodeFlag - Whether `--encode` was passed
 * @param decodeFlag - Whether `--decode` was passed
 * @returns The conversion direction to run
 */
export function detectMode(
  input: InputSource,
  encodeFlag?: boolean,
  decodeFlag?: boolean,
): 'encode' | 'decode' {
  // Explicit flags take precedence
  if (encodeFlag)
    return 'encode'
  if (decodeFlag)
    return 'decode'

  // Auto-detect based on file extension, ignoring letter case
  if (input.type === 'file') {
    const lowerPath = input.path.toLowerCase()
    if (lowerPath.endsWith('.json'))
      return 'encode'
    if (lowerPath.endsWith('.toon'))
      return 'decode'
  }

  // Default to encode
  return 'encode'
}
/**
 * Reads the input source as UTF-8 text and yields it line by line, splitting
 * on `\n` and dropping the separator.
 *
 * Chunk boundaries are invisible to the consumer: an unfinished line is held
 * back until the rest of it arrives. Text after the last `\n` is yielded as a
 * final line unless it is empty, so a trailing newline produces no extra line.
 *
 * @param source - Stdin or a file path to read from
 * @returns Async iterable of lines without their `\n` terminator
 */
export async function* readLinesFromSource(source: InputSource): AsyncIterable<string> {
  const fromStdin = source.type === 'stdin'
  const stream = source.type === 'stdin'
    ? process.stdin
    : createReadStream(source.path, { encoding: 'utf-8' })

  // The file stream is created with an encoding; stdin has to be told explicitly
  if (fromStdin) {
    stream.setEncoding('utf-8')
  }

  let pending = ''

  for await (const chunk of stream) {
    // Everything before the last separator is complete; the tail may continue
    // in the next chunk.
    const segments = (pending + chunk).split('\n')
    pending = segments.pop() ?? ''
    yield* segments
  }

  // Emit the unterminated last line, if there is one
  if (pending.length > 0) {
    yield pending
  }
}
writeChunks.push(String(chunk)) return true }) try { await runCli() const fullOutput = writeChunks.join('') expect(fullOutput).toBe(`${encode(data)}\n`) } finally { cleanup() } }) it('encodes a JSON file into a TOON file', async () => { const data = { title: 'TOON test', count: 3, nested: { ok: true }, } const context = await createCliTestContext({ 'input.json': JSON.stringify(data, undefined, 2), }) const consolaSuccess = vi.spyOn(consola, 'success').mockImplementation(() => undefined) try { await context.run(['input.json', '--output', 'output.toon']) const output = await context.read('output.toon') const expected = encode(data, { delimiter: DEFAULT_DELIMITER, indent: 2, }) expect(output).toBe(expected) expect(consolaSuccess).toHaveBeenCalledWith(expect.stringMatching(/Encoded .* → .*/)) } finally { await context.cleanup() } }) it('writes to stdout when output not specified', async () => { const data = { ok: true } const context = await createCliTestContext({ 'input.json': JSON.stringify(data), }) const writeChunks: string[] = [] vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { writeChunks.push(String(chunk)) return true }) try { await context.run(['input.json']) const fullOutput = writeChunks.join('') expect(fullOutput).toBe(`${encode(data)}\n`) } finally { await context.cleanup() } }) it('encodes JSON from stdin to output file', async () => { const data = { key: 'value' } const context = await createCliTestContext({}) const cleanup = mockStdin(JSON.stringify(data)) const consolaSuccess = vi.spyOn(consola, 'success').mockImplementation(() => undefined) try { await context.run(['--output', 'output.toon']) const output = await context.read('output.toon') expect(output).toBe(encode(data)) expect(consolaSuccess).toHaveBeenCalledWith(expect.stringMatching(/Encoded.*stdin[^\n\r\u2028\u2029\u2192]*\u2192.*output\.toon/)) } finally { cleanup() await context.cleanup() } }) }) describe('decode (TOON → JSON)', () => { it('decodes a TOON file into a JSON 
file', async () => { const data = { items: ['alpha', 'beta'], meta: { done: false }, } const toonInput = encode(data) const context = await createCliTestContext({ 'input.toon': toonInput, }) const consolaSuccess = vi.spyOn(consola, 'success').mockImplementation(() => undefined) try { await context.run(['input.toon', '--output', 'output.json']) const output = await context.read('output.json') expect(JSON.parse(output)).toEqual(data) expect(consolaSuccess).toHaveBeenCalledWith(expect.stringMatching(/Decoded .* → .*/)) } finally { await context.cleanup() } }) it('decodes TOON from stdin', async () => { const data = { items: ['a', 'b'], count: 2 } const toonInput = encode(data) const cleanup = mockStdin(toonInput) const writeChunks: string[] = [] vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { writeChunks.push(String(chunk)) return true }) try { await runCli({ rawArgs: ['--decode'] }) const fullOutput = writeChunks.join('') // Remove trailing newline before parsing const jsonOutput = fullOutput.endsWith('\n') ? 
fullOutput.slice(0, -1) : fullOutput const result = JSON.parse(jsonOutput) expect(result).toEqual(data) } finally { cleanup() } }) it('decodes TOON from stdin to output file', async () => { const data = { name: 'test', values: [1, 2, 3] } const toonInput = encode(data) const context = await createCliTestContext({}) const cleanup = mockStdin(toonInput) const consolaSuccess = vi.spyOn(consola, 'success').mockImplementation(() => undefined) try { await context.run(['--decode', '--output', 'output.json']) const output = await context.read('output.json') expect(JSON.parse(output)).toEqual(data) expect(consolaSuccess).toHaveBeenCalledWith(expect.stringMatching(/Decoded.*stdin[^\n\r\u2028\u2029\u2192]*\u2192.*output\.json/)) } finally { cleanup() await context.cleanup() } }) }) describe('stdin edge cases', () => { it('handles invalid JSON from stdin', async () => { const cleanup = mockStdin('{ invalid json }') const consolaError = vi.spyOn(consola, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await runCli({ rawArgs: [] }) expect(exitSpy).toHaveBeenCalledWith(1) expect(consolaError).toHaveBeenCalled() } finally { cleanup() } }) it('handles invalid TOON from stdin', async () => { const cleanup = mockStdin('key: "unterminated string') const consolaError = vi.spyOn(consola, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await runCli({ rawArgs: ['--decode'] }) expect(exitSpy).toHaveBeenCalledWith(1) expect(consolaError).toHaveBeenCalled() } finally { cleanup() } }) }) describe('stdin with options', () => { it('encodes JSON from stdin with custom delimiter', async () => { const data = { items: [1, 2, 3] } const cleanup = mockStdin(JSON.stringify(data)) const writeChunks: string[] = [] vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { writeChunks.push(String(chunk)) return true }) try { await runCli({ rawArgs: ['--delimiter', '|'] }) const fullOutput = writeChunks.join('') 
expect(fullOutput).toBe(`${encode(data, { delimiter: '|' })}\n`) } finally { cleanup() } }) it('encodes JSON from stdin with custom indent', async () => { const data = { nested: { deep: { value: 1 }, }, } const cleanup = mockStdin(JSON.stringify(data)) const writeChunks: string[] = [] vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { writeChunks.push(String(chunk)) return true }) try { await runCli({ rawArgs: ['--indent', '4'] }) const fullOutput = writeChunks.join('') expect(fullOutput).toBe(`${encode(data, { indent: 4 })}\n`) } finally { cleanup() } }) it('decodes TOON from stdin with --no-strict', async () => { const data = { test: true } const toonInput = encode(data) const cleanup = mockStdin(toonInput) const writeChunks: string[] = [] vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { writeChunks.push(String(chunk)) return true }) try { await runCli({ rawArgs: ['--decode', '--no-strict'] }) const fullOutput = writeChunks.join('') // Remove trailing newline before parsing const jsonOutput = fullOutput.endsWith('\n') ? 
fullOutput.slice(0, -1) : fullOutput
        const result = JSON.parse(jsonOutput)
        expect(result).toEqual(data)
      }
      finally {
        cleanup()
      }
    })
  })

  describe('encode options', () => {
    it('encodes with --keyFolding safe', async () => {
      const data = {
        data: {
          metadata: {
            items: ['a', 'b'],
          },
        },
      }

      const context = await createCliTestContext({
        'input.json': JSON.stringify(data),
      })

      try {
        await context.run(['input.json', '--keyFolding', 'safe', '--output', 'output.toon'])

        const output = await context.read('output.toon')
        const expected = encode(data, { keyFolding: 'safe' })

        expect(output).toBe(expected)
      }
      finally {
        await context.cleanup()
      }
    })

    it('encodes with --flattenDepth', async () => {
      const data = {
        level1: {
          level2: {
            level3: {
              value: 'deep',
            },
          },
        },
      }

      const context = await createCliTestContext({
        'input.json': JSON.stringify(data),
      })

      try {
        await context.run(['input.json', '--keyFolding', 'safe', '--flattenDepth', '2', '--output', 'output.toon'])

        const output = await context.read('output.toon')
        const expected = encode(data, { keyFolding: 'safe', flattenDepth: 2 })

        expect(output).toBe(expected)
      }
      finally {
        await context.cleanup()
      }
    })
  })

  describe('decode options', () => {
    it('decodes with --expandPaths safe', async () => {
      const data = {
        data: {
          metadata: {
            items: ['a', 'b'],
          },
        },
      }
      const toonInput = encode(data, { keyFolding: 'safe' })

      const context = await createCliTestContext({
        'input.toon': toonInput,
      })

      try {
        await context.run(['input.toon', '--decode', '--expandPaths', 'safe', '--output', 'output.json'])

        const output = await context.read('output.json')
        const result = JSON.parse(output)

        expect(result).toEqual(data)
      }
      finally {
        await context.cleanup()
      }
    })

    // Decodes a TOON file with `--indent 4` and checks both the parsed value
    // and the indentation width of the JSON text written to the output file.
    it('decodes with --indent for JSON formatting', async () => {
      const data = {
        a: 1,
        b: [2, 3],
        c: { nested: true },
      }
      const toonInput = encode(data, { indent: 4 })

      const context = await createCliTestContext({
        'input.toon': toonInput,
      })

      try {
        await context.run(['input.toon', '--decode', '--indent', '4', '--output', 'output.json'])

        const output = await context.read('output.json')
        const result = JSON.parse(output)

        expect(result).toEqual(data)
        // Should have 4-space indentation: the first top-level key must sit on
        // its own line, indented by exactly four spaces. Matching a lone space
        // would also succeed for any other indent width.
        expect(output).toContain('\n    "a"')
      }
      finally {
        await context.cleanup()
      }
    })

    it('decodes root primitive number', async () => {
      const toonInput = '42'
      const cleanup = mockStdin(toonInput)

      const writeChunks: string[] = []
      vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => {
        writeChunks.push(String(chunk))
        return true
      })

      try {
        await runCli({ rawArgs: ['--decode'] })

        const fullOutput = writeChunks.join('')
        expect(fullOutput).toBe('42\n')
      }
      finally {
        cleanup()
      }
    })

    it('decodes root primitive string', async () => {
      const toonInput = '"Hello World"'
      const cleanup = mockStdin(toonInput)

      const writeChunks: string[] = []
      vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => {
        writeChunks.push(String(chunk))
        return true
      })

      try {
        await runCli({ rawArgs: ['--decode'] })

        const fullOutput = writeChunks.join('')
        const jsonOutput = fullOutput.endsWith('\n') ?
fullOutput.slice(0, -1) : fullOutput expect(JSON.parse(jsonOutput)).toBe('Hello World') } finally { cleanup() } }) it('decodes root primitive boolean', async () => { const toonInput = 'true' const cleanup = mockStdin(toonInput) const writeChunks: string[] = [] vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { writeChunks.push(String(chunk)) return true }) try { await runCli({ rawArgs: ['--decode'] }) const fullOutput = writeChunks.join('') expect(fullOutput).toBe('true\n') } finally { cleanup() } }) }) describe('streaming output', () => { it('streams large JSON to TOON file with identical output', async () => { const data = { items: Array.from({ length: 1000 }, (_, i) => ({ id: i, name: `Item ${i}`, value: Math.random(), })), } const context = await createCliTestContext({ 'large-input.json': JSON.stringify(data, undefined, 2), }) const consolaSuccess = vi.spyOn(consola, 'success').mockImplementation(() => undefined) try { await context.run(['large-input.json', '--output', 'output.toon']) const output = await context.read('output.toon') // Verify streaming produces identical output to `encode()` const expected = encode(data, { delimiter: DEFAULT_DELIMITER, indent: 2, }) expect(output).toBe(expected) expect(consolaSuccess).toHaveBeenCalledWith(expect.stringMatching(/Encoded .* → .*/)) } finally { await context.cleanup() } }) it('streams large TOON to JSON file with streaming decode', async () => { const data = { records: Array.from({ length: 1000 }, (_, i) => ({ id: i, title: `Record ${i}`, score: Math.random() * 100, })), } const toonContent = encode(data, { delimiter: DEFAULT_DELIMITER, indent: 2, }) const context = await createCliTestContext({ 'large-input.toon': toonContent, }) const consolaSuccess = vi.spyOn(consola, 'success').mockImplementation(() => undefined) try { await context.run(['large-input.toon', '--decode', '--output', 'output.json']) const output = await context.read('output.json') const result = JSON.parse(output) 
expect(result).toEqual(data) expect(consolaSuccess).toHaveBeenCalledWith(expect.stringMatching(/Decoded .* → .*/)) } finally { await context.cleanup() } }) it('streams to stdout using process.stdout.write', async () => { const data = { users: [ { id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }, ], } const context = await createCliTestContext({ 'input.json': JSON.stringify(data), }) const writeChunks: string[] = [] const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { writeChunks.push(String(chunk)) return true }) try { await context.run(['input.json']) expect(writeSpy).toHaveBeenCalled() // Verify complete output matches `encode()` const fullOutput = writeChunks.join('') const expected = `${encode(data)}\n` expect(fullOutput).toBe(expected) } finally { await context.cleanup() } }) it('handles empty object streaming correctly', async () => { const data = {} const context = await createCliTestContext({ 'empty.json': JSON.stringify(data), }) try { await context.run(['empty.json', '--output', 'output.toon']) const output = await context.read('output.toon') expect(output).toBe(encode(data)) } finally { await context.cleanup() } }) it('handles single-line output streaming correctly', async () => { const data = { key: 'value' } const context = await createCliTestContext({ 'single.json': JSON.stringify(data), }) try { await context.run(['single.json', '--output', 'output.toon']) const output = await context.read('output.toon') expect(output).toBe(encode(data)) } finally { await context.cleanup() } }) it('uses non-streaming path when stats are enabled', async () => { const data = { items: [ { id: 1, value: 'test' }, { id: 2, value: 'data' }, ], } const context = await createCliTestContext({ 'input.json': JSON.stringify(data), }) const consoleLogSpy = vi.spyOn(console, 'log').mockImplementation(() => undefined) const consolaInfo = vi.spyOn(consola, 'info').mockImplementation(() => undefined) const consolaSuccess = vi.spyOn(consola, 
'success').mockImplementation(() => undefined) try { await context.run(['input.json', '--stats']) expect(consolaInfo).toHaveBeenCalledWith(expect.stringMatching(/Token estimates:/)) expect(consolaSuccess).toHaveBeenCalledWith(expect.stringMatching(/Saved.*tokens/)) expect(consoleLogSpy).toHaveBeenCalledWith(encode(data)) } finally { await context.cleanup() } }) }) describe('error handling', () => { it('rejects invalid delimiter', async () => { const context = await createCliTestContext({ 'input.json': JSON.stringify({ value: 1 }), }) const consoleError = vi.spyOn(console, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await context.run(['input.json', '--delimiter', ';']) expect(exitSpy).toHaveBeenCalledWith(1) const errorCall = consoleError.mock.calls.at(0) expect(errorCall).toBeDefined() const [error] = errorCall! expect(error).toBeInstanceOf(Error) expect(error.message).toContain('Invalid delimiter') } finally { await context.cleanup() } }) it('rejects invalid indent value', async () => { const context = await createCliTestContext({ 'input.json': JSON.stringify({ value: 1 }), }) const consoleError = vi.spyOn(console, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await context.run(['input.json', '--indent', 'abc']) expect(exitSpy).toHaveBeenCalledWith(1) const errorCall = consoleError.mock.calls.at(0) expect(errorCall).toBeDefined() const [error] = errorCall! 
expect(error).toBeInstanceOf(Error) expect(error.message).toContain('Invalid indent value') } finally { await context.cleanup() } }) it('handles missing input file', async () => { const context = await createCliTestContext({}) const consolaError = vi.spyOn(consola, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await context.run(['nonexistent.json']) expect(exitSpy).toHaveBeenCalledWith(1) expect(consolaError).toHaveBeenCalled() } finally { await context.cleanup() } }) it('rejects invalid --keyFolding value', async () => { const context = await createCliTestContext({ 'input.json': JSON.stringify({ value: 1 }), }) const consoleError = vi.spyOn(console, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await context.run(['input.json', '--keyFolding', 'invalid']) expect(exitSpy).toHaveBeenCalledWith(1) const errorCall = consoleError.mock.calls.at(0) expect(errorCall).toBeDefined() const [error] = errorCall! expect(error).toBeInstanceOf(Error) expect(error.message).toContain('Invalid keyFolding value') } finally { await context.cleanup() } }) it('rejects invalid --expandPaths value', async () => { const context = await createCliTestContext({ 'input.toon': 'key: value', }) const consoleError = vi.spyOn(console, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await context.run(['input.toon', '--decode', '--expandPaths', 'invalid']) expect(exitSpy).toHaveBeenCalledWith(1) const errorCall = consoleError.mock.calls.at(0) expect(errorCall).toBeDefined() const [error] = errorCall! 
expect(error).toBeInstanceOf(Error) expect(error.message).toContain('Invalid expandPaths value') } finally { await context.cleanup() } }) it('rejects invalid --flattenDepth value', async () => { const context = await createCliTestContext({ 'input.json': JSON.stringify({ value: 1 }), }) const consoleError = vi.spyOn(console, 'error').mockImplementation(() => undefined) const exitSpy = vi.mocked(process.exit) try { await context.run(['input.json', '--flattenDepth', '-1']) expect(exitSpy).toHaveBeenCalledWith(1) const errorCall = consoleError.mock.calls.at(0) expect(errorCall).toBeDefined() const [error] = errorCall! expect(error).toBeInstanceOf(Error) expect(error.message).toContain('Invalid flattenDepth value') } finally { await context.cleanup() } }) }) }) ================================================ FILE: packages/cli/test/json-from-events.test.ts ================================================ import type { JsonStreamEvent } from '../../toon/src/types' import { describe, expect, it } from 'vitest' import { jsonStreamFromEvents } from '../src/json-from-events' describe('jsonStreamFromEvents', () => { describe('primitives', () => { it('converts null event', async () => { const events = [ { type: 'primitive' as const, value: null }, ] expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(null)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(null, null, 2)) }) it('converts boolean events', async () => { const eventsTrue = [{ type: 'primitive' as const, value: true }] const eventsFalse = [{ type: 'primitive' as const, value: false }] expect(await join(jsonStreamFromEvents(asyncEvents(eventsTrue), 0))).toBe(JSON.stringify(true)) expect(await join(jsonStreamFromEvents(asyncEvents(eventsFalse), 0))).toBe(JSON.stringify(false)) expect(await join(jsonStreamFromEvents(asyncEvents(eventsTrue), 2))).toBe(JSON.stringify(true, null, 2)) }) it('converts number events', async () => { const events0 = [{ 
type: 'primitive' as const, value: 0 }] const events42 = [{ type: 'primitive' as const, value: 42 }] const eventsNeg = [{ type: 'primitive' as const, value: -17 }] const eventsFloat = [{ type: 'primitive' as const, value: 3.14159 }] expect(await join(jsonStreamFromEvents(asyncEvents(events0), 0))).toBe(JSON.stringify(0)) expect(await join(jsonStreamFromEvents(asyncEvents(events42), 0))).toBe(JSON.stringify(42)) expect(await join(jsonStreamFromEvents(asyncEvents(eventsNeg), 0))).toBe(JSON.stringify(-17)) expect(await join(jsonStreamFromEvents(asyncEvents(eventsFloat), 0))).toBe(JSON.stringify(3.14159)) expect(await join(jsonStreamFromEvents(asyncEvents(events42), 2))).toBe(JSON.stringify(42, null, 2)) }) it('converts string events', async () => { const eventsEmpty = [{ type: 'primitive' as const, value: '' }] const eventsHello = [{ type: 'primitive' as const, value: 'hello' }] const eventsQuotes = [{ type: 'primitive' as const, value: 'with "quotes"' }] expect(await join(jsonStreamFromEvents(asyncEvents(eventsEmpty), 0))).toBe(JSON.stringify('')) expect(await join(jsonStreamFromEvents(asyncEvents(eventsHello), 0))).toBe(JSON.stringify('hello')) expect(await join(jsonStreamFromEvents(asyncEvents(eventsQuotes), 0))).toBe(JSON.stringify('with "quotes"')) }) }) describe('empty containers', () => { it('converts empty array events', async () => { const events = [ { type: 'startArray' as const, length: 0 }, { type: 'endArray' as const }, ] expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify([], null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify([], null, 2)) }) it('converts empty object events', async () => { const events = [ { type: 'startObject' as const }, { type: 'endObject' as const }, ] expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify({}, null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify({}, null, 2)) }) }) 
describe('arrays', () => { it('converts simple array events with compact formatting', async () => { const events = [ { type: 'startArray' as const, length: 3 }, { type: 'primitive' as const, value: 1 }, { type: 'primitive' as const, value: 2 }, { type: 'primitive' as const, value: 3 }, { type: 'endArray' as const }, ] const value = [1, 2, 3] expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) }) it('converts simple array events with pretty formatting', async () => { const events = [ { type: 'startArray' as const, length: 3 }, { type: 'primitive' as const, value: 1 }, { type: 'primitive' as const, value: 2 }, { type: 'primitive' as const, value: 3 }, { type: 'endArray' as const }, ] const value = [1, 2, 3] expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) it('converts mixed-type array events', async () => { const events = [ { type: 'startArray' as const, length: 5 }, { type: 'primitive' as const, value: 1 }, { type: 'primitive' as const, value: 'two' }, { type: 'primitive' as const, value: true }, { type: 'primitive' as const, value: null }, { type: 'startObject' as const }, { type: 'key' as const, key: 'key' }, { type: 'primitive' as const, value: 'value' }, { type: 'endObject' as const }, { type: 'endArray' as const }, ] const value = [1, 'two', true, null, { key: 'value' }] expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) it('converts nested array events', async () => { const events = [ { type: 'startArray' as const, length: 3 }, { type: 'startArray' as const, length: 2 }, { type: 'primitive' as const, value: 1 }, { type: 'primitive' as const, value: 2 }, { type: 'endArray' as const }, { type: 'startArray' as const, length: 2 }, { type: 'primitive' as const, value: 3 }, { type: 'primitive' as const, 
value: 4 }, { type: 'endArray' as const }, { type: 'startArray' as const, length: 2 }, { type: 'primitive' as const, value: 5 }, { type: 'primitive' as const, value: 6 }, { type: 'endArray' as const }, { type: 'endArray' as const }, ] const value = [[1, 2], [3, 4], [5, 6]] expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) }) describe('objects', () => { it('converts simple object events with compact formatting', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'a' }, { type: 'primitive' as const, value: 1 }, { type: 'key' as const, key: 'b' }, { type: 'primitive' as const, value: 2 }, { type: 'key' as const, key: 'c' }, { type: 'primitive' as const, value: 3 }, { type: 'endObject' as const }, ] const value = { a: 1, b: 2, c: 3 } expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) }) it('converts simple object events with pretty formatting', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'a' }, { type: 'primitive' as const, value: 1 }, { type: 'key' as const, key: 'b' }, { type: 'primitive' as const, value: 2 }, { type: 'key' as const, key: 'c' }, { type: 'primitive' as const, value: 3 }, { type: 'endObject' as const }, ] const value = { a: 1, b: 2, c: 3 } expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) it('converts object events with mixed value types', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'num' }, { type: 'primitive' as const, value: 42 }, { type: 'key' as const, key: 'str' }, { type: 'primitive' as const, value: 'hello' }, { type: 'key' as const, key: 'bool' }, { type: 'primitive' as const, value: true }, { type: 'key' as const, key: 'nil' }, { type: 
'primitive' as const, value: null }, { type: 'key' as const, key: 'arr' }, { type: 'startArray' as const, length: 3 }, { type: 'primitive' as const, value: 1 }, { type: 'primitive' as const, value: 2 }, { type: 'primitive' as const, value: 3 }, { type: 'endArray' as const }, { type: 'endObject' as const }, ] const value = { num: 42, str: 'hello', bool: true, nil: null, arr: [1, 2, 3], } expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) it('converts nested object events', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'level1' }, { type: 'startObject' as const }, { type: 'key' as const, key: 'level2' }, { type: 'startObject' as const }, { type: 'key' as const, key: 'level3' }, { type: 'primitive' as const, value: 'deep' }, { type: 'endObject' as const }, { type: 'endObject' as const }, { type: 'endObject' as const }, ] const value = { level1: { level2: { level3: 'deep', }, }, } expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) it('handles special characters in keys', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'normal-key' }, { type: 'primitive' as const, value: 1 }, { type: 'key' as const, key: 'key with spaces' }, { type: 'primitive' as const, value: 2 }, { type: 'key' as const, key: 'key:with:colons' }, { type: 'primitive' as const, value: 3 }, { type: 'key' as const, key: 'key"with"quotes' }, { type: 'primitive' as const, value: 4 }, { type: 'endObject' as const }, ] const value = { 'normal-key': 1, 'key with spaces': 2, 'key:with:colons': 3, 'key"with"quotes': 4, } expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, 
null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) }) describe('complex nested structures', () => { it('converts object containing arrays', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'name' }, { type: 'primitive' as const, value: 'Alice' }, { type: 'key' as const, key: 'scores' }, { type: 'startArray' as const, length: 3 }, { type: 'primitive' as const, value: 95 }, { type: 'primitive' as const, value: 87 }, { type: 'primitive' as const, value: 92 }, { type: 'endArray' as const }, { type: 'key' as const, key: 'metadata' }, { type: 'startObject' as const }, { type: 'key' as const, key: 'tags' }, { type: 'startArray' as const, length: 2 }, { type: 'primitive' as const, value: 'math' }, { type: 'primitive' as const, value: 'science' }, { type: 'endArray' as const }, { type: 'endObject' as const }, { type: 'endObject' as const }, ] const value = { name: 'Alice', scores: [95, 87, 92], metadata: { tags: ['math', 'science'], }, } expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) it('converts array of objects', async () => { const events = [ { type: 'startArray' as const, length: 3 }, { type: 'startObject' as const }, { type: 'key' as const, key: 'id' }, { type: 'primitive' as const, value: 1 }, { type: 'key' as const, key: 'name' }, { type: 'primitive' as const, value: 'Alice' }, { type: 'endObject' as const }, { type: 'startObject' as const }, { type: 'key' as const, key: 'id' }, { type: 'primitive' as const, value: 2 }, { type: 'key' as const, key: 'name' }, { type: 'primitive' as const, value: 'Bob' }, { type: 'endObject' as const }, { type: 'startObject' as const }, { type: 'key' as const, key: 'id' }, { type: 'primitive' as const, value: 3 }, { type: 'key' as const, key: 'name' }, { type: 'primitive' 
as const, value: 'Charlie' }, { type: 'endObject' as const }, { type: 'endArray' as const }, ] const value = [ { id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }, { id: 3, name: 'Charlie' }, ] expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) }) describe('indentation levels', () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'a' }, { type: 'startArray' as const, length: 2 }, { type: 'primitive' as const, value: 1 }, { type: 'primitive' as const, value: 2 }, { type: 'endArray' as const }, { type: 'key' as const, key: 'b' }, { type: 'startObject' as const }, { type: 'key' as const, key: 'c' }, { type: 'primitive' as const, value: 3 }, { type: 'endObject' as const }, { type: 'endObject' as const }, ] const value = { a: [1, 2], b: { c: 3 } } it('handles indent=0 (compact)', async () => { expect(await join(jsonStreamFromEvents(asyncEvents(events), 0))).toBe(JSON.stringify(value, null, 0)) }) it('handles indent=2', async () => { expect(await join(jsonStreamFromEvents(asyncEvents(events), 2))).toBe(JSON.stringify(value, null, 2)) }) it('handles indent=4', async () => { expect(await join(jsonStreamFromEvents(asyncEvents(events), 4))).toBe(JSON.stringify(value, null, 4)) }) it('handles indent=8', async () => { expect(await join(jsonStreamFromEvents(asyncEvents(events), 8))).toBe(JSON.stringify(value, null, 8)) }) }) describe('error handling', () => { it('throws on mismatched endObject event', async () => { const events = [ { type: 'startArray' as const, length: 0 }, { type: 'endObject' as const }, // Wrong closing event ] await expect(async () => { await join(jsonStreamFromEvents(asyncEvents(events), 0)) }).rejects.toThrow('Mismatched endObject event') }) it('throws on mismatched endArray event', async () => { const events = [ { type: 'startObject' as const }, { type: 'endArray' as 
const }, // Wrong closing event ] await expect(async () => { await join(jsonStreamFromEvents(asyncEvents(events), 0)) }).rejects.toThrow('Mismatched endArray event') }) it('throws on key event outside object context', async () => { const events = [ { type: 'key' as const, key: 'invalid' }, { type: 'primitive' as const, value: 1 }, ] await expect(async () => { await join(jsonStreamFromEvents(asyncEvents(events), 0)) }).rejects.toThrow('Key event outside of object context') }) it('throws on primitive in object without preceding key', async () => { const events = [ { type: 'startObject' as const }, { type: 'primitive' as const, value: 'invalid' }, // No key before primitive { type: 'endObject' as const }, ] await expect(async () => { await join(jsonStreamFromEvents(asyncEvents(events), 0)) }).rejects.toThrow('Primitive event in object without preceding key') }) it('throws on incomplete event stream', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'name' }, { type: 'primitive' as const, value: 'Alice' }, // Missing `endObject` ] await expect(async () => { await join(jsonStreamFromEvents(asyncEvents(events), 0)) }).rejects.toThrow('Incomplete event stream: unclosed objects or arrays') }) }) }) /** * Converts array of events to async iterable. */ async function* asyncEvents(events: JsonStreamEvent[]): AsyncIterable { for (const event of events) { await Promise.resolve() yield event } } /** * Joins chunks from an async iterable into a single string. 
*/ async function join(iter: AsyncIterable): Promise { const chunks: string[] = [] for await (const chunk of iter) { chunks.push(chunk) } return chunks.join('') } ================================================ FILE: packages/cli/test/json-stringify-stream.test.ts ================================================ import { describe, expect, it } from 'vitest' import { jsonStringifyLines } from '../src/json-stringify-stream' describe('jsonStringifyLines', () => { describe('primitives', () => { it('stringifies null', () => { expect(join(jsonStringifyLines(null, 0))).toBe(JSON.stringify(null)) expect(join(jsonStringifyLines(null, 2))).toBe(JSON.stringify(null, null, 2)) }) it('stringifies booleans', () => { expect(join(jsonStringifyLines(true, 0))).toBe(JSON.stringify(true)) expect(join(jsonStringifyLines(false, 0))).toBe(JSON.stringify(false)) expect(join(jsonStringifyLines(true, 2))).toBe(JSON.stringify(true, null, 2)) }) it('stringifies numbers', () => { expect(join(jsonStringifyLines(0, 0))).toBe(JSON.stringify(0)) expect(join(jsonStringifyLines(42, 0))).toBe(JSON.stringify(42)) expect(join(jsonStringifyLines(-17, 0))).toBe(JSON.stringify(-17)) expect(join(jsonStringifyLines(3.14159, 0))).toBe(JSON.stringify(3.14159)) expect(join(jsonStringifyLines(1e10, 2))).toBe(JSON.stringify(1e10, null, 2)) }) it('stringifies strings', () => { expect(join(jsonStringifyLines('', 0))).toBe(JSON.stringify('')) expect(join(jsonStringifyLines('hello', 0))).toBe(JSON.stringify('hello')) expect(join(jsonStringifyLines('with "quotes"', 0))).toBe(JSON.stringify('with "quotes"')) expect(join(jsonStringifyLines('with\nnewlines', 2))).toBe(JSON.stringify('with\nnewlines', null, 2)) expect(join(jsonStringifyLines('with\ttabs', 0))).toBe(JSON.stringify('with\ttabs')) }) it('converts undefined to null', () => { expect(join(jsonStringifyLines(undefined, 0))).toBe('null') expect(join(jsonStringifyLines(undefined, 2))).toBe('null') }) }) describe('empty containers', () => { it('stringifies empty 
arrays', () => { expect(join(jsonStringifyLines([], 0))).toBe(JSON.stringify([], null, 0)) expect(join(jsonStringifyLines([], 2))).toBe(JSON.stringify([], null, 2)) }) it('stringifies empty objects', () => { expect(join(jsonStringifyLines({}, 0))).toBe(JSON.stringify({}, null, 0)) expect(join(jsonStringifyLines({}, 2))).toBe(JSON.stringify({}, null, 2)) }) }) describe('arrays', () => { it('stringifies arrays with compact formatting (indent=0)', () => { const value = [1, 2, 3] expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) }) it('stringifies arrays with pretty formatting (indent=2)', () => { const value = [1, 2, 3] expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('stringifies mixed-type arrays', () => { const value = [1, 'two', true, null, { key: 'value' }] expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('stringifies nested arrays', () => { const value = [[1, 2], [3, 4], [5, 6]] expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('stringifies deeply nested arrays', () => { const value = [[[1]], [[2]], [[3]]] expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) expect(join(jsonStringifyLines(value, 4))).toBe(JSON.stringify(value, null, 4)) }) }) describe('objects', () => { it('stringifies simple objects with compact formatting', () => { const value = { a: 1, b: 2, c: 3 } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) }) it('stringifies simple objects with pretty formatting', () => { const value = { a: 1, b: 2, c: 3 } expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('stringifies objects with mixed value types', () => { const value = { num: 42, str: 'hello', bool: true, nil: 
null, arr: [1, 2, 3], } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('stringifies nested objects', () => { const value = { level1: { level2: { level3: 'deep', }, }, } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('preserves key order', () => { const value = { z: 1, a: 2, m: 3 } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('handles special characters in keys', () => { const value = { 'normal-key': 1, 'key with spaces': 2, 'key:with:colons': 3, 'key"with"quotes': 4, } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) }) describe('complex nested structures', () => { it('stringifies objects containing arrays', () => { const value = { name: 'Alice', scores: [95, 87, 92], metadata: { tags: ['math', 'science'], }, } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('stringifies arrays of objects', () => { const value = [ { id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }, { id: 3, name: 'Charlie' }, ] expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('stringifies deeply nested mixed structures', () => { const value = { users: [ { name: 'Alice', roles: ['admin', 'user'], settings: { theme: 'dark', notifications: true, }, }, { name: 'Bob', roles: ['user'], settings: { theme: 'light', notifications: false, }, }, ], count: 2, } expect(join(jsonStringifyLines(value, 
0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) }) describe('indentation levels', () => { const value = { a: [1, 2], b: { c: 3 } } it('handles indent=0 (compact)', () => { expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) }) it('handles indent=2', () => { expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('handles indent=4', () => { expect(join(jsonStringifyLines(value, 4))).toBe(JSON.stringify(value, null, 4)) }) it('handles indent=8', () => { expect(join(jsonStringifyLines(value, 8))).toBe(JSON.stringify(value, null, 8)) }) }) describe('edge cases', () => { it('handles arrays with undefined values (converted to null)', () => { const value = [1, undefined, 3] const expected = JSON.stringify(value, null, 2) expect(join(jsonStringifyLines(value, 2))).toBe(expected) }) it('handles single-element arrays', () => { const value = [42] expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('handles single-property objects', () => { const value = { only: 'one' } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('handles objects with many properties', () => { const value: Record = {} for (let i = 0; i < 100; i++) { value[`key${i}`] = i } expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) it('handles large arrays', () => { const value = Array.from({ length: 1000 }, (_, i) => i) expect(join(jsonStringifyLines(value, 0))).toBe(JSON.stringify(value, null, 0)) expect(join(jsonStringifyLines(value, 2))).toBe(JSON.stringify(value, null, 2)) }) }) }) /** * Joins chunks from an iterable into a single string. 
*/ function join(iter: Iterable): string { return Array.from(iter).join('') } ================================================ FILE: packages/cli/test/utils.ts ================================================ import * as fsp from 'node:fs/promises' import * as os from 'node:os' import * as path from 'node:path' import process from 'node:process' import { Readable } from 'node:stream' import { runMain } from 'citty' import { mainCommand } from '../src/index' interface FileRecord { [relativePath: string]: string } export function runCli(options?: Parameters[1]): Promise { return runMain(mainCommand, options) } export interface CliTestContext { readonly dir: string run: (args?: string[]) => Promise read: (relativePath: string) => Promise write: (relativePath: string, contents: string) => Promise resolve: (relativePath: string) => string cleanup: () => Promise } const TEMP_PREFIX = path.join(os.tmpdir(), 'toon-cli-test-') export async function createCliTestContext(initialFiles: FileRecord = {}): Promise { const dir = await fsp.mkdtemp(TEMP_PREFIX) await writeFiles(dir, initialFiles) async function run(args: string[] = []): Promise { const previousCwd = process.cwd() process.chdir(dir) try { await runCli({ rawArgs: args }) } finally { process.chdir(previousCwd) } } function resolvePath(relativePath: string): string { return path.join(dir, relativePath) } async function read(relativePath: string): Promise { return fsp.readFile(resolvePath(relativePath), 'utf8') } async function write(relativePath: string, contents: string): Promise { const targetPath = resolvePath(relativePath) await fsp.mkdir(path.dirname(targetPath), { recursive: true }) await fsp.writeFile(targetPath, contents, 'utf8') } async function cleanup(): Promise { await fsp.rm(dir, { recursive: true, force: true }) } return { dir, run, read, write, resolve: resolvePath, cleanup, } } async function writeFiles(baseDir: string, files: FileRecord): Promise { await Promise.all( Object.entries(files).map(async 
([relativePath, contents]) => { const filePath = path.join(baseDir, relativePath) await fsp.mkdir(path.dirname(filePath), { recursive: true }) await fsp.writeFile(filePath, contents, 'utf8') }), ) } export function mockStdin(input: string): () => void { const mockStream = Readable.from([input]) const originalStdin = process.stdin Object.defineProperty(process, 'stdin', { value: mockStream, writable: true, }) return () => { Object.defineProperty(process, 'stdin', { value: originalStdin, writable: true, }) } } ================================================ FILE: packages/cli/tsdown.config.ts ================================================ import type { UserConfig } from 'tsdown/config' import { defineConfig } from 'tsdown/config' const config: UserConfig = defineConfig({ entry: { index: 'src/cli-entry.ts', }, dts: true, }) export default config ================================================ FILE: packages/toon/README.md ================================================  # Token-Oriented Object Notation (TOON) [](https://github.com/toon-format/toon/actions) [](https://www.npmjs.com/package/@toon-format/toon) [](https://github.com/toon-format/spec) [](https://www.npmjs.com/package/@toon-format/toon) [](./LICENSE) **Token-Oriented Object Notation** is a compact, human-readable encoding of the JSON data model that minimizes tokens and makes structure easy for models to follow. It's intended for *LLM input* as a drop-in, lossless representation of your existing JSON. TOON combines YAML's indentation-based structure for nested objects with a CSV-style tabular layout for uniform arrays. TOON's sweet spot is uniform arrays of objects (multiple fields per row, same structure across items), achieving CSV-like compactness while adding explicit structure that helps LLMs parse and validate data reliably. For deeply nested or non-uniform data, JSON may be more efficient. 
The similarity to CSV is intentional: CSV is simple and ubiquitous, and TOON aims to keep that familiarity while remaining a lossless, drop-in representation of JSON for Large Language Models. Think of it as a translation layer: use JSON programmatically, and encode it as TOON for LLM input. > [!TIP] > The TOON format is stable, but also an idea in progress. Nothing's set in stone – help shape where it goes by contributing to the [spec](https://github.com/toon-format/spec) or sharing feedback. ## Table of Contents - [Why TOON?](#why-toon) - [Key Features](#key-features) - [When Not to Use TOON](#when-not-to-use-toon) - [Benchmarks](#benchmarks) - [Installation & Quick Start](#installation--quick-start) - [Playgrounds](#playgrounds) - [Editor Support](#editor-support) - [CLI](#cli) - [Format Overview](#format-overview) - [Using TOON with LLMs](#using-toon-with-llms) - [Documentation](#documentation) - [Other Implementations](#other-implementations) - [📋 Full Specification](https://github.com/toon-format/spec/blob/main/SPEC.md) ## Why TOON? AI is becoming cheaper and more accessible, but larger context windows allow for larger data inputs as well. **LLM tokens still cost money** – and standard JSON is verbose and token-expensive: ```json { "context": { "task": "Our favorite hikes together", "location": "Boulder", "season": "spring_2025" }, "friends": ["ana", "luis", "sam"], "hikes": [ { "id": 1, "name": "Blue Lake Trail", "distanceKm": 7.5, "elevationGain": 320, "companion": "ana", "wasSunny": true }, { "id": 2, "name": "Ridge Overlook", "distanceKm": 9.2, "elevationGain": 540, "companion": "luis", "wasSunny": false }, { "id": 3, "name": "Wildflower Loop", "distanceKm": 5.1, "elevationGain": 180, "companion": "sam", "wasSunny": true } ] } ``` YAML already conveys the same information with fewer tokens. 
```yaml context: task: Our favorite hikes together location: Boulder season: spring_2025 friends: - ana - luis - sam hikes: - id: 1 name: Blue Lake Trail distanceKm: 7.5 elevationGain: 320 companion: ana wasSunny: true - id: 2 name: Ridge Overlook distanceKm: 9.2 elevationGain: 540 companion: luis wasSunny: false - id: 3 name: Wildflower Loop distanceKm: 5.1 elevationGain: 180 companion: sam wasSunny: true ``` TOON conveys the same information with **even fewer tokens** – combining YAML-like indentation with CSV-style tabular arrays: ```yaml context: task: Our favorite hikes together location: Boulder season: spring_2025 friends[3]: ana,luis,sam hikes[3]{id,name,distanceKm,elevationGain,companion,wasSunny}: 1,Blue Lake Trail,7.5,320,ana,true 2,Ridge Overlook,9.2,540,luis,false 3,Wildflower Loop,5.1,180,sam,true ``` ## Key Features - 📊 **Token-Efficient & Accurate:** TOON reaches 76.4% accuracy (vs JSON's 75.0%) while using ~40% fewer tokens in mixed-structure benchmarks across 4 models. - 🔁 **JSON Data Model:** Encodes the same objects, arrays, and primitives as JSON with deterministic, lossless round-trips. - 🛤️ **LLM-Friendly Guardrails:** Explicit [N] lengths and {fields} headers give models a clear schema to follow, improving parsing reliability. - 📐 **Minimal Syntax:** Uses indentation instead of braces and minimizes quoting, giving YAML-like readability with CSV-style compactness. - 🧺 **Tabular Arrays:** Uniform arrays of objects collapse into tables that declare fields once and stream row values line by line. - 🌐 **Multi-Language Ecosystem:** Spec-driven implementations in TypeScript, Python, Go, Rust, .NET, and other languages. ## Media Type & File Extension By convention, TOON files use the `.toon` extension and the provisional media type `text/toon` for HTTP and content-type–aware contexts. TOON documents are always UTF-8 encoded; the `charset=utf-8` parameter may be specified but defaults to UTF-8 when omitted. 
See [SPEC.md §18.2](https://github.com/toon-format/spec/blob/main/SPEC.md#182-provisional-media-type) for normative details. ## When Not to Use TOON TOON excels with uniform arrays of objects, but there are cases where other formats are better: - **Deeply nested or non-uniform structures** (tabular eligibility ≈ 0%): JSON-compact often uses fewer tokens. Example: complex configuration objects with many nested levels. - **Semi-uniform arrays** (~40–60% tabular eligibility): Token savings diminish. Prefer JSON if your pipelines already rely on it. - **Pure tabular data**: CSV is smaller than TOON for flat tables. TOON adds minimal overhead (~5-10%) to provide structure (array length declarations, field headers, delimiter scoping) that improves LLM reliability. - **Latency-critical applications**: If end-to-end response time is your top priority, benchmark on your exact setup. Some deployments (especially local/quantized models like Ollama) may process compact JSON faster despite TOON's lower token count. Measure TTFT, tokens/sec, and total time for both formats and use whichever is faster. See [benchmarks](#benchmarks) for concrete comparisons across different data structures. ## Benchmarks Benchmarks are organized into two tracks to ensure fair comparisons: - **Mixed-Structure Track**: Datasets with nested or semi-uniform structures (TOON vs JSON, YAML, XML). CSV excluded as it cannot properly represent these structures. - **Flat-Only Track**: Datasets with flat tabular structures where CSV is applicable (CSV vs TOON vs JSON, YAML, XML). ### Retrieval Accuracy Benchmarks test LLM comprehension across different input formats using 209 data retrieval questions on 4 models. 
Show Dataset Catalog #### Dataset Catalog | Dataset | Rows | Structure | CSV Support | Eligibility | | ------- | ---- | --------- | ----------- | ----------- | | Uniform employee records | 100 | uniform | ✓ | 100% | | E-commerce orders with nested structures | 50 | nested | ✗ | 33% | | Time-series analytics data | 60 | uniform | ✓ | 100% | | Top 100 GitHub repositories | 100 | uniform | ✓ | 100% | | Semi-uniform event logs | 75 | semi-uniform | ✗ | 50% | | Deeply nested configuration | 11 | deep | ✗ | 0% | | Valid complete dataset (control) | 20 | uniform | ✓ | 100% | | Array truncated: 3 rows removed from end | 17 | uniform | ✓ | 100% | | Extra rows added beyond declared length | 23 | uniform | ✓ | 100% | | Inconsistent field count (missing salary in row 10) | 20 | uniform | ✓ | 100% | | Missing required fields (no email in multiple rows) | 20 | uniform | ✓ | 100% | **Structure classes:** - **uniform**: All objects have identical fields with primitive values - **semi-uniform**: Mix of uniform and non-uniform structures - **nested**: Objects with nested structures (nested objects or arrays) - **deep**: Highly nested with minimal tabular eligibility **CSV Support:** ✓ (supported), ✗ (not supported – would require lossy flattening) **Eligibility:** Percentage of arrays that qualify for TOON's tabular format (uniform objects with primitive values) #### Efficiency Ranking (Accuracy per 1K Tokens) Each format ranked by efficiency (accuracy percentage per 1,000 tokens): ``` TOON ████████████████████ 27.7 acc%/1K tok │ 76.4% acc │ 2,759 tokens JSON compact █████████████████░░░ 23.7 acc%/1K tok │ 73.7% acc │ 3,104 tokens YAML ██████████████░░░░░░ 19.9 acc%/1K tok │ 74.5% acc │ 3,749 tokens JSON ████████████░░░░░░░░ 16.4 acc%/1K tok │ 75.0% acc │ 4,587 tokens XML ██████████░░░░░░░░░░ 13.8 acc%/1K tok │ 72.1% acc │ 5,221 tokens ``` *Efficiency score = (Accuracy % ÷ Tokens) × 1,000. 
Higher is better.* > [!TIP] > TOON achieves **76.4%** accuracy (vs JSON's 75.0%) while using **39.9% fewer tokens**. **Note on CSV:** Excluded from ranking as it only supports 109 of 209 questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle. #### Per-Model Accuracy Accuracy across 4 LLMs on 209 data retrieval questions: ``` claude-haiku-4-5-20251001 → TOON ████████████░░░░░░░░ 59.8% (125/209) JSON ███████████░░░░░░░░░ 57.4% (120/209) YAML ███████████░░░░░░░░░ 56.0% (117/209) XML ███████████░░░░░░░░░ 55.5% (116/209) JSON compact ███████████░░░░░░░░░ 55.0% (115/209) CSV ██████████░░░░░░░░░░ 50.5% (55/109) gemini-3-flash-preview XML ████████████████████ 98.1% (205/209) JSON ███████████████████░ 97.1% (203/209) YAML ███████████████████░ 97.1% (203/209) → TOON ███████████████████░ 96.7% (202/209) JSON compact ███████████████████░ 96.7% (202/209) CSV ███████████████████░ 96.3% (105/109) gpt-5-nano → TOON ██████████████████░░ 90.9% (190/209) JSON compact ██████████████████░░ 90.9% (190/209) JSON ██████████████████░░ 89.0% (186/209) CSV ██████████████████░░ 89.0% (97/109) YAML █████████████████░░░ 87.1% (182/209) XML ████████████████░░░░ 80.9% (169/209) grok-4-1-fast-non-reasoning → TOON ████████████░░░░░░░░ 58.4% (122/209) YAML ████████████░░░░░░░░ 57.9% (121/209) JSON ███████████░░░░░░░░░ 56.5% (118/209) XML ███████████░░░░░░░░░ 54.1% (113/209) JSON compact ██████████░░░░░░░░░░ 52.2% (109/209) CSV ██████████░░░░░░░░░░ 51.4% (56/109) ``` > [!TIP] > TOON achieves **76.4% accuracy** (vs JSON's 75.0%) while using **39.9% fewer tokens** on these datasets. 
Performance by dataset, model, and question type #### Performance by Question Type | Question Type | TOON | JSON | YAML | JSON compact | XML | CSV | | ------------- | ---- | ---- | ---- | ---- | ---- | ---- | | Field Retrieval | 99.6% | 99.3% | 98.5% | 98.5% | 98.9% | 100.0% | | Aggregation | 61.9% | 61.9% | 59.9% | 58.3% | 54.4% | 50.9% | | Filtering | 56.8% | 53.1% | 56.3% | 55.2% | 51.6% | 50.9% | | Structure Awareness | 89.0% | 87.0% | 84.0% | 84.0% | 81.0% | 85.9% | | Structural Validation | 70.0% | 60.0% | 60.0% | 55.0% | 85.0% | 80.0% | #### Performance by Dataset ##### Uniform employee records | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 73.2% | 2,334 | 120/164 | | `toon` | 73.2% | 2,498 | 120/164 | | `json-compact` | 73.8% | 3,924 | 121/164 | | `yaml` | 73.8% | 4,959 | 121/164 | | `json-pretty` | 73.8% | 6,331 | 121/164 | | `xml` | 74.4% | 7,296 | 122/164 | ##### E-commerce orders with nested structures | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 82.3% | 7,458 | 135/164 | | `json-compact` | 78.7% | 7,110 | 129/164 | | `yaml` | 79.9% | 8,755 | 131/164 | | `json-pretty` | 79.3% | 11,234 | 130/164 | | `xml` | 77.4% | 12,649 | 127/164 | ##### Time-series analytics data | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 75.0% | 1,411 | 90/120 | | `toon` | 78.3% | 1,553 | 94/120 | | `json-compact` | 74.2% | 2,354 | 89/120 | | `yaml` | 75.8% | 2,954 | 91/120 | | `json-pretty` | 75.0% | 3,681 | 90/120 | | `xml` | 72.5% | 4,389 | 87/120 | ##### Top 100 GitHub repositories | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 65.9% | 8,527 | 87/132 | | `toon` | 66.7% | 8,779 | 88/132 | | `yaml` | 65.2% | 13,141 | 86/132 | | `json-compact` | 59.8% | 11,464 | 79/132 | | `json-pretty` | 63.6% | 15,157 | 84/132 | | `xml` | 56.1% | 17,105 | 74/132 | 
##### Semi-uniform event logs | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 68.3% | 4,839 | 82/120 | | `toon` | 65.0% | 5,819 | 78/120 | | `json-pretty` | 69.2% | 6,817 | 83/120 | | `yaml` | 61.7% | 5,847 | 74/120 | | `xml` | 58.3% | 7,729 | 70/120 | ##### Deeply nested configuration | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 90.5% | 568 | 105/116 | | `toon` | 94.8% | 655 | 110/116 | | `yaml` | 93.1% | 675 | 108/116 | | `json-pretty` | 92.2% | 924 | 107/116 | | `xml` | 91.4% | 1,013 | 106/116 | ##### Valid complete dataset (control) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 100.0% | 535 | 4/4 | | `json-compact` | 100.0% | 787 | 4/4 | | `yaml` | 100.0% | 992 | 4/4 | | `json-pretty` | 100.0% | 1,274 | 4/4 | | `xml` | 25.0% | 1,462 | 1/4 | | `csv` | 0.0% | 483 | 0/4 | ##### Array truncated: 3 rows removed from end | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 413 | 4/4 | | `xml` | 100.0% | 1,243 | 4/4 | | `toon` | 0.0% | 462 | 0/4 | | `json-pretty` | 0.0% | 1,085 | 0/4 | | `yaml` | 0.0% | 843 | 0/4 | | `json-compact` | 0.0% | 670 | 0/4 | ##### Extra rows added beyond declared length | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 550 | 4/4 | | `toon` | 75.0% | 605 | 3/4 | | `json-compact` | 75.0% | 901 | 3/4 | | `xml` | 100.0% | 1,678 | 4/4 | | `yaml` | 75.0% | 1,138 | 3/4 | | `json-pretty` | 50.0% | 1,460 | 2/4 | ##### Inconsistent field count (missing salary in row 10) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 480 | 4/4 | | `json-compact` | 100.0% | 782 | 4/4 | | `yaml` | 100.0% | 985 | 4/4 | | `toon` | 100.0% | 1,008 | 4/4 | | `json-pretty` | 100.0% | 1,266 | 4/4 | | 
`xml` | 100.0% | 1,453 | 4/4 | ##### Missing required fields (no email in multiple rows) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 340 | 4/4 | | `xml` | 100.0% | 1,409 | 4/4 | | `toon` | 75.0% | 974 | 3/4 | | `json-pretty` | 50.0% | 1,225 | 2/4 | | `yaml` | 25.0% | 951 | 1/4 | | `json-compact` | 0.0% | 750 | 0/4 | #### Performance by Model ##### claude-haiku-4-5-20251001 | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 59.8% | 125/209 | | `json-pretty` | 57.4% | 120/209 | | `yaml` | 56.0% | 117/209 | | `xml` | 55.5% | 116/209 | | `json-compact` | 55.0% | 115/209 | | `csv` | 50.5% | 55/109 | ##### gemini-3-flash-preview | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `xml` | 98.1% | 205/209 | | `json-pretty` | 97.1% | 203/209 | | `yaml` | 97.1% | 203/209 | | `toon` | 96.7% | 202/209 | | `json-compact` | 96.7% | 202/209 | | `csv` | 96.3% | 105/109 | ##### gpt-5-nano | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 90.9% | 190/209 | | `json-compact` | 90.9% | 190/209 | | `json-pretty` | 89.0% | 186/209 | | `csv` | 89.0% | 97/109 | | `yaml` | 87.1% | 182/209 | | `xml` | 80.9% | 169/209 | ##### grok-4-1-fast-non-reasoning | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 58.4% | 122/209 | | `yaml` | 57.9% | 121/209 | | `json-pretty` | 56.5% | 118/209 | | `xml` | 54.1% | 113/209 | | `json-compact` | 52.2% | 109/209 | | `csv` | 51.4% | 56/109 | #### What's Being Measured This benchmark tests **LLM comprehension and data retrieval accuracy** across different input formats. Each LLM receives formatted data and must answer questions about it. This does **not** test the model's ability to generate TOON output – only to read and understand it. 
#### Datasets Tested Eleven datasets designed to test different structural patterns and validation capabilities: **Primary datasets:** 1. **Tabular** (100 employee records): Uniform objects with identical fields – optimal for TOON's tabular format. 2. **Nested** (50 e-commerce orders): Complex structures with nested customer objects and item arrays. 3. **Analytics** (60 days of metrics): Time-series data with dates and numeric values. 4. **GitHub** (100 repositories): Real-world data from top GitHub repos by stars. 5. **Event Logs** (75 logs): Semi-uniform data with ~50% flat logs and ~50% with nested error objects. 6. **Nested Config** (1 configuration): Deeply nested configuration with minimal tabular eligibility. **Structural validation datasets:** 7. **Control**: Valid complete dataset (baseline for validation) 8. **Truncated**: Array with 3 rows removed from end (tests `[N]` length detection) 9. **Extra rows**: Array with 3 additional rows beyond declared length 10. **Width mismatch**: Inconsistent field count (missing salary in row 10) 11. **Missing fields**: Systematic field omissions (no email in multiple rows) #### Question Types 209 questions are generated dynamically across five categories: - **Field retrieval (33%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths) - Example: "What is Alice's salary?" → `75000` - Example: "How many items are in order ORD-0042?" → `3` - Example: "What is the customer name for order ORD-0042?" → `John Doe` - **Aggregation (30%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons) - Example: "How many employees work in Engineering?" → `17` - Example: "What is the total revenue across all orders?" → `45123.50` - Example: "How many employees have salary > 80000?" 
→ `23` - **Filtering (23%)**: Multi-condition queries requiring compound logic (AND constraints across fields) - Example: "How many employees in Sales have salary > 80000?" → `5` - Example: "How many active employees have more than 10 years of experience?" → `8` - **Structure awareness (12%)**: Tests format-native structural affordances (TOON's `[N]` count and `{fields}`, CSV's header row) - Example: "How many employees are in the dataset?" → `100` - Example: "List the field names for employees" → `id, name, email, department, salary, yearsExperience, active` - Example: "What is the department of the last employee?" → `Sales` - **Structural validation (2%)**: Tests ability to detect incomplete, truncated, or corrupted data using structural metadata - Example: "Is this data complete and valid?" → `YES` (control dataset) or `NO` (corrupted datasets) - Tests TOON's `[N]` length validation and `{fields}` consistency checking - Demonstrates CSV's lack of structural validation capabilities #### Evaluation Process 1. **Format conversion**: Each dataset is converted to all 6 formats (TOON, JSON, YAML, JSON compact, XML, CSV). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate deterministically**: Answers are validated using type-aware comparison (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`) without requiring an LLM judge. #### Models & Configuration - **Models tested**: `claude-haiku-4-5-20251001`, `gemini-3-flash-preview`, `gpt-5-nano`, `grok-4-1-fast-non-reasoning` - **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding (GPT-5 tokenizer) - **Temperature**: Not set (models use their defaults) - **Total evaluations**: 209 questions × 6 formats × 4 models = 5,016 LLM calls ### Token Efficiency Token counts are measured using the GPT-5 `o200k_base` tokenizer via [`gpt-tokenizer`](https://github.com/niieani/gpt-tokenizer). 
Savings are calculated against formatted JSON (2-space indentation) as the primary baseline, with additional comparisons to compact JSON (minified), YAML, and XML. Actual savings vary by model and tokenizer. The benchmarks test datasets across different structural patterns (uniform, semi-uniform, nested, deeply nested) to show where TOON excels and where other formats may be better. #### Mixed-Structure Track Datasets with nested or semi-uniform structures. CSV excluded as it cannot properly represent these structures. ``` 🛒 E-commerce orders with nested structures ┊ Tabular: 33% │ TOON █████████████░░░░░░░ 73,126 tokens ├─ vs JSON (−33.3%) 109,599 tokens ├─ vs JSON compact (+5.3%) 69,459 tokens ├─ vs YAML (−14.4%) 85,415 tokens └─ vs XML (−40.7%) 123,344 tokens 🧾 Semi-uniform event logs ┊ Tabular: 50% │ TOON █████████████████░░░ 154,084 tokens ├─ vs JSON (−15.0%) 181,201 tokens ├─ vs JSON compact (+19.9%) 128,529 tokens ├─ vs YAML (−0.8%) 155,397 tokens └─ vs XML (−25.2%) 205,859 tokens 🧩 Deeply nested configuration ┊ Tabular: 0% │ TOON ██████████████░░░░░░ 620 tokens ├─ vs JSON (−31.9%) 911 tokens ├─ vs JSON compact (+11.1%) 558 tokens ├─ vs YAML (−6.3%) 662 tokens └─ vs XML (−38.2%) 1,003 tokens ──────────────────────────────────── Total ──────────────────────────────────── TOON ████████████████░░░░ 227,830 tokens ├─ vs JSON (−21.9%) 291,711 tokens ├─ vs JSON compact (+14.7%) 198,546 tokens ├─ vs YAML (−5.7%) 241,474 tokens └─ vs XML (−31.0%) 330,206 tokens ``` #### Flat-Only Track Datasets with flat tabular structures where CSV is applicable. 
``` 👥 Uniform employee records ┊ Tabular: 100% │ CSV ███████████████████░ 47,102 tokens TOON ████████████████████ 49,919 tokens (+6.0% vs CSV) ├─ vs JSON (−60.7%) 127,063 tokens ├─ vs JSON compact (−36.9%) 79,059 tokens ├─ vs YAML (−50.1%) 100,011 tokens └─ vs XML (−65.9%) 146,579 tokens 📈 Time-series analytics data ┊ Tabular: 100% │ CSV ██████████████████░░ 8,383 tokens TOON ████████████████████ 9,115 tokens (+8.7% vs CSV) ├─ vs JSON (−59.0%) 22,245 tokens ├─ vs JSON compact (−35.9%) 14,211 tokens ├─ vs YAML (−49.0%) 17,858 tokens └─ vs XML (−65.8%) 26,616 tokens ⭐ Top 100 GitHub repositories ┊ Tabular: 100% │ CSV ███████████████████░ 8,512 tokens TOON ████████████████████ 8,744 tokens (+2.7% vs CSV) ├─ vs JSON (−42.3%) 15,144 tokens ├─ vs JSON compact (−23.7%) 11,454 tokens ├─ vs YAML (−33.4%) 13,128 tokens └─ vs XML (−48.9%) 17,095 tokens ──────────────────────────────────── Total ──────────────────────────────────── CSV ███████████████████░ 63,997 tokens TOON ████████████████████ 67,778 tokens (+5.9% vs CSV) ├─ vs JSON (−58.8%) 164,452 tokens ├─ vs JSON compact (−35.3%) 104,724 tokens ├─ vs YAML (−48.3%) 130,997 tokens └─ vs XML (−64.4%) 190,290 tokens ``` Show detailed examples #### 📈 Time-series analytics data **Savings:** 13,130 tokens (59.0% reduction vs JSON) **JSON** (22,245 tokens): ```json { "metrics": [ { "date": "2025-01-01", "views": 6138, "clicks": 174, "conversions": 12, "revenue": 2712.49, "bounceRate": 0.35 }, { "date": "2025-01-02", "views": 4616, "clicks": 274, "conversions": 34, "revenue": 9156.29, "bounceRate": 0.56 }, { "date": "2025-01-03", "views": 4460, "clicks": 143, "conversions": 8, "revenue": 1317.98, "bounceRate": 0.59 }, { "date": "2025-01-04", "views": 4740, "clicks": 125, "conversions": 13, "revenue": 2934.77, "bounceRate": 0.37 }, { "date": "2025-01-05", "views": 6428, "clicks": 369, "conversions": 19, "revenue": 1317.24, "bounceRate": 0.3 } ] } ``` **TOON** (9,115 tokens): ``` 
metrics[5]{date,views,clicks,conversions,revenue,bounceRate}: 2025-01-01,6138,174,12,2712.49,0.35 2025-01-02,4616,274,34,9156.29,0.56 2025-01-03,4460,143,8,1317.98,0.59 2025-01-04,4740,125,13,2934.77,0.37 2025-01-05,6428,369,19,1317.24,0.3 ``` --- #### ⭐ Top 100 GitHub repositories **Savings:** 6,400 tokens (42.3% reduction vs JSON) **JSON** (15,144 tokens): ```json { "repositories": [ { "id": 28457823, "name": "freeCodeCamp", "repo": "freeCodeCamp/freeCodeCamp", "description": "freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…", "createdAt": "2014-12-24T17:49:19Z", "updatedAt": "2025-10-28T11:58:08Z", "pushedAt": "2025-10-28T10:17:16Z", "stars": 430886, "watchers": 8583, "forks": 42146, "defaultBranch": "main" }, { "id": 132750724, "name": "build-your-own-x", "repo": "codecrafters-io/build-your-own-x", "description": "Master programming by recreating your favorite technologies from scratch.", "createdAt": "2018-05-09T12:03:18Z", "updatedAt": "2025-10-28T12:37:11Z", "pushedAt": "2025-10-10T18:45:01Z", "stars": 430877, "watchers": 6332, "forks": 40453, "defaultBranch": "master" }, { "id": 21737465, "name": "awesome", "repo": "sindresorhus/awesome", "description": "😎 Awesome lists about all kinds of interesting topics", "createdAt": "2014-07-11T13:42:37Z", "updatedAt": "2025-10-28T12:40:21Z", "pushedAt": "2025-10-27T17:57:31Z", "stars": 410052, "watchers": 8017, "forks": 32029, "defaultBranch": "main" } ] } ``` **TOON** (8,744 tokens): ``` repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}: 28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. 
Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-28T11:58:08Z","2025-10-28T10:17:16Z",430886,8583,42146,main 132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-28T12:37:11Z","2025-10-10T18:45:01Z",430877,6332,40453,master 21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-28T12:40:21Z","2025-10-27T17:57:31Z",410052,8017,32029,main ``` ## Installation & Quick Start ### CLI (No Installation Required) Try TOON instantly with npx: ```bash # Convert JSON to TOON npx @toon-format/cli input.json -o output.toon # Pipe from stdin echo '{"name": "Ada", "role": "dev"}' | npx @toon-format/cli ``` See the [CLI section](#cli) for all options and examples. ### TypeScript Library ```bash # npm npm install @toon-format/toon # pnpm pnpm add @toon-format/toon # yarn yarn add @toon-format/toon ``` **Example usage:** ```ts import { encode } from '@toon-format/toon' const data = { users: [ { id: 1, name: 'Alice', role: 'admin' }, { id: 2, name: 'Bob', role: 'user' } ] } console.log(encode(data)) // users[2]{id,name,role}: // 1,Alice,admin // 2,Bob,user ``` **Streaming large datasets:** ```ts import { encodeLines } from '@toon-format/toon' const largeData = await fetchThousandsOfRecords() // Memory-efficient streaming for large data for (const line of encodeLines(largeData)) { process.stdout.write(`${line}\n`) } ``` > [!TIP] > For streaming decode APIs, see [`decodeFromLines()`](https://toonformat.dev/reference/api#decodefromlines-lines-options) and [`decodeStream()`](https://toonformat.dev/reference/api#decodestream-source-options). 
**Transforming values with replacer:** ```ts import { encode } from '@toon-format/toon' // Remove sensitive fields const user = { name: 'Alice', password: 'secret', email: 'alice@example.com' } const safe = encode(user, { replacer: (key, value) => key === 'password' ? undefined : value }) // name: Alice // email: alice@example.com // Transform values const data = { status: 'active', count: 5 } const transformed = encode(data, { replacer: (key, value) => typeof value === 'string' ? value.toUpperCase() : value }) // status: ACTIVE // count: 5 ``` > [!TIP] > The `replacer` function provides fine-grained control over encoding, similar to `JSON.stringify`'s replacer but with path tracking. See the [API Reference](https://toonformat.dev/reference/api#replacer-function) for more examples. ## Playgrounds Experiment with TOON format interactively using these tools for token comparison, format conversion, and validation. ### Official Playground The [TOON Playground](https://toonformat.dev/playground) lets you convert JSON to TOON in real-time, compare token counts, and share your experiments via URL. ### Community Playgrounds - [Format Tokenization Playground](https://www.curiouslychase.com/playground/format-tokenization-exploration) - [TOON Tools](https://toontools.vercel.app/) ## Editor Support ### VS Code [TOON Language Support](https://marketplace.visualstudio.com/items?itemName=vishalraut.vscode-toon) - Syntax highlighting, validation, conversion, and token analysis. ```bash code --install-extension vishalraut.vscode-toon ``` ### Tree-sitter Grammar [tree-sitter-toon](https://github.com/3swordman/tree-sitter-toon) - Grammar for Tree-sitter-compatible editors (Neovim, Helix, Emacs, Zed). ### Neovim [toon.nvim](https://github.com/thalesgelinger/toon.nvim) - Lua-based plugin. ### Other Editors Use YAML syntax highlighting as a close approximation. ## CLI Command-line tool for quick JSON↔TOON conversions, token analysis, and pipeline integration. 
Auto-detects format from file extension, supports stdin/stdout workflows, and offers delimiter options for maximum efficiency. ```bash # Encode JSON to TOON (auto-detected) npx @toon-format/cli input.json -o output.toon # Decode TOON to JSON (auto-detected) npx @toon-format/cli data.toon -o output.json # Pipe from stdin (no argument needed) cat data.json | npx @toon-format/cli echo '{"name": "Ada"}' | npx @toon-format/cli # Output to stdout npx @toon-format/cli input.json # Show token savings npx @toon-format/cli data.json --stats ``` > [!TIP] > See the full [CLI documentation](https://toonformat.dev/cli/) for all options, examples, and advanced usage. ## Format Overview Detailed syntax references, implementation guides, and quick lookups for understanding and using the TOON format. - [Format Overview](https://toonformat.dev/guide/format-overview) – Complete syntax documentation - [Syntax Cheatsheet](https://toonformat.dev/reference/syntax-cheatsheet) – Quick reference - [API Reference](https://toonformat.dev/reference/api) – Encode/decode usage (TypeScript) ## Using TOON with LLMs TOON works best when you show the format instead of describing it. The structure is self-documenting – models parse it naturally once they see the pattern. Wrap data in ` ```toon` code blocks for input, and show the expected header template when asking models to generate TOON. Use tab delimiters for even better token efficiency. Follow the detailed [LLM integration guide](https://toonformat.dev/guide/llm-prompts) for strategies, examples, and validation techniques. ## Documentation Comprehensive guides, references, and resources to help you get the most out of the TOON format and tools. 
### Getting Started - [Introduction & Installation](https://toonformat.dev/guide/getting-started) – What TOON is, when to use it, first steps - [Format Overview](https://toonformat.dev/guide/format-overview) – Complete syntax with examples - [Benchmarks](https://toonformat.dev/guide/benchmarks) – Accuracy & token efficiency results ### Tools & Integration - [CLI](https://toonformat.dev/cli/) – Command-line tool for JSON↔TOON conversions - [Using TOON with LLMs](https://toonformat.dev/guide/llm-prompts) – Prompting strategies & validation - [Playgrounds](https://toonformat.dev/ecosystem/tools-and-playgrounds) – Interactive tools ### References - [API Reference](https://toonformat.dev/reference/api) – TypeScript/JavaScript encode/decode API - [Syntax Cheatsheet](https://toonformat.dev/reference/syntax-cheatsheet) – Quick format lookup - [Specification](https://github.com/toon-format/spec/blob/main/SPEC.md) – Normative rules for implementers ## Other Implementations TOON has official and community implementations across multiple languages including Python, Rust, Go, Java, Swift, .NET, and many more. See the full list of implementations in the [documentation](https://toonformat.dev/ecosystem/implementations). 
## Credits - Logo design by [鈴木ックス(SZKX)](https://x.com/szkx_art) ## License [MIT](./LICENSE) License © 2025-PRESENT [Johann Schopplich](https://github.com/johannschopplich) ================================================ FILE: packages/toon/package.json ================================================ { "name": "@toon-format/toon", "type": "module", "version": "2.1.0", "packageManager": "pnpm@10.30.3", "description": "Token-Oriented Object Notation (TOON) – Compact, human-readable, schema-aware encoding of JSON for LLM prompts", "author": "Johann Schopplich ", "license": "MIT", "homepage": "https://toonformat.dev", "repository": { "type": "git", "url": "git+https://github.com/toon-format/toon.git" }, "bugs": { "url": "https://github.com/toon-format/toon/issues" }, "keywords": [ "toon", "format", "specification", "llm", "token-efficiency", "data-format" ], "sideEffects": false, "exports": { ".": { "types": "./dist/index.d.mts", "default": "./dist/index.mjs" } }, "types": "./dist/index.d.mts", "files": [ "dist" ], "scripts": { "build": "tsdown", "test": "vitest" }, "devDependencies": { "@toon-format/spec": "^3.0.3" } } ================================================ FILE: packages/toon/src/constants.ts ================================================ // #region List markers export const LIST_ITEM_MARKER = '-' export const LIST_ITEM_PREFIX = '- ' // #endregion // #region Structural characters export const COMMA = ',' export const COLON = ':' export const SPACE = ' ' export const PIPE = '|' export const DOT = '.' 
// #endregion

// #region Brackets and braces

export const OPEN_BRACKET = '['
export const CLOSE_BRACKET = ']'
export const OPEN_BRACE = '{'
export const CLOSE_BRACE = '}'

// #endregion

// #region Literals

export const NULL_LITERAL = 'null'
export const TRUE_LITERAL = 'true'
export const FALSE_LITERAL = 'false'

// #endregion

// #region Escape characters

export const BACKSLASH = '\\'
export const DOUBLE_QUOTE = '"'
export const NEWLINE = '\n'
export const CARRIAGE_RETURN = '\r'
export const TAB = '\t'

// #endregion

// #region Delimiters

export const DELIMITERS = {
  comma: COMMA as ',',
  tab: TAB as '\t',
  pipe: PIPE as '|',
} as const

export type DelimiterKey = keyof typeof DELIMITERS
export type Delimiter = typeof DELIMITERS[DelimiterKey]

export const DEFAULT_DELIMITER: Delimiter = DELIMITERS.comma

// #endregion

================================================
FILE: packages/toon/src/decode/decoders.ts
================================================
import type { ArrayHeaderInfo, DecodeStreamOptions, Depth, JsonPrimitive, JsonStreamEvent, ParsedLine } from '../types.ts'
import type { StreamingScanState } from './scanner.ts'
import { COLON, DEFAULT_DELIMITER, LIST_ITEM_MARKER, LIST_ITEM_PREFIX } from '../constants.ts'
import { findClosingQuote } from '../shared/string-utils.ts'
import { isArrayHeaderContent, isKeyValueContent, mapRowValuesToPrimitives, parseArrayHeaderLine, parseDelimitedValues, parseKeyToken, parsePrimitiveToken } from './parser.ts'
import { createScanState, parseLinesAsync, parseLinesSync } from './scanner.ts'
import { assertExpectedCount, validateNoBlankLinesInRange, validateNoExtraListItems, validateNoExtraTabularRows } from './validation.ts'

/** Resolved decode options shared by every decoder helper in this file. */
interface DecoderContext { indent: number, strict: boolean }

// #region Streaming line cursor

/**
 * Pull-based cursor over a stream of parsed lines with single-line lookahead.
 *
 * The cursor wraps either a synchronous or an asynchronous line iterator and
 * exposes a matching pair of method families (`peek`/`next`/`advance`/`atEnd`
 * and their `*Sync` counterparts). The `*Sync` methods cast the wrapped
 * iterator to a synchronous `Iterator`, so they must only be used on a cursor
 * that was constructed with a synchronous iterator.
 */
class StreamingLineCursor {
  /** Lines pulled from the iterator but not yet consumed (lookahead). */
  private buffer: ParsedLine[] = []
  private generator: Iterator<ParsedLine> | AsyncIterator<ParsedLine>
  /** Set once the wrapped iterator reports completion. */
  private done = false
  /** Most recently consumed line, as returned by `current()`. */
  private lastLine: ParsedLine | undefined
  private scanState: StreamingScanState

  constructor(
    generator: Iterator<ParsedLine> | AsyncIterator<ParsedLine>,
    scanState: StreamingScanState,
  ) {
    this.generator = generator
    this.scanState = scanState
  }

  /** Returns the blank-line records held by the shared scan state. */
  getBlankLines() {
    return this.scanState.blankLines
  }

  /**
   * Returns the next line without consuming it, pulling one item from the
   * wrapped iterator when the lookahead buffer is empty.
   *
   * @returns The upcoming line, or `undefined` once the iterator is exhausted.
   */
  async peek(): Promise<ParsedLine | undefined> {
    if (this.buffer.length > 0) {
      return this.buffer[0]
    }
    if (this.done) {
      return undefined
    }

    const result = await this.generator.next()
    if (result.done) {
      this.done = true
      return undefined
    }

    this.buffer.push(result.value)
    return result.value
  }

  /** Consumes and returns the next line, recording it as the current line. */
  async next(): Promise<ParsedLine | undefined> {
    const line = await this.peek()
    if (line !== undefined) {
      this.buffer.shift()
      this.lastLine = line
    }
    return line
  }

  /** Consumes the next line and discards it. */
  async advance(): Promise<void> {
    await this.next()
  }

  /** Returns the most recently consumed line, if any. */
  current(): ParsedLine | undefined {
    return this.lastLine
  }

  /** Resolves to `true` when no further line is available. */
  async atEnd(): Promise<boolean> {
    return (await this.peek()) === undefined
  }

  /** Synchronous counterpart of `peek()`. */
  peekSync(): ParsedLine | undefined {
    if (this.buffer.length > 0) {
      return this.buffer[0]
    }
    if (this.done) {
      return undefined
    }

    const result = (this.generator as Iterator<ParsedLine>).next()
    if (result.done) {
      this.done = true
      return undefined
    }

    this.buffer.push(result.value)
    return result.value
  }

  /** Synchronous counterpart of `next()`. */
  nextSync(): ParsedLine | undefined {
    const line = this.peekSync()
    if (line !== undefined) {
      this.buffer.shift()
      this.lastLine = line
    }
    return line
  }

  /** Synchronous counterpart of `advance()`. */
  advanceSync(): void {
    this.nextSync()
  }

  /** Synchronous counterpart of `atEnd()`. */
  atEndSync(): boolean {
    return this.peekSync() === undefined
  }
}

// #endregion

// #region Synchronous streaming decode

// NOTE(review): the element type of `source` is assumed to be `string` (one
// entry per input line) — confirm against `parseLinesSync` in scanner.ts.
export function* decodeStreamSync(
  source: Iterable<string>,
  options?: DecodeStreamOptions,
): Generator<JsonStreamEvent> {
  // Validate options
  if (options?.expandPaths !== undefined) {
    throw new Error('expandPaths is not supported in streaming decode')
  }

  const resolvedOptions: DecoderContext = {
    indent: options?.indent ?? 2,
    strict: options?.strict ??
true,
  }

  const scanState = createScanState()
  const lineGenerator = parseLinesSync(source, resolvedOptions.indent, resolvedOptions.strict, scanState)
  const cursor = new StreamingLineCursor(lineGenerator, scanState)

  // Get first line to determine root form
  const first = cursor.peekSync()
  if (!first) {
    // Empty input decodes to empty object
    yield { type: 'startObject' }
    yield { type: 'endObject' }
    return
  }

  // Check for root array
  if (isArrayHeaderContent(first.content)) {
    const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER)
    if (headerInfo) {
      cursor.advanceSync()
      yield* decodeArrayFromHeaderSync(headerInfo.header, headerInfo.inlineValues, cursor, 0, resolvedOptions)
      return
    }
  }

  // Check for single primitive
  cursor.advanceSync()
  const hasMore = !cursor.atEndSync()
  if (!hasMore && !isKeyValueLineSync(first)) {
    // Single non-key-value line is root primitive
    yield { type: 'primitive', value: parsePrimitiveToken(first.content.trim()) }
    return
  }

  // Root object
  yield { type: 'startObject' }
  yield* decodeKeyValueSync(first.content, cursor, 0, resolvedOptions)

  // Process remaining object fields
  while (!cursor.atEndSync()) {
    const line = cursor.peekSync()
    if (!line || line.depth !== 0) {
      break
    }
    cursor.advanceSync()
    yield* decodeKeyValueSync(line.content, cursor, 0, resolvedOptions)
  }

  yield { type: 'endObject' }
}

/**
 * Decodes one `key: value` line (or keyed array header) into stream events.
 *
 * Emits a `key` event followed by either the events of a keyed array, the
 * events of a nested object read from the deeper-indented lines that follow,
 * an empty object, or a single inline primitive.
 *
 * @param content - Line content holding the key and, optionally, its value.
 * @param cursor - Cursor positioned just after the line that produced `content`.
 * @param baseDepth - Depth of the line; nested content must be deeper.
 * @param options - Resolved decoder options.
 */
function* decodeKeyValueSync(
  content: string,
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): Generator<JsonStreamEvent> {
  // Check for array header first
  const arrayHeader = parseArrayHeaderLine(content, DEFAULT_DELIMITER)
  if (arrayHeader && arrayHeader.header.key !== undefined) {
    yield { type: 'key', key: arrayHeader.header.key }
    yield* decodeArrayFromHeaderSync(arrayHeader.header, arrayHeader.inlineValues, cursor, baseDepth, options)
    return
  }

  // Regular key-value pair
  const { key, isQuoted } = parseKeyToken(content, 0)

  // Locate the colon that separates key from value. For a quoted key the
  // decoded `key` can be shorter than its raw token (surrounding quotes and
  // escapes are removed), so `key.length` may still point inside the quotes
  // and match a colon that belongs to the key itself (e.g. `"ab:": 1`).
  // Searching from just past the closing quote avoids that.
  const closingQuoteIndex = isQuoted && content.startsWith('"')
    ? findClosingQuote(content, 0)
    : -1
  const colonSearchStart = closingQuoteIndex >= 0 ? closingQuoteIndex + 1 : key.length
  const colonIndex = content.indexOf(COLON, colonSearchStart)
  const rest = colonIndex >= 0 ? content.slice(colonIndex + 1).trim() : ''

  yield isQuoted ? { type: 'key', key, wasQuoted: true } : { type: 'key', key }

  // No value after colon - expect nested object or empty
  if (!rest) {
    const nextLine = cursor.peekSync()
    if (nextLine && nextLine.depth > baseDepth) {
      yield { type: 'startObject' }
      yield* decodeObjectFieldsSync(cursor, baseDepth + 1, options)
      yield { type: 'endObject' }
      return
    }

    // Empty object
    yield { type: 'startObject' }
    yield { type: 'endObject' }
    return
  }

  // Inline primitive value
  yield { type: 'primitive', value: parsePrimitiveToken(rest) }
}

/**
 * Decodes consecutive object fields that share one indentation depth.
 *
 * The depth of the first line at or below `baseDepth` fixes the field depth;
 * decoding stops at the first line that is shallower than `baseDepth` or whose
 * depth differs from that fixed depth.
 *
 * @param cursor - Cursor positioned at the first candidate field line.
 * @param baseDepth - Minimum depth a field line must have.
 * @param options - Resolved decoder options.
 */
function* decodeObjectFieldsSync(
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): Generator<JsonStreamEvent> {
  let computedDepth: Depth | undefined

  while (!cursor.atEndSync()) {
    const line = cursor.peekSync()
    if (!line || line.depth < baseDepth) {
      break
    }

    // The first accepted line determines the depth of all sibling fields
    if (computedDepth === undefined) {
      computedDepth = line.depth
    }
    if (line.depth !== computedDepth) {
      break
    }

    cursor.advanceSync()
    yield* decodeKeyValueSync(line.content, cursor, computedDepth, options)
  }
}

/**
 * Decodes an array whose header line has already been parsed and consumed.
 *
 * Emits `startArray`, then delegates to the inline, tabular, or list decoder
 * depending on the header, and finally emits `endArray`.
 *
 * @param header - Parsed array header.
 * @param inlineValues - Text after the header colon, if any.
 * @param cursor - Cursor positioned after the header line.
 * @param baseDepth - Depth of the header line.
 * @param options - Resolved decoder options.
 */
function* decodeArrayFromHeaderSync(
  header: ArrayHeaderInfo,
  inlineValues: string | undefined,
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): Generator<JsonStreamEvent> {
  yield { type: 'startArray', length: header.length }

  // Inline primitive array
  if (inlineValues) {
    yield* decodeInlinePrimitiveArraySync(header, inlineValues, options)
    yield { type: 'endArray' }
    return
  }

  // Tabular array
  if (header.fields && header.fields.length > 0) {
    yield* decodeTabularArraySync(header, cursor, baseDepth, options)
    yield { type: 'endArray' }
    return
  }

  // List array
  yield* decodeListArraySync(header, cursor, baseDepth, options)
  yield { type: 'endArray' }
}

function* decodeInlinePrimitiveArraySync(
  header: ArrayHeaderInfo,
  inlineValues: string,
  options: DecoderContext,
): Generator {
  if (!inlineValues.trim()) {
    assertExpectedCount(0, header.length, 'inline array items', options)
    return
  }

  const values =
parseDelimitedValues(inlineValues, header.delimiter)
  const primitives = mapRowValuesToPrimitives(values)

  assertExpectedCount(primitives.length, header.length, 'inline array items', options)

  for (const primitive of primitives) {
    yield { type: 'primitive', value: primitive }
  }
}

/**
 * Streams the rows of a tabular array as one object per row.
 *
 * Rows are read from the lines directly below the header (one level deeper
 * than `baseDepth`) until the declared length is reached or a line at a
 * different depth is encountered. Row and cell counts are passed to
 * `assertExpectedCount`; in strict mode the consumed range is additionally
 * handed to `validateNoBlankLinesInRange` and the following line to
 * `validateNoExtraTabularRows`.
 */
function* decodeTabularArraySync(
  header: ArrayHeaderInfo,
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): Generator {
  const rowDepth = baseDepth + 1
  let decodedRows = 0
  let firstRowLine: number | undefined
  let lastRowLine: number | undefined

  // The end-of-input check stays first so the cursor is still peeked once the
  // declared length has been reached.
  while (!cursor.atEndSync() && decodedRows < header.length) {
    const row = cursor.peekSync()
    // A shallower or deeper line ends the table
    if (!row || row.depth !== rowDepth) {
      break
    }

    if (firstRowLine === undefined) {
      firstRowLine = row.lineNumber
    }
    lastRowLine = row.lineNumber
    cursor.advanceSync()

    const cells = parseDelimitedValues(row.content, header.delimiter)
    assertExpectedCount(cells.length, header.fields!.length, 'tabular row values', options)

    yield* yieldObjectFromFields(header.fields!, mapRowValuesToPrimitives(cells))
    decodedRows += 1
  }

  assertExpectedCount(decodedRows, header.length, 'tabular rows', options)

  // The remaining checks only apply in strict mode
  if (!options.strict) {
    return
  }

  if (firstRowLine !== undefined && lastRowLine !== undefined) {
    validateNoBlankLinesInRange(firstRowLine, lastRowLine, cursor.getBlankLines(), options.strict, 'tabular array')
  }

  validateNoExtraTabularRows(cursor.peekSync(), rowDepth, header)
}

function* decodeListArraySync(
  header: ArrayHeaderInfo,
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): Generator {
  const itemDepth = baseDepth + 1
  let itemCount = 0
  let startLine: number | undefined
  let endLine: number | undefined

  while (!cursor.atEndSync() && itemCount < header.length) {
    const line = cursor.peekSync()
    if (!line || line.depth < itemDepth) {
      break
    }

    const isListItem = line.content.startsWith(LIST_ITEM_PREFIX) || line.content === LIST_ITEM_MARKER

    if (line.depth === itemDepth
&& isListItem) {
      if (startLine === undefined) {
        startLine = line.lineNumber
      }
      endLine = line.lineNumber

      yield* decodeListItemSync(cursor, itemDepth, options)

      // An item may span several lines; extend the range to the last one consumed
      const currentLine = cursor.current()
      if (currentLine) {
        endLine = currentLine.lineNumber
      }

      itemCount++
    }
    else {
      break
    }
  }

  assertExpectedCount(itemCount, header.length, 'list array items', options)

  if (options.strict && startLine !== undefined && endLine !== undefined) {
    validateNoBlankLinesInRange(startLine, endLine, cursor.getBlankLines(), options.strict, 'list array')
  }

  if (options.strict) {
    const nextLine = cursor.peekSync()
    validateNoExtraListItems(nextLine, itemDepth, header.length)
  }
}

/**
 * Decodes a single list item (a line starting with the list item marker).
 *
 * Depending on what follows the marker, the item is emitted as an empty
 * object, a nested array, an object whose first field is a tabular array, an
 * object whose first field is a regular key-value pair, or a primitive.
 *
 * @param cursor - Cursor positioned at the list item line (not yet consumed).
 * @param baseDepth - Depth of the list item line.
 * @param options - Resolved decoder options.
 * @throws {ReferenceError} If the cursor has no line left.
 * @throws {SyntaxError} If the line does not start with the list item prefix.
 */
function* decodeListItemSync(
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): Generator<JsonStreamEvent> {
  const line = cursor.nextSync()
  if (!line) {
    throw new ReferenceError('Expected list item')
  }

  let afterHyphen: string

  if (line.content === LIST_ITEM_MARKER) {
    // Bare list item marker: always an empty object
    yield { type: 'startObject' }
    yield { type: 'endObject' }
    return
  }
  else if (line.content.startsWith(LIST_ITEM_PREFIX)) {
    afterHyphen = line.content.slice(LIST_ITEM_PREFIX.length)
  }
  else {
    throw new SyntaxError(`Expected list item to start with "${LIST_ITEM_PREFIX}"`)
  }

  if (!afterHyphen.trim()) {
    yield { type: 'startObject' }
    yield { type: 'endObject' }
    return
  }

  // Parse the potential array header once; both array branches below use it
  const looksLikeArrayHeader = isArrayHeaderContent(afterHyphen)
  const headerInfo = parseArrayHeaderLine(afterHyphen, DEFAULT_DELIMITER)

  // Check for array header after hyphen
  if (looksLikeArrayHeader && headerInfo) {
    yield* decodeArrayFromHeaderSync(headerInfo.header, headerInfo.inlineValues, cursor, baseDepth, options)
    return
  }

  // Check for tabular-first list-item object: `- key[N]{fields}:`
  const header = headerInfo?.header
  if (header && header.key !== undefined && header.fields !== undefined) {
    // Object with tabular array as first field
    yield { type: 'startObject' }
    yield { type: 'key', key: header.key }

    // Use baseDepth + 1 for the array so rows are at baseDepth + 2
    yield* decodeArrayFromHeaderSync(header, headerInfo.inlineValues, cursor, baseDepth + 1, options)

    // Read sibling fields at depth = baseDepth + 1
    yield* decodeListItemSiblingFieldsSync(cursor, baseDepth + 1, options)

    yield { type: 'endObject' }
    return
  }

  // Check for object first field after hyphen
  if (isKeyValueContent(afterHyphen)) {
    yield { type: 'startObject' }
    yield* decodeKeyValueSync(afterHyphen, cursor, baseDepth + 1, options)

    // Read subsequent fields
    yield* decodeListItemSiblingFieldsSync(cursor, baseDepth + 1, options)

    yield { type: 'endObject' }
    return
  }

  // Primitive value
  yield { type: 'primitive', value: parsePrimitiveToken(afterHyphen) }
}

/**
 * Decodes the remaining fields of a list-item object: consecutive lines at
 * exactly `followDepth` that do not start with the list item prefix.
 */
function* decodeListItemSiblingFieldsSync(
  cursor: StreamingLineCursor,
  followDepth: Depth,
  options: DecoderContext,
): Generator<JsonStreamEvent> {
  while (!cursor.atEndSync()) {
    const nextLine = cursor.peekSync()
    if (!nextLine || nextLine.depth !== followDepth || nextLine.content.startsWith(LIST_ITEM_PREFIX)) {
      break
    }

    cursor.advanceSync()
    yield* decodeKeyValueSync(nextLine.content, cursor, followDepth, options)
  }
}

/**
 * Reports whether a line contains a colon outside of a leading quoted token.
 *
 * For content that starts with a double quote, only the text after the
 * closing quote is searched; an unterminated quote yields `false`.
 */
function isKeyValueLineSync(line: ParsedLine): boolean {
  const { content } = line

  if (!content.startsWith('"')) {
    return content.includes(COLON)
  }

  const closingQuoteIndex = findClosingQuote(content, 0)
  if (closingQuoteIndex === -1) {
    return false
  }

  return content.slice(closingQuoteIndex + 1).includes(COLON)
}

// #endregion

// #region Asynchronous streaming decode

// NOTE(review): the element type of `source` is assumed to be `string` (one
// entry per input line) — confirm against `parseLinesAsync` in scanner.ts.
export async function* decodeStream(
  source: AsyncIterable<string> | Iterable<string>,
  options?: DecodeStreamOptions,
): AsyncGenerator<JsonStreamEvent> {
  // Validate options
  if (options?.expandPaths !== undefined) {
    throw new Error('expandPaths is not supported in streaming decode')
  }

  const resolvedOptions: DecoderContext = {
    indent:
options?.indent ?? 2,
    strict: options?.strict ?? true,
  }

  const scanState = createScanState()

  // Determine if source is async or sync
  if (Symbol.asyncIterator in source) {
    const lineGenerator = parseLinesAsync(source, resolvedOptions.indent, resolvedOptions.strict, scanState)
    const cursor = new StreamingLineCursor(lineGenerator, scanState)

    // Get first line to determine root form
    const first = await cursor.peek()
    if (!first) {
      // Empty input decodes to empty object
      yield { type: 'startObject' }
      yield { type: 'endObject' }
      return
    }

    // Check for root array
    if (isArrayHeaderContent(first.content)) {
      const headerInfo = parseArrayHeaderLine(first.content, DEFAULT_DELIMITER)
      if (headerInfo) {
        await cursor.advance()
        yield* decodeArrayFromHeaderAsync(headerInfo.header, headerInfo.inlineValues, cursor, 0, resolvedOptions)
        return
      }
    }

    // Check for single primitive
    await cursor.advance()
    const hasMore = !(await cursor.atEnd())
    if (!hasMore && !isKeyValueLineSync(first)) {
      yield { type: 'primitive', value: parsePrimitiveToken(first.content.trim()) }
      return
    }

    // Root object
    yield { type: 'startObject' }
    yield* decodeKeyValueAsync(first.content, cursor, 0, resolvedOptions)

    // Process remaining object fields
    while (!(await cursor.atEnd())) {
      const line = await cursor.peek()
      if (!line || line.depth !== 0) {
        break
      }
      await cursor.advance()
      yield* decodeKeyValueAsync(line.content, cursor, 0, resolvedOptions)
    }

    yield { type: 'endObject' }
  }
  else {
    // Sync source, delegate to sync generator
    yield* decodeStreamSync(source as Iterable<string>, options)
  }
}

/**
 * Asynchronous variant of the key-value decoder.
 *
 * Emits a `key` event followed by either the events of a keyed array, the
 * events of a nested object read from the deeper-indented lines that follow,
 * an empty object, or a single inline primitive.
 *
 * @param content - Line content holding the key and, optionally, its value.
 * @param cursor - Cursor positioned just after the line that produced `content`.
 * @param baseDepth - Depth of the line; nested content must be deeper.
 * @param options - Resolved decoder options.
 */
async function* decodeKeyValueAsync(
  content: string,
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): AsyncGenerator<JsonStreamEvent> {
  // Check for array header first
  const arrayHeader = parseArrayHeaderLine(content, DEFAULT_DELIMITER)
  if (arrayHeader && arrayHeader.header.key !== undefined) {
    yield { type: 'key', key: arrayHeader.header.key }
    yield* decodeArrayFromHeaderAsync(arrayHeader.header, arrayHeader.inlineValues, cursor, baseDepth, options)
    return
  }

  // Regular key-value pair
  const { key, isQuoted } = parseKeyToken(content, 0)

  // Locate the colon that separates key from value. For a quoted key the
  // decoded `key` can be shorter than its raw token (surrounding quotes and
  // escapes are removed), so `key.length` may still point inside the quotes
  // and match a colon that belongs to the key itself (e.g. `"ab:": 1`).
  // Searching from just past the closing quote avoids that.
  const closingQuoteIndex = isQuoted && content.startsWith('"')
    ? findClosingQuote(content, 0)
    : -1
  const colonSearchStart = closingQuoteIndex >= 0 ? closingQuoteIndex + 1 : key.length
  const colonIndex = content.indexOf(COLON, colonSearchStart)
  const rest = colonIndex >= 0 ? content.slice(colonIndex + 1).trim() : ''

  yield isQuoted ? { type: 'key', key, wasQuoted: true } : { type: 'key', key }

  // No value after colon - expect nested object or empty
  if (!rest) {
    const nextLine = await cursor.peek()
    if (nextLine && nextLine.depth > baseDepth) {
      yield { type: 'startObject' }
      yield* decodeObjectFieldsAsync(cursor, baseDepth + 1, options)
      yield { type: 'endObject' }
      return
    }

    // Empty object
    yield { type: 'startObject' }
    yield { type: 'endObject' }
    return
  }

  // Inline primitive value
  yield { type: 'primitive', value: parsePrimitiveToken(rest) }
}

/**
 * Asynchronously decodes consecutive object fields that share one depth.
 *
 * The depth of the first line at or below `baseDepth` fixes the field depth;
 * decoding stops at the first line that is shallower than `baseDepth` or whose
 * depth differs from that fixed depth.
 */
async function* decodeObjectFieldsAsync(
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): AsyncGenerator<JsonStreamEvent> {
  let computedDepth: Depth | undefined

  while (!(await cursor.atEnd())) {
    const line = await cursor.peek()
    if (!line || line.depth < baseDepth) {
      break
    }

    // The first accepted line determines the depth of all sibling fields
    if (computedDepth === undefined) {
      computedDepth = line.depth
    }
    if (line.depth !== computedDepth) {
      break
    }

    await cursor.advance()
    yield* decodeKeyValueAsync(line.content, cursor, computedDepth, options)
  }
}

/**
 * Asynchronously decodes an array whose header line has already been parsed
 * and consumed.
 *
 * Emits `startArray`, then delegates to the inline, tabular, or list decoder
 * depending on the header, and finally emits `endArray`. Inline values need no
 * further input, so that branch reuses the synchronous inline decoder.
 */
async function* decodeArrayFromHeaderAsync(
  header: ArrayHeaderInfo,
  inlineValues: string | undefined,
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): AsyncGenerator<JsonStreamEvent> {
  yield { type: 'startArray', length: header.length }

  // Inline primitive array
  if (inlineValues) {
    yield* decodeInlinePrimitiveArraySync(header, inlineValues, options)
    yield { type: 'endArray' }
    return
  }

  // Tabular array
  if (header.fields && header.fields.length > 0) {
    yield* decodeTabularArrayAsync(header, cursor, baseDepth, options)
    yield { type: 'endArray' }
    return
  }

  // List array
  yield* decodeListArrayAsync(header, cursor, baseDepth, options)
  yield { type: 'endArray' }
}

async function*
/**
 * Streams the events for the items of a list-form array (`- item` lines).
 *
 * Items are read one depth below `baseDepth` until the declared length is
 * reached, the input ends, or a line that is not a list item at that depth
 * appears. The item count is then checked, and in strict mode the consumed
 * range is checked for blank lines and the next line for surplus items.
 *
 * @param header - Parsed array header; `header.length` is the declared item count
 * @param cursor - Line cursor positioned at the first candidate item
 * @param baseDepth - Depth of the array header line
 * @param options - Decoder context (strictness is read from `options.strict`)
 */
async function* decodeListArrayAsync(
  header: ArrayHeaderInfo,
  cursor: StreamingLineCursor,
  baseDepth: Depth,
  options: DecoderContext,
): AsyncGenerator {
  const itemDepth = baseDepth + 1
  let consumed = 0
  let firstLineNumber: number | undefined
  let lastLineNumber: number | undefined

  while (!(await cursor.atEnd()) && consumed < header.length) {
    const candidate = await cursor.peek()
    if (!candidate || candidate.depth < itemDepth) {
      break
    }

    const looksLikeItem = candidate.content.startsWith(LIST_ITEM_PREFIX)
      || candidate.content === LIST_ITEM_MARKER
    if (candidate.depth !== itemDepth || !looksLikeItem) {
      break
    }

    if (firstLineNumber === undefined) {
      firstLineNumber = candidate.lineNumber
    }
    lastLineNumber = candidate.lineNumber

    yield* decodeListItemAsync(cursor, itemDepth, options)

    // An item may span several lines; extend the range to the last line it consumed
    const lastConsumed = cursor.current()
    if (lastConsumed) {
      lastLineNumber = lastConsumed.lineNumber
    }

    consumed++
  }

  assertExpectedCount(consumed, header.length, 'list array items', options)

  if (options.strict) {
    if (firstLineNumber !== undefined && lastLineNumber !== undefined) {
      validateNoBlankLinesInRange(firstLineNumber, lastLineNumber, cursor.getBlankLines(), options.strict, 'list array')
    }

    const following = await cursor.peek()
    validateNoExtraListItems(following, itemDepth, header.length)
  }
}
} // Use baseDepth + 1 for the array so rows are at baseDepth + 2 yield* decodeArrayFromHeaderAsync(header, headerInfo.inlineValues, cursor, baseDepth + 1, options) // Read sibling fields at depth = baseDepth + 1 const followDepth = baseDepth + 1 while (!(await cursor.atEnd())) { const nextLine = await cursor.peek() if (!nextLine || nextLine.depth < followDepth) { break } if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) { await cursor.advance() yield* decodeKeyValueAsync(nextLine.content, cursor, followDepth, options) } else { break } } yield { type: 'endObject' } return } // Check for object first field after hyphen if (isKeyValueContent(afterHyphen)) { yield { type: 'startObject' } yield* decodeKeyValueAsync(afterHyphen, cursor, baseDepth + 1, options) // Read subsequent fields const followDepth = baseDepth + 1 while (!(await cursor.atEnd())) { const nextLine = await cursor.peek() if (!nextLine || nextLine.depth < followDepth) { break } if (nextLine.depth === followDepth && !nextLine.content.startsWith(LIST_ITEM_PREFIX)) { await cursor.advance() yield* decodeKeyValueAsync(nextLine.content, cursor, followDepth, options) } else { break } } yield { type: 'endObject' } return } // Primitive value yield { type: 'primitive', value: parsePrimitiveToken(afterHyphen) } } // #endregion // #region Shared decoder helpers function* yieldObjectFromFields( fields: string[], primitives: JsonPrimitive[], ): Generator { yield { type: 'startObject' } for (let i = 0; i < fields.length; i++) { yield { type: 'key', key: fields[i]! } yield { type: 'primitive', value: primitives[i]! 
/**
 * Stores `value` under `key` as an own, enumerable data property.
 *
 * A plain assignment to the key `__proto__` goes through the accessor
 * inherited from `Object.prototype`: it replaces the object's prototype (or is
 * silently ignored for primitives) instead of storing data. Defining the
 * property explicitly keeps decoded documents faithful, matching what
 * `JSON.parse` does for the same key.
 */
function setOwnEntry(target: JsonObject, key: string, value: JsonValue): void {
  if (key === '__proto__') {
    Object.defineProperty(target, key, {
      value,
      enumerable: true,
      writable: true,
      configurable: true,
    })
    return
  }
  target[key] = value
}

/**
 * Attaches `value` to the innermost open container.
 *
 * For an object parent the pending key is consumed; for an array parent the
 * value is appended.
 *
 * @throws Error with `missingKeyMessage` if the parent is an object that has no pending key
 */
function attachToParent(parent: BuildContext, value: JsonValue, missingKeyMessage: string): void {
  if (parent.type === 'object') {
    if (parent.currentKey === undefined) {
      throw new Error(missingKeyMessage)
    }
    setOwnEntry(parent.obj, parent.currentKey, value)
    parent.currentKey = undefined
  } else if (parent.type === 'array') {
    parent.arr.push(value)
  }
}

/**
 * Applies a single stream event to the build state.
 *
 * Containers are attached to their parent as soon as they are opened and then
 * pushed on the stack; closing the outermost container (or receiving a
 * primitive with an empty stack) sets `state.root`.
 *
 * @param state - Mutable build state (container stack and root value)
 * @param event - The event to apply
 * @throws Error on structurally invalid streams (unbalanced end events, a key
 *   outside an object, or a value in an object without a preceding key)
 */
function applyEvent(state: BuildState, event: JsonStreamEvent): void {
  const { stack } = state

  switch (event.type) {
    case 'startObject': {
      const obj: JsonObject = {}
      const parent = stack[stack.length - 1]
      if (parent !== undefined) {
        attachToParent(parent, obj, 'Object startObject event without preceding key')
      }
      stack.push({ type: 'object', obj, quotedKeys: new Set<string>() })
      break
    }

    case 'endObject': {
      if (stack.length === 0) {
        throw new Error('Unexpected endObject event')
      }

      const context = stack.pop()!
      if (context.type !== 'object') {
        throw new Error('Mismatched endObject event')
      }

      // Attach quoted keys metadata if any keys were quoted
      if (context.quotedKeys.size > 0) {
        Object.defineProperty(context.obj, QUOTED_KEY_MARKER, {
          value: context.quotedKeys,
          enumerable: false,
          writable: false,
          configurable: false,
        })
      }

      if (stack.length === 0) {
        state.root = context.obj
      }
      break
    }

    case 'startArray': {
      const arr: JsonValue[] = []
      const parent = stack[stack.length - 1]
      if (parent !== undefined) {
        attachToParent(parent, arr, 'Array startArray event without preceding key')
      }
      stack.push({ type: 'array', arr })
      break
    }

    case 'endArray': {
      if (stack.length === 0) {
        throw new Error('Unexpected endArray event')
      }

      const context = stack.pop()!
      if (context.type !== 'array') {
        throw new Error('Mismatched endArray event')
      }

      if (stack.length === 0) {
        state.root = context.arr
      }
      break
    }

    case 'key': {
      if (stack.length === 0) {
        throw new Error('Key event outside of object context')
      }

      const parent = stack[stack.length - 1]!
      if (parent.type !== 'object') {
        throw new Error('Key event in non-object context')
      }

      parent.currentKey = event.key

      // Track quoted keys for path expansion
      if (event.wasQuoted) {
        parent.quotedKeys.add(event.key)
      }
      break
    }

    case 'primitive': {
      const parent = stack[stack.length - 1]
      if (parent === undefined) {
        // Root primitive
        state.root = event.value
      } else {
        attachToParent(parent, event.value, 'Primitive event without preceding key in object')
      }
      break
    }
  }
}
* * Expansion rules: * - Keys containing dots are split into segments * - All segments must pass `isIdentifierSegment` validation * - Non-eligible keys (with special characters) are left as literal dotted keys * - Deep merge: When multiple dotted keys expand to the same path, their values are merged if both are objects * - Conflict handling: * - `strict=true`: Throws TypeError on conflicts (non-object collision) * - `strict=false`: LWW (silent overwrite) * * @param value - The decoded value to expand * @param strict - Whether to throw errors on conflicts * @returns The expanded value with dotted keys reconstructed as nested objects * @throws TypeError if conflicts occur in strict mode */ export function expandPathsSafe(value: JsonValue, strict: boolean): JsonValue { if (Array.isArray(value)) { // Recursively expand array elements return value.map(item => expandPathsSafe(item, strict)) } if (isJsonObject(value)) { const expandedObject: JsonObject = {} // Check if this object has quoted key metadata const quotedKeys = (value as ObjectWithQuotedKeys)[QUOTED_KEY_MARKER] for (const [key, keyValue] of Object.entries(value)) { // Skip expansion for keys that were originally quoted const isQuoted = quotedKeys?.has(key) // Check if key contains dots and should be expanded if (key.includes(DOT) && !isQuoted) { const segments = key.split(DOT) // Validate all segments are identifiers if (segments.every(seg => isIdentifierSegment(seg))) { // Expand this dotted key const expandedValue = expandPathsSafe(keyValue, strict) insertPathSafe(expandedObject, segments, expandedValue, strict) continue } } // Not expandable - keep as literal key, but still recursively expand the value const expandedValue = expandPathsSafe(keyValue, strict) // Check for conflicts with already-expanded keys if (key in expandedObject) { const conflictingValue = expandedObject[key]! 
/**
 * Reads `segment` from `node` only if it is an own property.
 *
 * Inherited members (`toString`, `constructor`, the `__proto__` accessor, ...)
 * are reported as absent so that path segments with those names are treated as
 * ordinary data keys and never resolve to `Object.prototype`.
 */
function getOwnSegmentValue(node: JsonObject, segment: string): JsonValue | undefined {
  return Object.prototype.hasOwnProperty.call(node, segment) ? node[segment] : undefined
}

/**
 * Writes `value` under `segment` as an own, enumerable data property.
 *
 * `__proto__` is defined explicitly because a plain assignment would invoke
 * the inherited accessor and change the prototype instead of storing data.
 */
function setOwnSegmentValue(node: JsonObject, segment: string, value: JsonValue): void {
  if (segment === '__proto__') {
    Object.defineProperty(node, segment, {
      value,
      enumerable: true,
      writable: true,
      configurable: true,
    })
  } else {
    node[segment] = value
  }
}

/**
 * Inserts a value at a nested path, creating intermediate objects as needed.
 *
 * @remarks
 * Only own properties of the objects along the path are considered. Without
 * that restriction a path such as `a.__proto__.x` would walk into
 * `Object.prototype` and write to it (prototype pollution), and a segment such
 * as `toString` would be reported as a conflict with the inherited method.
 *
 * When an existing value is encountered:
 * - If both are objects: deep merge (continue insertion)
 * - Otherwise: conflict
 *   - strict=true: throw TypeError
 *   - strict=false: overwrite with new value (LWW)
 *
 * @param target - The object to insert into
 * @param segments - Array of path segments (e.g., ['data', 'metadata', 'items'])
 * @param value - The value to insert at the end of the path
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function insertPathSafe(
  target: JsonObject,
  segments: readonly string[],
  value: JsonValue,
  strict: boolean,
): void {
  let currentNode: JsonObject = target

  // Walk to the penultimate segment, creating objects as needed
  for (let i = 0; i < segments.length - 1; i++) {
    const currentSegment = segments[i]!
    const segmentValue = getOwnSegmentValue(currentNode, currentSegment)

    if (segmentValue !== undefined && isJsonObject(segmentValue)) {
      // Continue into existing object
      currentNode = segmentValue
      continue
    }

    if (segmentValue !== undefined && strict) {
      // Conflict: existing value is not an object
      throw new TypeError(
        `Path expansion conflict at segment "${currentSegment}": expected object but found ${typeof segmentValue}`,
      )
    }

    // Missing segment, or non-strict overwrite of a non-object value
    const newObj: JsonObject = {}
    setOwnSegmentValue(currentNode, currentSegment, newObj)
    currentNode = newObj
  }

  // Insert at the final segment
  const lastSeg = segments[segments.length - 1]!
  const destinationValue = getOwnSegmentValue(currentNode, lastSeg)

  if (destinationValue === undefined) {
    // No conflict - insert directly
    setOwnSegmentValue(currentNode, lastSeg, value)
  } else if (canMerge(destinationValue, value)) {
    // Both are objects - deep merge
    mergeObjects(destinationValue as JsonObject, value as JsonObject, strict)
  } else {
    // Conflict: incompatible types
    if (strict) {
      throw new TypeError(
        `Path expansion conflict at key "${lastSeg}": cannot merge ${typeof destinationValue} with ${typeof value}`,
      )
    }
    // Non-strict: overwrite (LWW)
    setOwnSegmentValue(currentNode, lastSeg, value)
  }
}
/**
 * Deep merges properties from source into target.
 *
 * @remarks
 * For each own key in source:
 * - If target has no own value for the key: copy it
 * - If both values are objects: recursively merge
 * - Otherwise: conflict (strict throws, non-strict overwrites)
 *
 * Only own properties of `target` are consulted. Looking a key up through the
 * prototype chain would report inherited members such as `toString` as
 * conflicts, and would let a source key named `__proto__` merge into
 * `Object.prototype`.
 *
 * @param target - The target object to merge into
 * @param source - The source object to merge from
 * @param strict - Whether to throw on conflicts
 * @throws TypeError if a conflict occurs in strict mode
 */
function mergeObjects(
  target: JsonObject,
  source: JsonObject,
  strict: boolean,
): void {
  for (const [key, sourceValue] of Object.entries(source)) {
    const targetValue = Object.prototype.hasOwnProperty.call(target, key) ? target[key] : undefined

    if (targetValue !== undefined) {
      if (canMerge(targetValue, sourceValue)) {
        // Both are objects - recursively merge
        mergeObjects(targetValue as JsonObject, sourceValue as JsonObject, strict)
        continue
      }

      // Conflict: incompatible types
      if (strict) {
        throw new TypeError(
          `Path expansion conflict at key "${key}": cannot merge ${typeof targetValue} with ${typeof sourceValue}`,
        )
      }
      // Non-strict: fall through and overwrite (LWW)
    }

    if (key === '__proto__') {
      // Plain assignment would replace the prototype instead of storing data
      Object.defineProperty(target, key, {
        value: sourceValue,
        enumerable: true,
        writable: true,
        configurable: true,
      })
    } else {
      target[key] = sourceValue
    }
  }
}
inlineValues?: string } | undefined { const trimmedToken = content.trimStart() // Find the bracket segment, accounting for quoted keys that may contain brackets let bracketStart = -1 // For quoted keys, find bracket after closing quote (not inside the quoted string) if (trimmedToken.startsWith(DOUBLE_QUOTE)) { const closingQuoteIndex = findClosingQuote(trimmedToken, 0) if (closingQuoteIndex === -1) { return } const afterQuote = trimmedToken.slice(closingQuoteIndex + 1) if (!afterQuote.startsWith(OPEN_BRACKET)) { return } // Calculate position in original content and find bracket after the quoted key const leadingWhitespace = content.length - trimmedToken.length const keyEndIndex = leadingWhitespace + closingQuoteIndex + 1 bracketStart = content.indexOf(OPEN_BRACKET, keyEndIndex) } else { // Unquoted key - find first bracket bracketStart = content.indexOf(OPEN_BRACKET) } if (bracketStart === -1) { return } const bracketEnd = content.indexOf(CLOSE_BRACKET, bracketStart) if (bracketEnd === -1) { return } // Find the colon that comes after all brackets and braces let colonIndex = bracketEnd + 1 let braceEnd = colonIndex // Check for fields segment (braces come after bracket) const braceStart = content.indexOf(OPEN_BRACE, bracketEnd) if (braceStart !== -1 && braceStart < content.indexOf(COLON, bracketEnd)) { // Validate: no extraneous content between bracket end and brace start const gapBeforeBrace = content.slice(bracketEnd + 1, braceStart) if (gapBeforeBrace.trim() !== '') { return } const foundBraceEnd = content.indexOf(CLOSE_BRACE, braceStart) if (foundBraceEnd !== -1) { braceEnd = foundBraceEnd + 1 } } // Now find colon after brackets and braces colonIndex = content.indexOf(COLON, Math.max(bracketEnd, braceEnd)) if (colonIndex === -1) { return } // Validate: no extraneous content between bracket/fields end and colon const gapStart = Math.max(bracketEnd + 1, braceEnd) const gapBeforeColon = content.slice(gapStart, colonIndex) if (gapBeforeColon.trim() !== '') { 
/**
 * Parses the content between the brackets of an array header (`[N]`, `[N|]`, `[N<TAB>]`).
 *
 * A trailing tab or pipe selects that delimiter for the array; otherwise
 * `defaultDelimiter` is used. The remaining text must consist solely of
 * decimal digits: `Number.parseInt` on its own would accept a numeric prefix
 * followed by garbage (`3abc`), negative values (`-1`) and fractions (`2.5`)
 * as if they were valid lengths.
 *
 * @param seg - Raw text between `[` and `]`
 * @param defaultDelimiter - Delimiter to use when the segment carries no delimiter suffix
 * @returns The declared length and the active delimiter
 * @throws TypeError if the length is not a non-negative safe integer written in decimal digits
 */
export function parseBracketSegment(
  seg: string,
  defaultDelimiter: Delimiter,
): { length: number, delimiter: Delimiter } {
  let content = seg

  // Check for delimiter suffix
  let delimiter = defaultDelimiter
  if (content.endsWith(TAB)) {
    delimiter = DELIMITERS.tab
    content = content.slice(0, -1)
  } else if (content.endsWith(PIPE)) {
    delimiter = DELIMITERS.pipe
    content = content.slice(0, -1)
  }

  // Digits only: rejects empty input, signs, fractions and trailing characters
  if (!/^\d+$/.test(content)) {
    throw new TypeError(`Invalid array length: ${seg}`)
  }

  const length = Number.parseInt(content, 10)
  if (!Number.isSafeInteger(length)) {
    throw new TypeError(`Invalid array length: ${seg}`)
  }

  return { length, delimiter }
}
/**
 * Splits a delimited line into its raw (still encoded) value tokens.
 *
 * @remarks
 * Delimiters inside double-quoted sections do not split. Within quotes a
 * backslash and the character after it are copied through verbatim, so an
 * escaped quote never closes the section. Each token is trimmed; quotes and
 * escapes are kept in the token for later parsing. An empty input yields an
 * empty array.
 *
 * @param input - The delimited text
 * @param delimiter - The active delimiter character
 * @returns The trimmed tokens in order
 */
export function parseDelimitedValues(input: string, delimiter: Delimiter): string[] {
  const tokens: string[] = []
  let pending = ''
  let quoted = false

  for (let pos = 0; pos < input.length; pos++) {
    const ch = input[pos]

    if (quoted && ch === BACKSLASH && pos + 1 < input.length) {
      // Keep the escape pair intact and step over the escaped character
      pending += ch + input[pos + 1]
      pos++
    } else if (ch === DOUBLE_QUOTE) {
      quoted = !quoted
      pending += ch
    } else if (!quoted && ch === delimiter) {
      tokens.push(pending.trim())
      pending = ''
    } else {
      pending += ch
    }
  }

  // Emit the trailing token; a trailing delimiter produces a final empty token
  if (pending !== '' || tokens.length > 0) {
    tokens.push(pending.trim())
  }

  return tokens
}
/**
 * Reads an unquoted key that starts at `start` and runs up to the next colon.
 *
 * @param content - The line content being parsed
 * @param start - Index at which the key begins
 * @returns The trimmed key and the index just past the colon
 * @throws SyntaxError if there is no colon at or after `start`
 */
export function parseUnquotedKey(content: string, start: number): { key: string, end: number } {
  const colonAt = content.indexOf(COLON, start)
  if (colonAt === -1) {
    throw new SyntaxError('Missing colon after key')
  }

  return {
    key: content.slice(start, colonAt).trim(),
    // Position immediately after the colon
    end: colonAt + 1,
  }
}
/**
 * Tells whether `content` looks like a keyless array header: after trimming it
 * begins with an opening bracket, and it contains a colon outside quotes.
 */
export function isArrayHeaderContent(content: string): boolean {
  const opensWithBracket = content.trim().startsWith(OPEN_BRACKET)
  if (!opensWithBracket) {
    return false
  }
  return findUnquotedChar(content, COLON) !== -1
}
/**
 * Enforces a declared element count when decoding strictly.
 *
 * Does nothing unless `options.strict` is set.
 *
 * @param actual - Number of elements actually read
 * @param expected - Number of elements declared
 * @param itemType - Human-readable element description used in the error message
 * @param options - Only `strict` is consulted
 * @throws RangeError if strict mode is on and the counts differ
 */
export function assertExpectedCount(
  actual: number,
  expected: number,
  itemType: string,
  options: { strict: boolean },
): void {
  if (!options.strict) {
    return
  }
  if (actual === expected) {
    return
  }
  throw new RangeError(`Expected ${expected} ${itemType}, but got ${actual}`)
}
/**
 * Validates that there are no extra list items beyond the expected count.
 *
 * A line counts as a list item when it sits at `itemDepth` and either starts
 * with the list item prefix or consists of the bare marker alone. The bare
 * form (an empty-object item) is accepted as an item by the list decoder, so
 * it has to be recognised here as well; otherwise a surplus bare item would
 * pass the strict count check unnoticed.
 *
 * @param nextLine - The first line after the last consumed item, if any
 * @param itemDepth - Depth at which the array's items live
 * @param expectedCount - Declared item count, used in the error message
 * @throws RangeError if `nextLine` is another list item at `itemDepth`
 */
export function validateNoExtraListItems(
  nextLine: ParsedLine | undefined,
  itemDepth: Depth,
  expectedCount: number,
): void {
  if (!nextLine || nextLine.depth !== itemDepth) {
    return
  }

  // The bare marker is the prefix without its trailing space
  const bareMarker = LIST_ITEM_PREFIX.trimEnd()
  const isListItem = nextLine.content.startsWith(LIST_ITEM_PREFIX) || nextLine.content === bareMarker

  if (isListItem) {
    throw new RangeError(`Expected ${expectedCount} list array items, but found more`)
  }
}
/**
 * Returns the index of the first occurrence of `target` that lies outside
 * double-quoted sections, or -1 if there is none.
 *
 * Inside quotes a backslash escapes the following character, so an escaped
 * quote does not end the quoted section.
 */
function findFirstUnquoted(content: string, target: string): number {
  let inQuotes = false
  for (let i = 0; i < content.length; i++) {
    const char = content[i]
    if (inQuotes && char === '\\') {
      // Skip the escaped character
      i++
      continue
    }
    if (char === '"') {
      inQuotes = !inQuotes
      continue
    }
    if (!inQuotes && content.startsWith(target, i)) {
      return i
    }
  }
  return -1
}

/**
 * Checks if a line is a data row (vs a key-value pair) in a tabular array.
 *
 * @remarks
 * The decision compares the first colon and the first delimiter that appear
 * outside quotes. Characters inside quoted tokens are data: a cell such as
 * `"a:b"` must not make a row look like a key-value pair, and a quoted key
 * such as `"x,y"` must not make a key-value pair look like a row.
 *
 * @param content - Line content without indentation
 * @param delimiter - The active delimiter of the tabular array
 * @returns `true` for a data row, `false` for a key-value pair
 */
function isDataRow(content: string, delimiter: Delimiter): boolean {
  const colonPos = findFirstUnquoted(content, COLON)
  const delimiterPos = findFirstUnquoted(content, delimiter)

  // No colon = definitely a data row
  if (colonPos === -1) {
    return true
  }

  // Has delimiter and it comes before colon = data row
  if (delimiterPos !== -1 && delimiterPos < colonPos) {
    return true
  }

  // Colon before delimiter or no delimiter = key-value pair
  return false
}
new Set(keys.filter(k => k.includes('.'))) } const effectiveFlattenDepth = remainingDepth ?? options.flattenDepth for (const [key, val] of Object.entries(value)) { yield* encodeKeyValuePairLines(key, val, depth, options, keys, rootLiteralKeys, pathPrefix, effectiveFlattenDepth) } } export function* encodeKeyValuePairLines( key: string, value: JsonValue, depth: Depth, options: ResolvedEncodeOptions, siblings?: readonly string[], rootLiteralKeys?: Set, pathPrefix?: string, flattenDepth?: number, ): Generator { const currentPath = pathPrefix ? `${pathPrefix}${DOT}${key}` : key const effectiveFlattenDepth = flattenDepth ?? options.flattenDepth // Attempt key folding when enabled if (options.keyFolding === 'safe' && siblings) { const foldResult = tryFoldKeyChain(key, value, siblings, options, rootLiteralKeys, pathPrefix, effectiveFlattenDepth) if (foldResult) { const { foldedKey, remainder, leafValue, segmentCount } = foldResult const encodedFoldedKey = encodeKey(foldedKey) // Case 1: Fully folded to a leaf value if (remainder === undefined) { // The folded chain ended at a leaf (primitive, array, or empty object) if (isJsonPrimitive(leafValue)) { yield indentedLine(depth, `${encodedFoldedKey}: ${encodePrimitive(leafValue, options.delimiter)}`, options.indent) return } else if (isJsonArray(leafValue)) { yield* encodeArrayLines(foldedKey, leafValue, depth, options) return } else if (isJsonObject(leafValue) && isEmptyObject(leafValue)) { yield indentedLine(depth, `${encodedFoldedKey}:`, options.indent) return } } // Case 2: Partially folded with a tail object if (isJsonObject(remainder)) { yield indentedLine(depth, `${encodedFoldedKey}:`, options.indent) // Calculate remaining depth budget (subtract segments already folded) const remainingDepth = effectiveFlattenDepth - segmentCount const foldedPath = pathPrefix ? 
/**
 * Encodes an array, choosing the most compact representation that fits:
 *
 * 1. empty array: header line only
 * 2. all primitives: single inline line
 * 3. array of arrays whose inner arrays are all primitive: list of inline arrays
 * 4. array of objects: tabular form when a tabular header can be extracted,
 *    list items otherwise
 * 5. anything else: list items
 *
 * @param key - Key the array is stored under, or `undefined` for a keyless array
 * @param value - The array to encode
 * @param depth - Indentation depth of the header line
 * @param options - Resolved encode options (delimiter and indent are used here)
 */
export function* encodeArrayLines(
  key: string | undefined,
  value: JsonArray,
  depth: Depth,
  options: ResolvedEncodeOptions,
): Generator {
  if (value.length === 0) {
    const emptyHeader = formatHeader(0, { key, delimiter: options.delimiter })
    yield indentedLine(depth, emptyHeader, options.indent)
  } else if (isArrayOfPrimitives(value)) {
    // Primitive array
    const inlineLine = encodeInlineArrayLine(value, options.delimiter, key)
    yield indentedLine(depth, inlineLine, options.indent)
  } else if (isArrayOfArrays(value) && value.every(inner => isArrayOfPrimitives(inner))) {
    // Array of arrays (all primitives)
    yield* encodeArrayOfArraysAsListItemsLines(key, value, depth, options)
  } else if (isArrayOfObjects(value)) {
    // Array of objects: tabular when uniform, expanded otherwise
    const tabularFields = extractTabularHeader(value)
    if (tabularFields) {
      yield* encodeArrayOfObjectsAsTabularLines(key, value, tabularFields, depth, options)
    } else {
      yield* encodeMixedArrayAsListItemsLines(key, value, depth, options)
    }
  } else {
    // Mixed array: fallback to expanded format
    yield* encodeMixedArrayAsListItemsLines(key, value, depth, options)
  }
}
/**
 * Builds the single-line form of a primitive array: the header followed by the
 * joined values, or the header alone when the array is empty.
 *
 * @param values - Primitive values to encode
 * @param delimiter - Delimiter placed between values and passed to the header formatter
 * @param prefix - Optional key to put in front of the header
 */
export function encodeInlineArrayLine(values: readonly JsonPrimitive[], delimiter: string, prefix?: string): string {
  const headerText = formatHeader(values.length, { key: prefix, delimiter })
  const joined = encodeAndJoinPrimitives(values, delimiter)
  return values.length === 0 ? headerText : `${headerText} ${joined}`
}
/**
 * Checks whether every row can be rendered as one line of a tabular block.
 *
 * A row qualifies when it has exactly as many own keys as `header`, owns every
 * header key (key order may differ between rows), and every header value is a
 * JSON primitive.
 *
 * @param rows - Candidate rows
 * @param header - Field names taken from the first row
 * @returns `true` when all rows qualify (vacuously `true` for an empty `rows`)
 */
export function isTabularArray(
  rows: readonly JsonObject[],
  header: readonly string[],
): boolean {
  return rows.every((row) => {
    // All objects must have the same number of keys as the header
    if (Object.keys(row).length !== header.length) {
      return false
    }

    // Own-property check: `key in row` would also accept properties inherited
    // through the prototype chain, which are not part of the row's data.
    return header.every(key =>
      Object.prototype.hasOwnProperty.call(row, key) && isJsonPrimitive(row[key]),
    )
  })
}
const restEntries = entries.slice(1) // Check if first field is a tabular array if (isJsonArray(firstValue) && isArrayOfObjects(firstValue)) { const header = extractTabularHeader(firstValue) if (header) { // Tabular array as first field const formattedHeader = formatHeader(firstValue.length, { key: firstKey, fields: header, delimiter: options.delimiter }) yield indentedListItem(depth, formattedHeader, options.indent) yield* writeTabularRowsLines(firstValue, header, depth + 2, options) if (restEntries.length > 0) { const restObj: JsonObject = Object.fromEntries(restEntries) yield* encodeObjectLines(restObj, depth + 1, options) } return } } const encodedKey = encodeKey(firstKey) if (isJsonPrimitive(firstValue)) { // Primitive value: `- key: value` const encodedValue = encodePrimitive(firstValue, options.delimiter) yield indentedListItem(depth, `${encodedKey}: ${encodedValue}`, options.indent) } else if (isJsonArray(firstValue)) { if (firstValue.length === 0) { // Empty array: `- key[0]:` const header = formatHeader(0, { delimiter: options.delimiter }) yield indentedListItem(depth, `${encodedKey}${header}`, options.indent) } else if (isArrayOfPrimitives(firstValue)) { // Inline primitive array: `- key[N]: values` const arrayLine = encodeInlineArrayLine(firstValue, options.delimiter) yield indentedListItem(depth, `${encodedKey}${arrayLine}`, options.indent) } else { // Non-inline array: `- key[N]:` with items at depth + 2 const header = formatHeader(firstValue.length, { delimiter: options.delimiter }) yield indentedListItem(depth, `${encodedKey}${header}`, options.indent) for (const item of firstValue) { yield* encodeListItemValueLines(item, depth + 2, options) } } } else if (isJsonObject(firstValue)) { // Object value: `- key:` with fields at depth + 2 yield indentedListItem(depth, `${encodedKey}:`, options.indent) if (!isEmptyObject(firstValue)) { yield* encodeObjectLines(firstValue, depth + 2, options) } } if (restEntries.length > 0) { const restObj: JsonObject = 
/**
 * Prefixes `content` with `indentSize * depth` space characters.
 *
 * @param depth - Nesting depth of the line
 * @param content - Text of the line, without indentation
 * @param indentSize - Number of spaces per depth level
 */
function indentedLine(depth: Depth, content: string, indentSize: number): string {
  return `${' '.repeat(indentSize * depth)}${content}`
}
/**
 * Attempts to fold a single-key object chain into a dotted path.
 *
 * @remarks
 * Folding traverses nested objects with single keys, collapsing them into a dotted path.
 * It stops when:
 * - A non-single-key object is encountered
 * - An array is encountered (arrays are not "single-key objects")
 * - A primitive value is reached
 * - The flatten depth limit is reached
 *
 * Folding is refused (returns `undefined`) when:
 * - `options.keyFolding` is not `'safe'`
 * - `value` is not an object
 * - Fewer than two segments were collected
 * - Any segment is not a valid identifier segment
 * - The folded key equals one of `siblings`
 * - The absolute folded path is present in `rootLiteralKeys`
 *
 * @param key - The starting key to fold
 * @param value - The value associated with the key
 * @param siblings - Array of all sibling keys at this level (for collision detection)
 * @param options - Resolved encoding options
 * @param rootLiteralKeys - Optional set of literal dotted keys; the absolute folded path must not be in it
 * @param pathPrefix - Optional dotted path of the enclosing object, prepended to form the absolute path
 * @param flattenDepth - Optional segment budget; falls back to `options.flattenDepth` when nullish
 * @returns A FoldResult if folding is possible, undefined otherwise
 */
export function tryFoldKeyChain(
  key: string,
  value: JsonValue,
  siblings: readonly string[],
  options: ResolvedEncodeOptions,
  rootLiteralKeys?: Set<string>,
  pathPrefix?: string,
  flattenDepth?: number,
): FoldResult | undefined {
  // Only fold when safe mode is enabled
  if (options.keyFolding !== 'safe') {
    return undefined
  }

  // Can only fold objects
  if (!isJsonObject(value)) {
    return undefined
  }

  // Use provided flattenDepth or fall back to options default
  const effectiveFlattenDepth = flattenDepth ?? options.flattenDepth

  // Collect the chain of single-key objects
  const { segments, tail, leafValue } = collectSingleKeyChain(key, value, effectiveFlattenDepth)

  // Need at least 2 segments for folding to be worthwhile
  if (segments.length < 2) {
    return undefined
  }

  // Validate all segments are safe identifiers
  if (!segments.every(seg => isIdentifierSegment(seg))) {
    return undefined
  }

  // Build the folded key (relative to current nesting level)
  const foldedKey = buildFoldedKey(segments)

  // Check for collision with existing literal sibling keys (at current level)
  if (siblings.includes(foldedKey)) {
    return undefined
  }

  // Check for collision with root-level literal dotted keys, using the absolute path from root
  if (rootLiteralKeys) {
    const absolutePath = pathPrefix ? `${pathPrefix}${DOT}${foldedKey}` : foldedKey
    if (rootLiteralKeys.has(absolutePath)) {
      return undefined
    }
  }

  return {
    foldedKey,
    remainder: tail,
    leafValue,
    segmentCount: segments.length,
  }
}
* * @remarks * Traverses nested objects, collecting keys until: * - A non-single-key object is found * - An array is encountered * - A primitive is reached * - An empty object is reached * - The depth limit is reached * * @param startKey - The initial key to start the chain * @param startValue - The value to traverse * @param maxDepth - Maximum number of segments to collect * @returns Object containing segments array, tail value, and leaf value */ function collectSingleKeyChain( startKey: string, startValue: JsonValue, maxDepth: number, ): { segments: string[], tail: JsonValue | undefined, leafValue: JsonValue } { const segments: string[] = [startKey] let currentValue = startValue // Traverse nested single-key objects, collecting each key into segments array // Stop when we encounter: multi-key object, array, primitive, or depth limit while (segments.length < maxDepth) { // Must be an object to continue if (!isJsonObject(currentValue)) { break } const keys = Object.keys(currentValue) // Must have exactly one key to continue the chain if (keys.length !== 1) { break } const nextKey = keys[0]! const nextValue = currentValue[nextKey]! 
// #region Normalization (unknown → JsonValue)

/**
 * Converts an arbitrary JavaScript value into a `JsonValue`.
 *
 * Conversion rules, applied in order:
 * - `null` stays `null`
 * - objects with a `toJSON` method are replaced by its result (unless it returns the same object)
 * - strings and booleans pass through
 * - numbers: `-0` becomes `0`, non-finite values become `null`
 * - bigint: a number when within the safe integer range, otherwise a decimal string
 * - `Date` → ISO string, arrays and `Set` → array, `Map` → object with stringified keys
 * - plain objects → object with own enumerable string keys, values normalized recursively
 * - everything else (function, symbol, undefined, other objects) → `null`
 *
 * @param value - Any JavaScript value
 * @returns The normalized value
 */
export function normalizeValue(value: unknown): JsonValue {
  // null
  if (value === null) {
    return null
  }

  // Objects with toJSON: delegate to its result before host-type normalization
  if (
    typeof value === 'object'
    && value !== null
    && 'toJSON' in value
    && typeof value.toJSON === 'function'
  ) {
    const next = value.toJSON()
    // Avoid infinite recursion when toJSON returns the same object
    if (next !== value) {
      return normalizeValue(next)
    }
  }

  // Primitives
  if (typeof value === 'string' || typeof value === 'boolean') {
    return value
  }

  // Numbers: canonicalize -0 to 0, handle NaN and Infinity
  if (typeof value === 'number') {
    if (Object.is(value, -0)) {
      return 0
    }
    if (!Number.isFinite(value)) {
      return null
    }
    return value
  }

  // BigInt → number (if safe) or string
  if (typeof value === 'bigint') {
    // Try to convert to number if within safe integer range
    if (value >= Number.MIN_SAFE_INTEGER && value <= Number.MAX_SAFE_INTEGER) {
      return Number(value)
    }
    // Otherwise convert to string (will be quoted in output)
    return value.toString()
  }

  // Date → ISO string
  if (value instanceof Date) {
    return value.toISOString()
  }

  // Array: Array.from visits holes of sparse arrays as `undefined`, so they
  // normalize to `null` instead of surviving as holes (which `map` would keep).
  if (Array.isArray(value)) {
    return Array.from(value, item => normalizeValue(item))
  }

  // Set → array
  if (value instanceof Set) {
    return Array.from(value, item => normalizeValue(item))
  }

  // Map → object
  if (value instanceof Map) {
    return Object.fromEntries(
      Array.from(value, ([k, v]) => [String(k), normalizeValue(v)]),
    )
  }

  // Plain object: Object.fromEntries defines own data properties, so an own
  // "__proto__" key is preserved instead of triggering the prototype setter.
  if (isPlainObject(value)) {
    const normalizedEntries: [string, JsonValue][] = Object.entries(value)
      .map(([key, item]) => [key, normalizeValue(item)])
    return Object.fromEntries(normalizedEntries)
  }

  // Fallback: function, symbol, undefined, or other → null
  return null
}

// #endregion

// #region Type guards

/** Returns `true` for `null`, strings, numbers, and booleans. */
export function isJsonPrimitive(value: unknown): value is JsonPrimitive {
  return (
    value === null
    || typeof value === 'string'
    || typeof value === 'number'
    || typeof value === 'boolean'
  )
}

/** Returns `true` when `value` is an array. */
export function isJsonArray(value: unknown): value is JsonArray {
  return Array.isArray(value)
}

/** Returns `true` for any non-null, non-array value whose `typeof` is `'object'`. */
export function isJsonObject(value: unknown): value is JsonObject {
  return value !== null && typeof value === 'object' && !Array.isArray(value)
}

/** Returns `true` when the object has no own enumerable string keys. */
export function isEmptyObject(value: JsonObject): boolean {
  return Object.keys(value).length === 0
}

/** Returns `true` for objects whose prototype is `Object.prototype` or `null`. */
export function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== 'object') {
    return false
  }

  const prototype = Object.getPrototypeOf(value)
  return prototype === null || prototype === Object.prototype
}

// #endregion

// #region Array type detection

/** Returns `true` when the array is empty or every item is a JSON primitive. */
export function isArrayOfPrimitives(value: JsonArray): value is readonly JsonPrimitive[] {
  return value.length === 0 || value.every(item => isJsonPrimitive(item))
}

/** Returns `true` when the array is empty or every item is an array. */
export function isArrayOfArrays(value: JsonArray): value is readonly JsonArray[] {
  return value.length === 0 || value.every(item => isJsonArray(item))
}

/** Returns `true` when the array is empty or every item is a non-array object. */
export function isArrayOfObjects(value: JsonArray): value is readonly JsonObject[] {
  return value.length === 0 || value.every(item => isJsonObject(item))
}

// #endregion
/**
 * Renders one JSON primitive as TOON text.
 *
 * `null` becomes the null literal, booleans and numbers use `String(value)`,
 * and strings go through `encodeStringLiteral` (which decides on quoting).
 *
 * @param value - The primitive to encode
 * @param delimiter - Active delimiter, forwarded to the string encoder
 */
export function encodePrimitive(value: JsonPrimitive, delimiter?: string): string {
  if (value === null) {
    return NULL_LITERAL
  }

  switch (typeof value) {
    case 'boolean':
    case 'number':
      return String(value)
    default:
      return encodeStringLiteral(value, delimiter)
  }
}
/**
 * Runs a replacer over a `JsonValue` tree, starting at the root.
 *
 * The replacer is first invoked for the root with key `''` and an empty path.
 * If it returns `undefined` the root is kept as-is (the root is never omitted);
 * any other result is normalized and becomes the new root. Children of the
 * resulting root are then transformed recursively.
 *
 * @param root - The normalized `JsonValue` to transform
 * @param replacer - The replacer function to apply
 * @returns The transformed `JsonValue`
 */
export function applyReplacer(root: JsonValue, replacer: EncodeReplacer): JsonValue {
  const rootReplacement = replacer('', root, [])

  // `undefined` for the root means "no change"; anything else may be a
  // non-JsonValue and must be normalized before descending.
  const effectiveRoot = rootReplacement === undefined
    ? root
    : normalizeValue(rootReplacement)

  return transformChildren(effectiveRoot, replacer, [])
}
/**
 * Transforms an object by applying the replacer to each property.
 *
 * For every own enumerable entry the replacer is called with the property key,
 * the current value, and the path extended by that key. A result of `undefined`
 * omits the property; any other result is normalized and its children are
 * transformed recursively.
 *
 * @param obj - The object to transform
 * @param replacer - The replacer function to apply
 * @param path - Current path from root
 * @returns A new object with transformed properties
 */
function transformObject(
  obj: JsonObject,
  replacer: EncodeReplacer,
  path: readonly (string | number)[],
): JsonObject {
  const transformedEntries: [string, JsonValue][] = []

  for (const [key, value] of Object.entries(obj)) {
    // Call replacer with the property key and current path
    const childPath = [...path, key]
    const replacedValue = replacer(key, value, childPath)

    // undefined means omit this property
    if (replacedValue === undefined) {
      continue
    }

    // Normalize the replaced value, then recursively transform its children
    const normalizedValue = normalizeValue(replacedValue)
    transformedEntries.push([key, transformChildren(normalizedValue, replacer, childPath)])
  }

  // Object.fromEntries defines own data properties, so a "__proto__" key is
  // kept as data instead of being swallowed by the prototype setter.
  return Object.fromEntries(transformedEntries)
}
/**
 * Encodes a JavaScript value into TOON format string.
 *
 * Collects every line produced by `encodeLines` and joins them with `'\n'`
 * (no trailing newline is appended).
 *
 * @param input - Any JavaScript value (objects, arrays, primitives)
 * @param options - Optional encoding configuration
 * @returns TOON formatted string
 *
 * @example
 * ```ts
 * encode({ name: 'Alice', age: 30 })
 * // name: Alice
 * // age: 30
 *
 * // Objects sharing the same primitive-only keys are emitted as a tabular block
 * encode({ users: [{ id: 1 }, { id: 2 }] })
 * // users[2]{id}:
 * //   1
 * //   2
 *
 * encode(data, { indent: 4, keyFolding: 'safe' })
 * ```
 */
export function encode(input: unknown, options?: EncodeOptions): string {
  return Array.from(encodeLines(input, options)).join('\n')
}
/**
 * Decodes TOON format from pre-split lines into a JavaScript value.
 *
 * This is a convenience wrapper around the streaming decoder that builds
 * the full value in memory. Useful when you already have lines as an array
 * or iterable and want the standard decode behavior with path expansion support.
 *
 * @param lines - Iterable of TOON lines (without newlines)
 * @param options - Optional decoding configuration (supports expandPaths)
 * @returns Parsed JavaScript value (object, array, or primitive)
 *
 * @example
 * ```ts
 * const lines = ['name: Alice', 'age: 30']
 * decodeFromLines(lines)
 * // { name: 'Alice', age: 30 }
 * ```
 */
export function decodeFromLines(lines: Iterable<string>, options?: DecodeOptions): JsonValue {
  const resolvedOptions = resolveDecodeOptions(options)

  // The streaming decoder only receives indent/strict; expandPaths is applied
  // afterwards on the fully built value.
  const streamOptions: DecodeStreamOptions = {
    indent: resolvedOptions.indent,
    strict: resolvedOptions.strict,
  }

  const events = decodeStreamSyncCore(lines, streamOptions)
  const decodedValue = buildValueFromEvents(events)

  // Apply path expansion if enabled
  if (resolvedOptions.expandPaths === 'safe') {
    return expandPathsSafe(decodedValue, resolvedOptions.strict)
  }

  return decodedValue
}
/**
 * Fills in encoder defaults for every option the caller left nullish.
 *
 * Defaults: `indent` 2, `delimiter` `DEFAULT_DELIMITER`, `keyFolding` `'off'`,
 * `flattenDepth` positive infinity. `replacer` is passed through unchanged.
 */
function resolveOptions(options?: EncodeOptions): ResolvedEncodeOptions {
  const provided: EncodeOptions = options ?? {}

  const indent = provided.indent ?? 2
  const delimiter = provided.delimiter ?? DEFAULT_DELIMITER
  const keyFolding = provided.keyFolding ?? 'off'
  const flattenDepth = provided.flattenDepth ?? Number.POSITIVE_INFINITY

  return { indent, delimiter, keyFolding, flattenDepth, replacer: provided.replacer }
}
/**
 * Checks if a token represents a valid numeric literal.
 *
 * @remarks
 * The token must be non-empty, match the numeric literal pattern (which rejects
 * forbidden leading zeros such as `"05"` while allowing `"0"` and `"0.5"`),
 * and convert to a finite number.
 */
export function isNumericLiteral(token: string): boolean {
  return Boolean(token)
    && NUMERIC_LITERAL_PATTERN.test(token)
    // Number.isFinite is false for NaN as well as for ±Infinity
    && Number.isFinite(Number(token))
}
/**
 * Locates the first occurrence of `char` that is not inside a double-quoted section.
 *
 * @param content - Text to scan
 * @param char - Single character to look for
 * @param start - Index at which scanning begins (defaults to 0)
 * @returns The index of the match, or `-1` when there is none
 */
export function findUnquotedChar(content: string, char: string, start = 0): number {
  let insideQuotes = false

  for (let index = start; index < content.length; index++) {
    const current = content[index]

    // Inside quotes a backslash escapes the next character: step over it here,
    // the loop increment then steps over the backslash itself.
    if (insideQuotes && current === BACKSLASH && index + 1 < content.length) {
      index++
      continue
    }

    // A quote toggles the quoted state and is never reported as a match
    if (current === DOUBLE_QUOTE) {
      insideQuotes = !insideQuotes
      continue
    }

    if (!insideQuotes && current === char) {
      return index
    }
  }

  return -1
}
/**
 * Tells whether a single key segment is a plain identifier.
 *
 * @remarks
 * A plain identifier starts with a letter or underscore and continues with
 * letters, digits, or underscores only. Unlike unquoted keys, dots are not
 * allowed, which is what makes a segment safe for key folding and path expansion.
 */
export function isIdentifierSegment(key: string): boolean {
  const identifierPattern = /^[A-Z_]\w*$/i
  return identifierPattern.test(key)
}
/**
 * Checks if a string looks like a number.
 *
 * @remarks
 * A value is numeric-like when it matches either the general numeric pattern
 * (e.g. `42`, `-3.14`, `1e-6`) or the leading-zero pattern (e.g. `05`).
 */
function isNumericLike(value: string): boolean {
  return [NUMERIC_LIKE_PATTERN, LEADING_ZERO_PATTERN].some(pattern => pattern.test(value))
}
/**
 * A function that transforms or filters values during encoding.
 *
 * Called for every value (root, object properties, array elements) during the encoding process.
 * Similar to `JSON.stringify`'s replacer, but with path tracking.
 *
 * @param key - The property key or array index (as string). Empty string (`''`) for root value.
 * @param value - The normalized `JsonValue` at this location.
 * @param path - Array representing the path from root to this value.
 *
 * @returns The replacement value (will be normalized again), or `undefined` to omit.
 * For root value, returning `undefined` means "no change" (don't omit root).
 *
 * @example
 * ```ts
 * // Remove password fields
 * const replacer = (key, value) => {
 *   if (key === 'password') return undefined
 *   return value
 * }
 *
 * // Add timestamps
 * const replacer = (key, value, path) => {
 *   if (path.length === 0 && typeof value === 'object' && value !== null) {
 *     return { ...value, _timestamp: Date.now() }
 *   }
 *   return value
 * }
 * ```
 */
export type EncodeReplacer = (
  key: string,
  value: JsonValue,
  path: readonly (string | number)[],
) => unknown

export interface EncodeOptions {
  /**
   * Number of spaces per indentation level.
   * @default 2
   */
  indent?: number
  /**
   * Delimiter to use for tabular array rows and inline primitive arrays.
   * @default DELIMITERS.comma
   */
  delimiter?: Delimiter
  /**
   * Enable key folding to collapse single-key wrapper chains.
   * When set to 'safe', nested objects with single keys are collapsed into dotted paths
   * (e.g., data.metadata.items instead of nested indentation).
   * @default 'off'
   */
  keyFolding?: 'off' | 'safe'
  /**
   * Maximum number of segments to fold when keyFolding is enabled.
   * Controls how deep the folding can go in single-key chains.
   * Values 0 or 1 have no practical effect (treated as effectively disabled).
   * @default Infinity
   */
  flattenDepth?: number
  /**
   * A function to transform or filter values during encoding.
   * Called for the root value and every nested property/element.
   * Return `undefined` to omit properties/elements (root cannot be omitted).
   * @default undefined
   */
  replacer?: EncodeReplacer
}

/**
 * Encoder options after defaults are applied: every option except `replacer`
 * is required and read-only, while `replacer` stays optional.
 */
export type ResolvedEncodeOptions = Readonly<Required<Omit<EncodeOptions, 'replacer'>>> & Pick<EncodeOptions, 'replacer'>

// #endregion

// #region Decoder options

export interface DecodeOptions {
  /**
   * Number of spaces per indentation level.
   * @default 2
   */
  indent?: number
  /**
   * When true, enforce strict validation of array lengths and tabular row counts.
   * @default true
   */
  strict?: boolean
  /**
   * Enable path expansion to reconstruct dotted keys into nested objects.
   * When set to 'safe', keys containing dots are expanded into nested structures
   * if all segments are valid identifiers (e.g., data.metadata.items becomes nested objects).
   * Pairs with keyFolding='safe' for lossless round-trips.
   * @default 'off'
   */
  expandPaths?: 'off' | 'safe'
}

/**
 * Decoder options after defaults are applied: every option is required and read-only.
 */
export type ResolvedDecodeOptions = Readonly<Required<DecodeOptions>>

/**
 * Options for streaming decode operations.
 *
 * @remarks
 * Path expansion is not supported in streaming mode.
 */
export interface DecodeStreamOptions extends Omit<DecodeOptions, 'expandPaths'> {
  /**
   * Path expansion is not supported in streaming decode.
   * This option is explicitly omitted.
   */
  expandPaths?: never
}

// #endregion

// #region Streaming decoder types

/**
 * One event of a streamed decode, discriminated by `type`.
 *
 * @remarks
 * Objects and arrays are bracketed by start/end events; `startArray` carries the
 * array length, `key` carries the key text (and optionally whether it was quoted),
 * and `primitive` carries a leaf value.
 */
export type JsonStreamEvent
  = | { type: 'startObject' }
    | { type: 'endObject' }
    | { type: 'startArray', length: number }
    | { type: 'endArray' }
    | { type: 'key', key: string, wasQuoted?: boolean }
    | { type: 'primitive', value: JsonPrimitive }

// #endregion

// #region Decoder parsing types

/** Pieces of an array header: optional key, length, delimiter, and optional field names. */
export interface ArrayHeaderInfo {
  key?: string
  length: number
  delimiter: Delimiter
  fields?: string[]
}

/** A single input line together with its indentation, depth, content, and line number. */
export interface ParsedLine {
  raw: string
  depth: Depth
  indent: number
  content: string
  lineNumber: number
}

/** Position record for a blank line: its line number, indentation, and depth. */
export interface BlankLineInfo {
  lineNumber: number
  indent: number
  depth: Depth
}

// #endregion

/** Nesting depth, expressed as a plain number. */
export type Depth = number
describe('streaming decode', () => {
  // NOTE(review): whitespace inside the TOON string literals below is copied verbatim
  // from this view of the file; confirm indentation widths against the upstream test
  // before relying on them.

  /** Splits TOON text on `\n`, the form every line-based decoder call below receives. */
  const linesOf = (text: string): string[] => text.split('\n')

  describe('decodeStreamSync', () => {
    // Inputs that decode cleanly, each paired with the exact event sequence expected.
    const eventCases = [
      {
        title: 'decode simple object',
        toon: 'name: Alice\nage: 30',
        events: [
          { type: 'startObject' },
          { type: 'key', key: 'name' },
          { type: 'primitive', value: 'Alice' },
          { type: 'key', key: 'age' },
          { type: 'primitive', value: 30 },
          { type: 'endObject' },
        ],
      },
      {
        title: 'decode nested object',
        toon: 'user:\n name: Alice\n age: 30',
        events: [
          { type: 'startObject' },
          { type: 'key', key: 'user' },
          { type: 'startObject' },
          { type: 'key', key: 'name' },
          { type: 'primitive', value: 'Alice' },
          { type: 'key', key: 'age' },
          { type: 'primitive', value: 30 },
          { type: 'endObject' },
          { type: 'endObject' },
        ],
      },
      {
        title: 'decode inline primitive array',
        toon: 'scores[3]: 95, 87, 92',
        events: [
          { type: 'startObject' },
          { type: 'key', key: 'scores' },
          { type: 'startArray', length: 3 },
          { type: 'primitive', value: 95 },
          { type: 'primitive', value: 87 },
          { type: 'primitive', value: 92 },
          { type: 'endArray' },
          { type: 'endObject' },
        ],
      },
      {
        title: 'decode inline array with empty string key',
        toon: '""[2]: 1,2',
        events: [
          { type: 'startObject' },
          { type: 'key', key: '' },
          { type: 'startArray', length: 2 },
          { type: 'primitive', value: 1 },
          { type: 'primitive', value: 2 },
          { type: 'endArray' },
          { type: 'endObject' },
        ],
      },
      {
        title: 'decode list array',
        toon: 'items[2]:\n - Apple\n - Banana',
        events: [
          { type: 'startObject' },
          { type: 'key', key: 'items' },
          { type: 'startArray', length: 2 },
          { type: 'primitive', value: 'Apple' },
          { type: 'primitive', value: 'Banana' },
          { type: 'endArray' },
          { type: 'endObject' },
        ],
      },
      {
        title: 'decode tabular array',
        toon: 'users[2]{name,age}:\n Alice, 30\n Bob, 25',
        events: [
          { type: 'startObject' },
          { type: 'key', key: 'users' },
          { type: 'startArray', length: 2 },
          { type: 'startObject' },
          { type: 'key', key: 'name' },
          { type: 'primitive', value: 'Alice' },
          { type: 'key', key: 'age' },
          { type: 'primitive', value: 30 },
          { type: 'endObject' },
          { type: 'startObject' },
          { type: 'key', key: 'name' },
          { type: 'primitive', value: 'Bob' },
          { type: 'key', key: 'age' },
          { type: 'primitive', value: 25 },
          { type: 'endObject' },
          { type: 'endArray' },
          { type: 'endObject' },
        ],
      },
      {
        title: 'decode root primitive',
        toon: 'Hello World',
        events: [
          { type: 'primitive', value: 'Hello World' },
        ],
      },
      {
        title: 'decode root array',
        toon: '[2]:\n - Apple\n - Banana',
        events: [
          { type: 'startArray', length: 2 },
          { type: 'primitive', value: 'Apple' },
          { type: 'primitive', value: 'Banana' },
          { type: 'endArray' },
        ],
      },
    ]

    for (const { title, toon, events } of eventCases) {
      it(title, () => {
        expect(Array.from(decodeStreamSync(linesOf(toon)))).toEqual(events)
      })
    }

    it('decode empty input as empty object', () => {
      // An empty line list (not a single empty line) is the input here.
      const noLines: string[] = []

      expect(Array.from(decodeStreamSync(noLines))).toEqual([
        { type: 'startObject' },
        { type: 'endObject' },
      ])
    })

    it('throw on expandPaths option', () => {
      const lines = linesOf('name: Alice')
      const drain = () => Array.from(decodeStreamSync(lines, { expandPaths: 'safe' } as any))

      expect(drain).toThrow('expandPaths is not supported in streaming decode')
    })

    it('enforce strict mode validation', () => {
      const lines = linesOf('items[2]:\n - Apple')
      const drain = () => Array.from(decodeStreamSync(lines, { strict: true }))

      expect(drain).toThrow()
    })

    it('allow count mismatch in non-strict mode', () => {
      const lines = linesOf('items[2]:\n - Apple')

      // Should not throw in non-strict mode
      const events = Array.from(decodeStreamSync(lines, { strict: false }))

      expect(events).toBeDefined()
      expect(events[0]).toEqual({ type: 'startObject' })
    })
  })

  describe('buildValueFromEvents', () => {
    it('build object from events', () => {
      const built = buildValueFromEvents([
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'key', key: 'age' },
        { type: 'primitive', value: 30 },
        { type: 'endObject' },
      ])

      expect(built).toEqual({ name: 'Alice', age: 30 })
    })

    it('build nested object from events', () => {
      const built = buildValueFromEvents([
        { type: 'startObject' },
        { type: 'key', key: 'user' },
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        { type: 'primitive', value: 'Alice' },
        { type: 'endObject' },
        { type: 'endObject' },
      ])

      expect(built).toEqual({ user: { name: 'Alice' } })
    })

    it('build array from events', () => {
      const built = buildValueFromEvents([
        { type: 'startArray', length: 3 },
        { type: 'primitive', value: 1 },
        { type: 'primitive', value: 2 },
        { type: 'primitive', value: 3 },
        { type: 'endArray' },
      ])

      expect(built).toEqual([1, 2, 3])
    })

    it('build primitive from events', () => {
      const built = buildValueFromEvents([
        { type: 'primitive', value: 'Hello' },
      ])

      expect(built).toEqual('Hello')
    })

    it('throw on incomplete event stream', () => {
      const build = () => buildValueFromEvents([
        { type: 'startObject' },
        { type: 'key', key: 'name' },
        // Missing primitive and `endObject`
      ])

      expect(build).toThrow('Incomplete event stream')
    })
  })

  describe('decodeFromLines', () => {
    it('produce same result as decode', () => {
      const toon = 'name: Alice\nage: 30\nscores[3]: 95, 87, 92'

      expect(decodeFromLines(linesOf(toon))).toEqual(decode(toon))
    })

    it('support expandPaths option', () => {
      const decoded = decodeFromLines(linesOf('user.name: Alice\nuser.age: 30'), { expandPaths: 'safe' })

      expect(decoded).toEqual({
        user: {
          name: 'Alice',
          age: 30,
        },
      })
    })

    it('handle complex nested structures', () => {
      const toon = [
        'users[2]:',
        ' - name: Alice',
        ' scores[3]: 95, 87, 92',
        ' - name: Bob',
        ' scores[3]: 88, 91, 85',
      ].join('\n')

      const viaLines = decodeFromLines(linesOf(toon))

      expect(viaLines).toEqual(decode(toon))
      expect(viaLines).toEqual({
        users: [
          { name: 'Alice', scores: [95, 87, 92] },
          { name: 'Bob', scores: [88, 91, 85] },
        ],
      })
    })

    it('handle tabular arrays', () => {
      const toon = [
        'users[3]{name,age,city}:',
        ' Alice, 30, NYC',
        ' Bob, 25, LA',
        ' Charlie, 35, SF',
      ].join('\n')

      const viaLines = decodeFromLines(linesOf(toon))

      expect(viaLines).toEqual(decode(toon))
      expect(viaLines).toEqual({
        users: [
          { name: 'Alice', age: 30, city: 'NYC' },
          { name: 'Bob', age: 25, city: 'LA' },
          { name: 'Charlie', age: 35, city: 'SF' },
        ],
      })
    })

    it('handle list item objects with empty string keyed tabular fields', () => {
      const toon = [
        'items[1]:',
        ' - ""[2]{a}:',
        ' 1',
        ' 2',
      ].join('\n')

      const viaLines = decodeFromLines(linesOf(toon))

      expect(viaLines).toEqual(decode(toon))
      expect(viaLines).toEqual({
        items: [{ '': [{ a: 1 }, { a: 2 }] }],
      })
    })
  })

  describe('streaming equivalence', () => {
    // [case name, TOON input] pairs; each must decode identically via both entry points.
    const equivalenceInputs: [string, string][] = [
      ['simple object', 'name: Alice\nage: 30'],
      ['nested objects', 'user:\n profile:\n name: Alice\n age: 30'],
      ['mixed structures', 'name: Alice\nscores[3]: 95, 87, 92\naddress:\n city: NYC\n zip: 10001'],
      ['list array with objects', 'users[2]:\n - name: Alice\n age: 30\n - name: Bob\n age: 25'],
      ['root primitive number', '42'],
      ['root primitive string', 'Hello World'],
      ['root primitive boolean', 'true'],
      ['root primitive null', 'null'],
    ]

    for (const [name, toon] of equivalenceInputs) {
      it(`should match decode() for: ${name}`, () => {
        const viaLines = decodeFromLines(linesOf(toon))
        const viaString = decode(toon)

        expect(viaLines).toEqual(viaString)
      })
    }
  })
})
describe('decodeStream (async)', () => { it('decodes simple object', async () => { const input = 'name: Alice\nage: 30' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startObject' }, { type: 'key', key: 'name' }, { type: 'primitive', value: 'Alice' }, { type: 'key', key: 'age' }, { type: 'primitive', value: 30 }, { type: 'endObject' }, ]) }) it('decodes nested object', async () => { const input = 'user:\n name: Alice\n age: 30' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startObject' }, { type: 'key', key: 'user' }, { type: 'startObject' }, { type: 'key', key: 'name' }, { type: 'primitive', value: 'Alice' }, { type: 'key', key: 'age' }, { type: 'primitive', value: 30 }, { type: 'endObject' }, { type: 'endObject' }, ]) }) it('decodes inline primitive array', async () => { const input = 'scores[3]: 95, 87, 92' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startObject' }, { type: 'key', key: 'scores' }, { type: 'startArray', length: 3 }, { type: 'primitive', value: 95 }, { type: 'primitive', value: 87 }, { type: 'primitive', value: 92 }, { type: 'endArray' }, { type: 'endObject' }, ]) }) it('decodes inline array with empty string key', async () => { const input = '""[2]: 1,2' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startObject' }, { type: 'key', key: '' }, { type: 'startArray', length: 2 }, { type: 'primitive', value: 1 }, { type: 'primitive', value: 2 }, { type: 'endArray' }, { type: 'endObject' }, ]) }) it('decodes list array', async () => { const input = 'items[2]:\n - Apple\n - Banana' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startObject' }, { type: 
'key', key: 'items' }, { type: 'startArray', length: 2 }, { type: 'primitive', value: 'Apple' }, { type: 'primitive', value: 'Banana' }, { type: 'endArray' }, { type: 'endObject' }, ]) }) it('decodes tabular array', async () => { const input = 'users[2]{name,age}:\n Alice, 30\n Bob, 25' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startObject' }, { type: 'key', key: 'users' }, { type: 'startArray', length: 2 }, { type: 'startObject' }, { type: 'key', key: 'name' }, { type: 'primitive', value: 'Alice' }, { type: 'key', key: 'age' }, { type: 'primitive', value: 30 }, { type: 'endObject' }, { type: 'startObject' }, { type: 'key', key: 'name' }, { type: 'primitive', value: 'Bob' }, { type: 'key', key: 'age' }, { type: 'primitive', value: 25 }, { type: 'endObject' }, { type: 'endArray' }, { type: 'endObject' }, ]) }) it('decodes root primitive', async () => { const input = 'Hello World' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'primitive', value: 'Hello World' }, ]) }) it('decodes root array', async () => { const input = '[2]:\n - Apple\n - Banana' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startArray', length: 2 }, { type: 'primitive', value: 'Apple' }, { type: 'primitive', value: 'Banana' }, { type: 'endArray' }, ]) }) it('decodes empty input as empty object', async () => { const lines: string[] = [] const events = await collect(decodeStream(asyncLines(lines))) expect(events).toEqual([ { type: 'startObject' }, { type: 'endObject' }, ]) }) it('throws on expandPaths option', async () => { const input = 'name: Alice' const lines = input.split('\n') await expect(async () => { await collect(decodeStream(asyncLines(lines), { expandPaths: 'safe' } as any)) }).rejects.toThrow('expandPaths is not supported in streaming decode') 
}) it('enforces strict mode validation', async () => { const input = 'items[2]:\n - Apple' const lines = input.split('\n') await expect(async () => { await collect(decodeStream(asyncLines(lines), { strict: true })) }).rejects.toThrow() }) it('allows count mismatch in non-strict mode', async () => { const input = 'items[2]:\n - Apple' const lines = input.split('\n') // Should not throw in non-strict mode const events = await collect(decodeStream(asyncLines(lines), { strict: false })) expect(events).toBeDefined() expect(events[0]).toEqual({ type: 'startObject' }) }) it('decodes list item objects with empty string keyed tabular fields', async () => { const input = 'items[1]:\n - ""[2]{a}:\n 1\n 2' const lines = input.split('\n') const events = await collect(decodeStream(asyncLines(lines))) await expect(buildValueFromEventsAsync(asyncEvents(events))).resolves.toEqual({ items: [{ '': [{ a: 1 }, { a: 2 }] }], }) }) }) describe('buildValueFromEventsAsync', () => { it('builds object from events', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'name' }, { type: 'primitive' as const, value: 'Alice' }, { type: 'key' as const, key: 'age' }, { type: 'primitive' as const, value: 30 }, { type: 'endObject' as const }, ] const result = await buildValueFromEventsAsync(asyncEvents(events)) expect(result).toEqual({ name: 'Alice', age: 30 }) }) it('builds nested object from events', async () => { const events = [ { type: 'startObject' as const }, { type: 'key' as const, key: 'user' }, { type: 'startObject' as const }, { type: 'key' as const, key: 'name' }, { type: 'primitive' as const, value: 'Alice' }, { type: 'endObject' as const }, { type: 'endObject' as const }, ] const result = await buildValueFromEventsAsync(asyncEvents(events)) expect(result).toEqual({ user: { name: 'Alice' } }) }) it('builds array from events', async () => { const events = [ { type: 'startArray' as const, length: 3 }, { type: 'primitive' as const, value: 1 }, { 
/**
 * Collects all items from an async iterable into an array.
 *
 * @param iterable - The async source to drain.
 * @returns Every yielded item, in the order it was produced.
 */
async function collect<T>(iterable: AsyncIterable<T>): Promise<T[]> {
  const results: T[] = []
  for await (const item of iterable) {
    results.push(item)
  }
  return results
}

/**
 * Converts array of lines to async iterable.
 *
 * @param lines - The lines to yield, one per iteration.
 */
async function* asyncLines(lines: string[]): AsyncGenerator<string> {
  for (const line of lines) {
    // Yield to the microtask queue first so consumers see a genuinely async source.
    await Promise.resolve()
    yield line
  }
}

/**
 * Converts array of events to async iterable.
 *
 * @param events - The events to yield, one per iteration.
 */
async function* asyncEvents<T>(events: T[]): AsyncGenerator<T> {
  for (const event of events) {
    // Yield to the microtask queue first so consumers see a genuinely async source.
    await Promise.resolve()
    yield event
  }
}
describe('JavaScript-specific type normalization', () => {
  // NOTE(review): whitespace inside expected TOON strings below is copied verbatim from
  // this view of the file; confirm indentation widths against the upstream test.

  /**
   * Registers one test per `[title, input, expected]` row, asserting that
   * `encode(input)` is exactly `expected`.
   */
  const expectEncodings = (rows: [string, unknown, string][]): void => {
    for (const [title, input, expected] of rows) {
      it(title, () => {
        expect(encode(input)).toBe(expected)
      })
    }
  }

  describe('BigInt normalization', () => {
    expectEncodings([
      ['converts BigInt within safe integer range to number', BigInt(123), '123'],
      ['converts BigInt at MAX_SAFE_INTEGER boundary to number', BigInt(Number.MAX_SAFE_INTEGER), '9007199254740991'],
      ['converts BigInt beyond safe integer range to quoted string', BigInt('9007199254740992'), '"9007199254740992"'],
      ['converts large BigInt to quoted decimal string', BigInt('12345678901234567890'), '"12345678901234567890"'],
    ])
  })

  describe('Date normalization', () => {
    expectEncodings([
      ['converts Date to ISO 8601 quoted string', new Date('2025-01-01T00:00:00.000Z'), '"2025-01-01T00:00:00.000Z"'],
      ['converts Date with milliseconds to ISO quoted string', new Date('2025-11-05T12:34:56.789Z'), '"2025-11-05T12:34:56.789Z"'],
    ])
  })

  describe('Set normalization', () => {
    it('converts Set to array', () => {
      const letters = new Set(['a', 'b', 'c'])

      expect(decode(encode(letters))).toEqual(['a', 'b', 'c'])
    })

    expectEncodings([
      ['converts empty Set to empty array', new Set(), '[0]:'],
    ])
  })

  describe('Map normalization', () => {
    it('converts Map to object', () => {
      const pairs = new Map([['key1', 'value1'], ['key2', 'value2']])

      expect(decode(encode(pairs))).toEqual({ key1: 'value1', key2: 'value2' })
    })

    expectEncodings([
      ['converts empty Map to empty object', new Map(), ''],
      ['converts Map with numeric keys to object with quoted string keys', new Map([[1, 'one'], [2, 'two']]), '"1": one\n"2": two'],
    ])
  })

  describe('undefined, function, and Symbol normalization', () => {
    expectEncodings([
      ['converts undefined to null', undefined, 'null'],
      ['converts function to null', () => {}, 'null'],
      ['converts Symbol to null', Symbol('test'), 'null'],
    ])
  })

  describe('NaN and Infinity normalization', () => {
    expectEncodings([
      ['converts NaN to null', Number.NaN, 'null'],
      ['converts Infinity to null', Number.POSITIVE_INFINITY, 'null'],
      ['converts negative Infinity to null', Number.NEGATIVE_INFINITY, 'null'],
    ])
  })

  describe('negative zero normalization', () => {
    expectEncodings([
      ['normalizes -0 to 0', -0, '0'],
    ])
  })

  describe('toJSON method support', () => {
    it('calls toJSON method when object has it', () => {
      const source = {
        data: 'example',
        toJSON() {
          return { info: this.data }
        },
      }

      expect(encode(source)).toBe('info: example')
    })

    it('calls toJSON returning a primitive', () => {
      const source = {
        value: 42,
        toJSON() {
          return 'custom-string'
        },
      }

      expect(encode(source)).toBe('custom-string')
    })

    it('calls toJSON returning an array', () => {
      const source = {
        items: [1, 2, 3],
        toJSON() {
          return ['a', 'b', 'c']
        },
      }

      expect(encode(source)).toBe('[3]: a,b,c')
    })

    it('calls toJSON in nested object properties', () => {
      const inner = {
        secret: 'hidden',
        toJSON() {
          return { public: 'visible' }
        },
      }
      const outer = {
        nested: inner,
        other: 'value',
      }

      expect(encode(outer)).toBe('nested:\n public: visible\nother: value')
    })

    it('calls toJSON in array elements', () => {
      const first = {
        data: 'first',
        toJSON() {
          return { transformed: 'first-transformed' }
        },
      }
      const second = {
        data: 'second',
        toJSON() {
          return { transformed: 'second-transformed' }
        },
      }

      expect(encode([first, second])).toBe('[2]{transformed}:\n first-transformed\n second-transformed')
    })

    it('toJSON takes precedence over Date normalization', () => {
      const dateLike = {
        toJSON() {
          return { type: 'custom-date', value: '2025-01-01' }
        },
      }
      // Make it look like a Date but with toJSON
      Object.setPrototypeOf(dateLike, Date.prototype)

      expect(encode(dateLike)).toBe('type: custom-date\nvalue: 2025-01-01')
    })

    it('works with toJSON inherited from prototype', () => {
      class CustomClass {
        value: string

        constructor(value: string) {
          this.value = value
        }

        toJSON() {
          return { classValue: this.value }
        }
      }

      expect(encode(new CustomClass('test-value'))).toBe('classValue: test-value')
    })

    it('handles toJSON returning undefined (normalizes to null)', () => {
      const source = {
        data: 'test',
        toJSON() {
          return undefined
        },
      }

      expect(encode(source)).toBe('null')
    })

    it('works with replacer function', () => {
      const source = {
        id: 1,
        secret: 'hidden',
        toJSON() {
          return { id: this.id, public: 'visible' }
        },
      }

      const replacer: EncodeReplacer = (key, value) => {
        // Replacer should see the toJSON result, not the original object
        const isToJsonResult = typeof value === 'object' && value !== null && 'public' in value
        return isToJsonResult ? { ...value, extra: 'added' } : value
      }

      const decoded = decode(encode(source, { replacer }))

      expect(decoded).toEqual({ id: 1, public: 'visible', extra: 'added' })
      expect(decoded).not.toHaveProperty('secret')
    })

    it('toJSON result is normalized before replacer is applied', () => {
      const source = {
        date: new Date('2025-01-01T00:00:00.000Z'),
        toJSON() {
          return { date: this.date }
        },
      }

      const replacer: EncodeReplacer = (key, value) => {
        // The date should already be normalized to ISO string by the time replacer sees it
        const isNormalizedDate = key === 'date' && typeof value === 'string'
        return isNormalizedDate ? value.replace('2025', 'YEAR') : value
      }

      const decoded = decode(encode(source, { replacer }))

      expect(decoded).toEqual({ date: 'YEAR-01-01T00:00:00.000Z' })
    })
  })
})
packages/toon/test/replacer.test.ts
================================================
import type { EncodeReplacer, JsonObject, JsonValue } from '../src/types'
import { describe, expect, it } from 'vitest'
import { decode, encode } from '../src/index'

// Test suite for the `replacer` option of `encode`.
// A replacer is invoked as `(key, value, path)`; returning `undefined` omits the
// entry, returning anything else substitutes the value. Most cases round-trip the
// encoded text through `decode` so assertions are made on plain values.
describe('replacer function', () => {
  // Returning `undefined` drops object properties and array elements.
  describe('basic filtering', () => {
    it('removes properties by returning undefined', () => {
      const input = { name: 'Alice', password: 'secret', email: 'alice@example.com' }
      const replacer: EncodeReplacer = (key, value) => {
        if (key === 'password')
          return undefined
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({ name: 'Alice', email: 'alice@example.com' })
      expect(decoded).not.toHaveProperty('password')
    })

    it('removes array elements by returning undefined', () => {
      const input = [1, 2, 3, 4, 5]
      const replacer: EncodeReplacer = (key, value) => {
        // Remove even numbers (key is index as string)
        if (typeof value === 'number' && value % 2 === 0)
          return undefined
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual([1, 3, 5])
    })

    it('handles deeply nested filtering', () => {
      const input = {
        users: [
          { name: 'Alice', password: 'secret1', role: 'admin' },
          { name: 'Bob', password: 'secret2', role: 'user' },
        ],
      }
      const replacer: EncodeReplacer = (key, value) => {
        if (key === 'password')
          return undefined
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({
        users: [
          { name: 'Alice', role: 'admin' },
          { name: 'Bob', role: 'user' },
        ],
      })
    })
  })

  // Returning a different value substitutes it in the output.
  describe('value transformation', () => {
    it('transforms primitive values', () => {
      const input = { name: 'alice', age: 30 }
      const replacer: EncodeReplacer = (key, value) => {
        // Uppercase all strings
        if (typeof value === 'string')
          return value.toUpperCase()
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({ name: 'ALICE', age: 30 })
    })

    it('transforms objects', () => {
      const input = { user: { name: 'Alice' } }
      const replacer: EncodeReplacer = (key, value, path) => {
        // Add metadata to all objects at depth 1
        if (path.length === 1 && typeof value === 'object' && value !== null && !Array.isArray(value)) {
          return { ...value as object, _id: `${key}_123` }
        }
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({
        user: { name: 'Alice', _id: 'user_123' },
      })
    })

    it('transforms arrays', () => {
      const input = { numbers: [1, 2, 3] }
      const replacer: EncodeReplacer = (key, value) => {
        // Double all numbers
        if (typeof value === 'number')
          return value * 2
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({ numbers: [2, 4, 6] })
    })
  })

  // The root value is passed to the replacer with key '' and an empty path.
  describe('root value handling', () => {
    it('calls replacer on root value with empty string key', () => {
      const input = { value: 42 }
      let rootKeySeen = false
      let rootPathSeen = false
      const replacer: EncodeReplacer = (key, value, path) => {
        // Track the two conditions independently so each expectation below can
        // fail on its own; a single combined condition would make them redundant.
        if (key === '')
          rootKeySeen = true
        if (path.length === 0)
          rootPathSeen = true
        return value
      }

      encode(input, { replacer })

      expect(rootKeySeen).toBe(true)
      expect(rootPathSeen).toBe(true)
    })

    it('transforms root object', () => {
      const input = { name: 'Alice' }
      const replacer: EncodeReplacer = (key, value, path) => {
        if (path.length === 0) {
          return { ...value as object, timestamp: 1234567890 }
        }
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({ name: 'Alice', timestamp: 1234567890 })
    })

    it('does not omit root when replacer returns undefined', () => {
      const input = { name: 'Alice' }
      const replacer: EncodeReplacer = (key, value, path) => {
        // Try to omit root (should be ignored)
        if (path.length === 0)
          return undefined
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      // Root should still be encoded
      expect(decoded).toEqual({ name: 'Alice' })
    })

    it('handles primitive root values', () => {
      const input = 'hello'
      const replacer: EncodeReplacer = (key, value) => {
        if (typeof value === 'string')
          return value.toUpperCase()
        return value
      }

      const result = encode(input, { replacer })

      expect(result).toBe('HELLO')
    })

    it('provides correct arguments to root call', () => {
      const input = { data: 'test' }
      const calls: { key: string, path: (string | number)[] }[] = []
      const replacer: EncodeReplacer = (key, value, path) => {
        // Copy the path so later calls cannot affect what was recorded
        calls.push({ key, path: [...path] })
        return value
      }

      encode(input, { replacer })

      // First call should be root
      expect(calls[0]).toEqual({ key: '', path: [] })
    })
  })

  // The `path` argument lists the segments leading to the current value.
  describe('path tracking', () => {
    it('provides correct paths for nested objects', () => {
      const input = {
        user: {
          profile: {
            name: 'Alice',
          },
        },
      }
      const paths: string[] = []
      const replacer: EncodeReplacer = (key, value, path) => {
        paths.push(path.join('.'))
        return value
      }

      encode(input, { replacer })

      expect(paths).toContain('') // root
      expect(paths).toContain('user')
      expect(paths).toContain('user.profile')
      expect(paths).toContain('user.profile.name')
    })

    it('provides correct paths for arrays with string indices', () => {
      const input = { items: ['a', 'b', 'c'] }
      const seenKeys: string[] = []
      const replacer: EncodeReplacer = (key, value, path) => {
        // Skip the root call and the `items` property itself; record element keys only
        if (path.length > 0 && path[path.length - 1] !== 'items') {
          seenKeys.push(key)
        }
        return value
      }

      encode(input, { replacer })

      // Array indices should be string '0', '1', '2'
      expect(seenKeys).toEqual(['0', '1', '2'])
    })

    it('provides correct paths for nested arrays', () => {
      const input = {
        matrix: [
          [1, 2],
          [3, 4],
        ],
      }
      const paths: string[] = []
      const replacer: EncodeReplacer = (key, value, path) => {
        if (typeof value === 'number') {
          paths.push(`${path.join('.')} (key="${key}")`)
        }
        return value
      }

      encode(input, { replacer })

      expect(paths).toContain('matrix.0.0 (key="0")')
      expect(paths).toContain('matrix.0.1 (key="1")')
      expect(paths).toContain('matrix.1.0 (key="0")')
      expect(paths).toContain('matrix.1.1 (key="1")')
    })
  })

  describe('edge cases', () => {
    it('handles empty objects', () => {
      const input = {}
      const replacer: EncodeReplacer = (key, value) => value

      const result = encode(input, { replacer })

      expect(result).toBe('')
    })

    it('handles empty arrays', () => {
      const input: JsonValue[] = []
      const replacer: EncodeReplacer = (key, value) => value

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual([])
    })

    it('handles null values', () => {
      const input = { value: null }
      const replacer: EncodeReplacer = (key, value) => {
        if (value === null)
          return 'NULL'
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({ value: 'NULL' })
    })

    it('re-normalizes non-JsonValue returns', () => {
      const input = { date: '2025-01-01' }
      const replacer: EncodeReplacer = (key, value) => {
        // Return a Date object (will be normalized to ISO string)
        if (key === 'date')
          return new Date(value as string)
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result) as JsonObject

      // Date should be normalized to ISO string
      expect(typeof decoded.date).toBe('string')
      expect(decoded.date).toMatch(/^\d{4}-\d{2}-\d{2}T/)
    })

    it('handles all properties being filtered out', () => {
      const input = { a: 1, b: 2, c: 3 }
      const replacer: EncodeReplacer = (key, value, path) => {
        // Filter out all properties (but not root)
        if (path.length > 0)
          return undefined
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      // Should result in empty object
      expect(decoded).toEqual({})
    })

    it('handles all array elements being filtered out', () => {
      const input = [1, 2, 3]
      const replacer: EncodeReplacer = (key, value, path) => {
        // Filter out all elements
        if (path.length > 0)
          return undefined
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      // Should result in empty array
      expect(decoded).toEqual([])
    })

    it('handles nested objects with mixed omissions', () => {
      const input = {
        keep: 'this',
        remove: 'that',
        nested: {
          keep: 'nested keep',
          remove: 'nested remove',
        },
      }
      const replacer: EncodeReplacer = (key, value) => {
        if (key === 'remove')
          return undefined
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({
        keep: 'this',
        nested: {
          keep: 'nested keep',
        },
      })
    })

    it('handles arrays with some elements removed', () => {
      const input = { items: [{ id: 1, keep: true }, { id: 2, keep: false }, { id: 3, keep: true }] }
      const replacer: EncodeReplacer = (key, value) => {
        // Remove objects where keep is false
        if (typeof value === 'object' && value !== null && !Array.isArray(value) && 'keep' in value && value.keep === false) {
          return undefined
        }
        return value
      }

      const result = encode(input, { replacer })
      const decoded = decode(result)

      expect(decoded).toEqual({
        items: [{ id: 1, keep: true }, { id: 3, keep: true }],
      })
    })
  })

  // The replacer must compose with the other encode options.
  describe('integration with other options', () => {
    it('works with keyFolding', () => {
      const input = {
        user: {
          profile: {
            name: 'Alice',
          },
        },
      }
      const replacer: EncodeReplacer = (key, value) => {
        if (typeof value === 'string')
          return value.toUpperCase()
        return value
      }

      const result = encode(input, { replacer, keyFolding: 'safe' })

      expect(result).toContain('user.profile.name: ALICE')
    })

    it('works with custom delimiters', () => {
      const input = { items: [1, 2, 3] }
      const replacer: EncodeReplacer = (key, value) => {
        if (typeof value === 'number')
          return value * 10
        return value
      }

      const result = encode(input, { replacer, delimiter: '\t' })

      expect(result).toContain('10\t20\t30')
    })

    it('works with custom indent', () => {
      const input = { user: { name: 'Alice' } }
      const replacer: EncodeReplacer = (key, value) => value

      const result = encode(input, { replacer, indent: 4 })

      // Should use 4-space indent; assert all four spaces so a narrower
      // indent cannot satisfy the check.
      expect(result).toContain('    name: Alice')
    })
  })

  describe('comparison with JSON.stringify replacer', () => {
    it('behaves similarly to JSON.stringify for filtering', () => {
      const input = { name: 'Alice', password: 'secret' }

      // TOON replacer
      const toonReplacer: EncodeReplacer = (key, value) => {
        if (key === 'password')
          return undefined
        return value
      }

      // JSON.stringify replacer
      const jsonReplacer = (key: string, value: unknown) => {
        if (key === 'password')
          return undefined
        return value
      }

      const toonResult = decode(encode(input, { replacer: toonReplacer }))
      const jsonResult = JSON.parse(JSON.stringify(input, jsonReplacer))

      expect(toonResult).toEqual(jsonResult)
    })

    it('uses string indices for arrays like JSON.stringify', () => {
      const input = ['a', 'b', 'c']
      const keys: string[] = []
      const replacer: EncodeReplacer = (key, value, path) => {
        // Ignore the root call; record element keys only
        if (path.length > 0)
          keys.push(key)
        return value
      }

      encode(input, { replacer })

      // Should match JSON.stringify behavior (string indices)
      expect(keys).toEqual(['0', '1', '2'])
    })
  })
})

================================================
FILE: packages/toon/test/types.ts
================================================
/**
 * Type definitions for TOON test fixtures
 *
 * @remarks
 * Matches the JSON schema at https://github.com/toon-format/spec/blob/main/tests/fixtures.schema.json.
*/
export interface TestCase {
  /** Label for the test case (presumably used as the test title; confirm against the fixture runner). */
  name: string
  /** Value handed to the operation under test; typed `unknown` so a fixture can carry any shape. */
  input: unknown
  /** Value the result is compared against; typed `unknown` for the same reason as `input`. */
  expected: unknown
  /** Optional flag; presumably marks cases where the operation is expected to fail (verify against the schema). */
  shouldError?: boolean
  /** Optional per-case settings; every member is optional. */
  options?: {
    /** One of the three accepted delimiter characters: comma, tab, or pipe. */
    delimiter?: ',' | '\t' | '|'
    /** Indentation width (presumably a number of spaces; confirm). */
    indent?: number
    /** NOTE(review): looks like a decoder strictness toggle; confirm against the schema. */
    strict?: boolean
    /** Key-folding mode: `'off'` or `'safe'`. */
    keyFolding?: 'off' | 'safe'
    /** NOTE(review): presumably bounds how deep key folding may go; confirm. */
    flattenDepth?: number
    /** Path-expansion mode: `'off'` or `'safe'`. */
    expandPaths?: 'off' | 'safe'
  }
  /** Optional reference to a specification section (format not shown here). */
  specSection?: string
  /** Optional free-form remark attached to the case. */
  note?: string
  /** Optional version string; presumably the lowest spec version the case applies to (confirm). */
  minSpecVersion?: string
}

/** A fixture file: metadata plus the list of test cases it contains. */
export interface Fixtures {
  /** Version string of the fixture file. */
  version: string
  /** Whether the contained cases belong to the `'encode'` or the `'decode'` category. */
  category: 'encode' | 'decode'
  /** Description of the fixture file. */
  description: string
  /** The test cases in this fixture file. */
  tests: TestCase[]
}

================================================
FILE: packages/toon/tsdown.config.ts
================================================
import type { UserConfig } from 'tsdown/config'
import { defineConfig } from 'tsdown/config'

// Build configuration: `src/index.ts` is the only entry, with the `dts` option switched on.
const config: UserConfig = defineConfig({
  entry: 'src/index.ts',
  dts: true,
})

export default config

================================================
FILE: pnpm-workspace.yaml
================================================
packages:
  - docs
  - benchmarks
  - packages/*

onlyBuiltDependencies:
  - '@parcel/watcher'
  - esbuild

================================================
FILE: tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "ESNext",
    "rootDir": ".",
    "module": "ESNext",
    "moduleResolution": "Bundler",
    "resolveJsonModule": true,
    "allowImportingTsExtensions": true,
    "strict": true,
    "noUncheckedIndexedAccess": true,
    "declaration": true,
    "noEmit": true,
    "esModuleInterop": true,
    "isolatedDeclarations": true,
    "isolatedModules": true,
    "verbatimModuleSyntax": true,
    "erasableSyntaxOnly": true,
    "skipLibCheck": true
  }
}
Experiment with JSON-to-TOON encoding in real time.
{{ toonOutput }}