Repository: toon-format/toon Branch: main Commit: d5f50a2ce5e0 Files: 114 Total size: 1.5 MB Directory structure: gitextract_qnn9ipfr/ ├── .editorconfig ├── .github/ │ └── workflows/ │ ├── ci.yml │ ├── deploy.yml │ ├── pr-title.yml │ └── release.yml ├── .gitignore ├── .npmrc ├── .vscode/ │ ├── extensions.json │ └── settings.json ├── LICENSE ├── SPEC.md ├── automd.config.ts ├── benchmarks/ │ ├── README.md │ ├── data/ │ │ └── github-repos.json │ ├── package.json │ ├── results/ │ │ ├── accuracy/ │ │ │ └── models/ │ │ │ ├── claude-haiku-4-5-20251001 │ │ │ ├── gemini-3-flash-preview │ │ │ ├── gpt-5-nano │ │ │ └── grok-4-1-fast-non-reasoning │ │ ├── retrieval-accuracy.md │ │ └── token-efficiency.md │ ├── scripts/ │ │ ├── accuracy-benchmark.ts │ │ ├── fetch-github-repos.ts │ │ └── token-efficiency-benchmark.ts │ └── src/ │ ├── constants.ts │ ├── datasets.ts │ ├── evaluate.ts │ ├── formatters.ts │ ├── normalize.ts │ ├── questions/ │ │ ├── analytics.ts │ │ ├── event-logs.ts │ │ ├── github.ts │ │ ├── index.ts │ │ ├── nested-config.ts │ │ ├── nested.ts │ │ ├── structural-validation.ts │ │ ├── structure.ts │ │ ├── tabular.ts │ │ └── utils.ts │ ├── report.ts │ ├── storage.ts │ ├── types.ts │ └── utils.ts ├── commitlint.config.ts ├── docs/ │ ├── .vitepress/ │ │ ├── config.ts │ │ ├── meta.ts │ │ └── theme/ │ │ ├── components/ │ │ │ ├── PlaygroundLayout.vue │ │ │ └── VPInput.vue │ │ ├── index.ts │ │ ├── overrides.css │ │ └── vars.css │ ├── cli/ │ │ └── index.md │ ├── ecosystem/ │ │ ├── implementations.md │ │ └── tools-and-playgrounds.md │ ├── guide/ │ │ ├── benchmarks.md │ │ ├── format-overview.md │ │ ├── getting-started.md │ │ └── llm-prompts.md │ ├── index.md │ ├── package.json │ ├── playground.md │ ├── reference/ │ │ ├── api.md │ │ ├── efficiency-formalization.md │ │ ├── spec.md │ │ └── syntax-cheatsheet.md │ ├── uno.config.ts │ └── wrangler.toml ├── eslint.config.ts ├── package.json ├── packages/ │ ├── cli/ │ │ ├── README.md │ │ ├── bin/ │ │ │ └── toon.mjs │ │ ├── package.json 
│ │ ├── src/ │ │ │ ├── cli-entry.ts │ │ │ ├── conversion.ts │ │ │ ├── index.ts │ │ │ ├── json-from-events.ts │ │ │ ├── json-stringify-stream.ts │ │ │ ├── types.ts │ │ │ └── utils.ts │ │ ├── test/ │ │ │ ├── index.test.ts │ │ │ ├── json-from-events.test.ts │ │ │ ├── json-stringify-stream.test.ts │ │ │ └── utils.ts │ │ └── tsdown.config.ts │ └── toon/ │ ├── README.md │ ├── package.json │ ├── src/ │ │ ├── constants.ts │ │ ├── decode/ │ │ │ ├── decoders.ts │ │ │ ├── event-builder.ts │ │ │ ├── expand.ts │ │ │ ├── parser.ts │ │ │ ├── scanner.ts │ │ │ └── validation.ts │ │ ├── encode/ │ │ │ ├── encoders.ts │ │ │ ├── folding.ts │ │ │ ├── normalize.ts │ │ │ ├── primitives.ts │ │ │ └── replacer.ts │ │ ├── index.ts │ │ ├── shared/ │ │ │ ├── literal-utils.ts │ │ │ ├── string-utils.ts │ │ │ └── validation.ts │ │ └── types.ts │ ├── test/ │ │ ├── decode.test.ts │ │ ├── decodeStream.test.ts │ │ ├── decodeStreamAsync.test.ts │ │ ├── encode.test.ts │ │ ├── encodeLines.test.ts │ │ ├── normalization.test.ts │ │ ├── replacer.test.ts │ │ └── types.ts │ └── tsdown.config.ts ├── pnpm-workspace.yaml └── tsconfig.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .editorconfig ================================================ root = true [*] charset = utf-8 indent_style = space indent_size = 2 end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: - main pull_request: branches: - main concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true permissions: contents: read jobs: ci: runs-on: ubuntu-slim timeout-minutes: 10 steps: - name: Checkout uses: actions/checkout@v5 with: persist-credentials: false - name: Setup pnpm uses: 
pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v6 with: node-version: 24 - name: Get pnpm store directory id: pnpm-cache run: echo "pnpm_cache_dir=$(pnpm store path)" >> $GITHUB_OUTPUT - name: Cache pnpm dependencies uses: actions/cache@v4 with: path: ${{ steps.pnpm-cache.outputs.pnpm_cache_dir }} key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} restore-keys: | ${{ runner.os }}-pnpm-store- - name: Install dependencies run: pnpm install --frozen-lockfile - name: Lint run: pnpm run lint - name: Typecheck run: pnpm run test:types - name: Test run: pnpm run test ================================================ FILE: .github/workflows/deploy.yml ================================================ name: Deploy Docs on: push: branches: - main paths: - docs/** - automd.config.ts - package.json - eslint.config.mjs concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true permissions: {} jobs: deploy: name: Deploy Docs runs-on: ubuntu-slim steps: - name: Checkout uses: actions/checkout@v5 with: persist-credentials: false - name: Setup pnpm uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v6 with: node-version: 24 - name: Get pnpm store directory id: pnpm-cache run: echo "pnpm_cache_dir=$(pnpm store path)" >> $GITHUB_OUTPUT - name: Cache pnpm dependencies uses: actions/cache@v4 with: path: ${{ steps.pnpm-cache.outputs.pnpm_cache_dir }} key: ${{ runner.os }}-pnpm-store-${{ hashFiles('**/pnpm-lock.yaml') }} restore-keys: | ${{ runner.os }}-pnpm-store- - name: Install dependencies run: pnpm install --frozen-lockfile - name: Build docs run: pnpm run docs:build - name: Deploy to Cloudflare run: cd docs && npx wrangler deploy env: CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }} ================================================ FILE: .github/workflows/pr-title.yml ================================================ name: Check PR Title on: pull_request: types: [opened, edited] 
permissions: contents: read concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: lint-pr-title: name: Lint PR title runs-on: ubuntu-slim if: ${{ (github.event.action == 'opened' || github.event.changes.title != null) && github.actor != 'renovate[bot]' }} steps: - name: Checkout uses: actions/checkout@v5 with: persist-credentials: false # Only fetch the config file from the repository sparse-checkout-cone-mode: false sparse-checkout: commitlint.config.ts - name: Install dependencies run: npm install -D @commitlint/cli @commitlint/config-conventional - name: Validate PR title with commitlint run: echo "$PR_TITLE" | npx commitlint env: PR_TITLE: ${{ github.event.pull_request.title }} ================================================ FILE: .github/workflows/release.yml ================================================ name: Release + Publish on: push: tags: - 'v*' concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: release: name: Release runs-on: ubuntu-slim permissions: id-token: write contents: write steps: - name: Checkout uses: actions/checkout@v5 with: fetch-depth: 0 # Required for fetching tags and generating release notes persist-credentials: true - name: Setup pnpm uses: pnpm/action-setup@v4 - name: Setup Node.js uses: actions/setup-node@v6 with: node-version: 24 registry-url: https://registry.npmjs.org/ cache: pnpm - name: Generate changelog and create GitHub release run: npx changelogithub env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Install dependencies run: pnpm install --frozen-lockfile - name: Build packages run: pnpm run build - name: Publish packages to npm run: npm install -g npm@latest && pnpm -r publish --access public --no-git-checks env: NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} ================================================ FILE: .gitignore ================================================ dist node_modules .DS_Store .env 
docs/.vitepress/dist docs/.vitepress/cache packages/toon/test/fixtures/*.json packages/toon/test/fixtures/*.toon ================================================ FILE: .npmrc ================================================ shamefully-hoist=true ================================================ FILE: .vscode/extensions.json ================================================ { "recommendations": [ "dbaeumer.vscode-eslint" ] } ================================================ FILE: .vscode/settings.json ================================================ { // Disable the default formatter, use ESLint instead "prettier.enable": false, "editor.formatOnSave": false, // Auto-fix "editor.codeActionsOnSave": { "source.fixAll.eslint": "explicit", "source.organizeImports": "never" }, // Silence the stylistic rules in your IDE, but still auto-fix them "eslint.rules.customizations": [ { "rule": "style/*", "severity": "off" }, { "rule": "format/*", "severity": "off" }, { "rule": "*-indent", "severity": "off" }, { "rule": "*-spacing", "severity": "off" }, { "rule": "*-spaces", "severity": "off" }, { "rule": "*-order", "severity": "off" }, { "rule": "*-dangle", "severity": "off" }, { "rule": "*-newline", "severity": "off" }, { "rule": "*quotes", "severity": "off" }, { "rule": "*semi", "severity": "off" } ], // Enable ESLint for all supported languages "eslint.validate": [ "javascript", "javascriptreact", "typescript", "typescriptreact", "vue", "html", "markdown", "json", "jsonc", "yaml", "toml", "xml", "gql", "graphql", "astro", "svelte", "css", "less", "scss", "pcss", "postcss" ] } ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2025-PRESENT Johann Schopplich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: SPEC.md ================================================ # TOON Specification The TOON specification has moved to a dedicated repository: [github.com/toon-format/spec](https://github.com/toon-format/spec) ## Current Version **Version 3.0** (2025-11-24) ## Quick Links - **[Full Specification](https://github.com/toon-format/spec/blob/main/SPEC.md)** - Complete technical specification - **[Changelog](https://github.com/toon-format/spec/blob/main/CHANGELOG.md)** - Version history - **[Examples](https://github.com/toon-format/spec/tree/main/examples)** - Example TOON files - **[Conformance Tests](https://github.com/toon-format/spec/tree/main/tests)** - Language-agnostic test fixtures for implementations - **[Contributing](https://github.com/toon-format/spec/blob/main/CONTRIBUTING.md)** - How to propose spec changes ## Why a Separate Repo? 
The specification has been moved to `toon-format/spec` to: - Provide a canonical, language-agnostic source of truth - Enable independent versioning of spec and implementations - Support the growing community of TOON implementations across multiple languages - Facilitate collaboration on spec evolution through a dedicated RFC process ## This Repository This repository (`toon-format/toon`) remains the **reference implementation** in TypeScript/JavaScript. For specification discussions, issues, and contributions, please use the spec repository. ================================================ FILE: automd.config.ts ================================================ import type { Config } from 'automd' const config: Config = { input: ['docs/guide/benchmarks.md'], } export default config ================================================ FILE: benchmarks/README.md ================================================ # TOON Benchmarks Benchmarks measuring TOON's **token efficiency** and **retrieval accuracy** compared to JSON, XML, YAML, and CSV. > [!NOTE] > Results are automatically embedded in the [main README](https://github.com/toon-format/toon/#benchmarks). This guide focuses on running the benchmarks locally. ## Quick Start ```bash # Run token efficiency benchmark pnpm benchmark:tokens # Run retrieval accuracy benchmark (requires API keys) pnpm benchmark:accuracy ``` ## Token Efficiency Benchmark Measures token count reduction across JSON, XML, YAML, CSV, and TOON: 1. Generate datasets (GitHub repos, analytics, orders) 2. Convert to all formats (TOON, JSON, XML, YAML, CSV) 3. Tokenize using `gpt-tokenizer` (`o200k_base` encoding) 4. Calculate savings and generate report ```bash pnpm benchmark:tokens ``` Results are saved to `results/token-efficiency.md`. ## Retrieval Accuracy Benchmark Tests how well LLMs can answer questions about data in different formats (TOON, JSON, JSON compact, XML, YAML, CSV): 1. 
Generate 209 questions across 11 datasets (6 primary + 5 structural validation; CSV only included for datasets with flat/tabular structure) 2. Convert each dataset to all supported formats 3. Query each LLM with formatted data + question 4. Validate answers deterministically using type-aware comparison (no LLM judge needed) 5. Aggregate metrics and generate report ### Setup 1. Edit [`src/evaluate.ts`](./src/evaluate.ts) and add models to the exported `models` array: ```ts export const models: LanguageModelV3[] = [ openai('gpt-5-nano'), anthropic('claude-haiku-4-5-20251001'), google('gemini-3-flash-preview'), xai('grok-4-1-fast-non-reasoning'), // Add your models here ] ``` 2. Duplicate `.env.example` to `.env` and add your API keys: ```bash cp .env.example .env ``` ### Usage ```bash # Full benchmark pnpm benchmark:accuracy # Dry run (10 questions only, for testing setup) DRY_RUN=true pnpm benchmark:accuracy ``` Running the script will: 1. Prompt you to select which models to test. 2. Skip models with existing results (rerun to overwrite). 3. Show progress with rate limiting. 4. Save results to `results/accuracy/models/{model-id}.json`. 5. Generate report at `results/retrieval-accuracy.md`. 
### Configuration Edit [`src/constants.ts`](./src/constants.ts) to adjust: - `MODEL_RPM_LIMITS` – Rate limits per model - `DEFAULT_CONCURRENCY` – Parallel tasks (default: 10) - `DRY_RUN_LIMITS` – Questions per dry run (default: 10) ## Project Structure ``` scripts/ ├── accuracy-benchmark.ts # Retrieval accuracy benchmark ├── token-efficiency-benchmark.ts # Token counting benchmark └── fetch-github-repos.ts # Update GitHub dataset src/ ├── constants.ts # Configuration ├── datasets.ts # Test data generators ├── evaluate.ts # LLM evaluation ├── formatters.ts # Format converters ├── normalize.ts # Answer normalization ├── report.ts # Markdown reports ├── storage.ts # Result caching ├── types.ts # Type definitions ├── utils.ts # Helpers └── questions/ # Question generators ├── analytics.ts ├── event-logs.ts ├── github.ts ├── index.ts ├── nested-config.ts ├── nested.ts ├── structural-validation.ts ├── structure.ts ├── tabular.ts └── utils.ts data/ └── github-repos.json # Top 100 GitHub repos results/ ├── token-efficiency.md # Token savings report ├── retrieval-accuracy.md # Accuracy report └── accuracy/models/ # Per-model results (JSON) ``` ================================================ FILE: benchmarks/data/github-repos.json ================================================ [ { "id": 28457823, "name": "freeCodeCamp", "repo": "freeCodeCamp/freeCodeCamp", "description": "freeCodeCamp.org's open-source codebase and curriculum. 
Learn math, programming, and computer science for free.", "createdAt": "2014-12-24T17:49:19Z", "updatedAt": "2025-10-28T11:58:08Z", "pushedAt": "2025-10-28T10:17:16Z", "stars": 430886, "watchers": 8583, "forks": 42146, "defaultBranch": "main" }, { "id": 132750724, "name": "build-your-own-x", "repo": "codecrafters-io/build-your-own-x", "description": "Master programming by recreating your favorite technologies from scratch.", "createdAt": "2018-05-09T12:03:18Z", "updatedAt": "2025-10-28T12:37:11Z", "pushedAt": "2025-10-10T18:45:01Z", "stars": 430877, "watchers": 6332, "forks": 40453, "defaultBranch": "master" }, { "id": 21737465, "name": "awesome", "repo": "sindresorhus/awesome", "description": "😎 Awesome lists about all kinds of interesting topics", "createdAt": "2014-07-11T13:42:37Z", "updatedAt": "2025-10-28T12:40:21Z", "pushedAt": "2025-10-27T17:57:31Z", "stars": 410052, "watchers": 8017, "forks": 32029, "defaultBranch": "main" }, { "id": 13491895, "name": "free-programming-books", "repo": "EbookFoundation/free-programming-books", "description": ":books: Freely available programming books", "createdAt": "2013-10-11T06:50:37Z", "updatedAt": "2025-10-28T12:16:59Z", "pushedAt": "2025-10-28T01:52:13Z", "stars": 375307, "watchers": 9786, "forks": 65199, "defaultBranch": "main" }, { "id": 54346799, "name": "public-apis", "repo": "public-apis/public-apis", "description": "A collective list of free APIs", "createdAt": "2016-03-20T23:49:42Z", "updatedAt": "2025-10-28T12:33:14Z", "pushedAt": "2025-05-20T15:56:34Z", "stars": 374003, "watchers": 4400, "forks": 39473, "defaultBranch": "master" }, { "id": 85077558, "name": "developer-roadmap", "repo": "kamranahmedse/developer-roadmap", "description": "Interactive roadmaps, guides and other educational content to help developers grow in their careers.", "createdAt": "2017-03-15T13:45:52Z", "updatedAt": "2025-10-28T12:31:02Z", "pushedAt": "2025-10-28T11:09:58Z", "stars": 342136, "watchers": 6886, "forks": 43234, 
"defaultBranch": "master" }, { "id": 60493101, "name": "coding-interview-university", "repo": "jwasham/coding-interview-university", "description": "A complete computer science study plan to become a software engineer.", "createdAt": "2016-06-06T02:34:12Z", "updatedAt": "2025-10-28T12:21:02Z", "pushedAt": "2025-08-28T14:42:47Z", "stars": 331947, "watchers": 8511, "forks": 81057, "defaultBranch": "main" }, { "id": 83222441, "name": "system-design-primer", "repo": "donnemartin/system-design-primer", "description": "Learn how to design large-scale systems. Prep for the system design interview. Includes Anki flashcards.", "createdAt": "2017-02-26T16:15:28Z", "updatedAt": "2025-10-28T12:32:56Z", "pushedAt": "2025-05-21T11:13:33Z", "stars": 324409, "watchers": 6819, "forks": 52904, "defaultBranch": "master" }, { "id": 177736533, "name": "996.ICU", "repo": "996icu/996.ICU", "description": "Repo for counting stars and contributing. Press F to pay respect to glorious developers.", "createdAt": "2019-03-26T07:31:14Z", "updatedAt": "2025-10-28T11:07:13Z", "pushedAt": "2025-08-22T06:01:29Z", "stars": 274706, "watchers": 4216, "forks": 21029, "defaultBranch": "master" }, { "id": 21289110, "name": "awesome-python", "repo": "vinta/awesome-python", "description": "An opinionated list of awesome Python frameworks, libraries, software and resources.", "createdAt": "2014-06-27T21:00:06Z", "updatedAt": "2025-10-28T12:28:13Z", "pushedAt": "2025-10-16T13:40:58Z", "stars": 266661, "watchers": 6128, "forks": 26604, "defaultBranch": "master" }, { "id": 36633370, "name": "awesome-selfhosted", "repo": "awesome-selfhosted/awesome-selfhosted", "description": "A list of Free Software network services and web applications which can be hosted on your own servers", "createdAt": "2015-06-01T02:33:17Z", "updatedAt": "2025-10-28T12:24:53Z", "pushedAt": "2025-10-27T21:40:26Z", "stars": 255143, "watchers": 2990, "forks": 11802, "defaultBranch": "master" }, { "id": 88011908, "name": 
"project-based-learning", "repo": "practical-tutorials/project-based-learning", "description": "Curated list of project-based tutorials", "createdAt": "2017-04-12T05:07:46Z", "updatedAt": "2025-10-28T12:22:51Z", "pushedAt": "2024-08-15T05:33:54Z", "stars": 248050, "watchers": 3446, "forks": 32431, "defaultBranch": "master" }, { "id": 10270250, "name": "react", "repo": "facebook/react", "description": "The library for web and native user interfaces.", "createdAt": "2013-05-24T16:15:54Z", "updatedAt": "2025-10-28T12:24:55Z", "pushedAt": "2025-10-28T01:25:20Z", "stars": 240100, "watchers": 6686, "forks": 49682, "defaultBranch": "main" }, { "id": 63476337, "name": "Python", "repo": "TheAlgorithms/Python", "description": "All Algorithms implemented in Python", "createdAt": "2016-07-16T09:44:01Z", "updatedAt": "2025-10-28T12:25:22Z", "pushedAt": "2025-10-20T00:59:36Z", "stars": 212119, "watchers": 5975, "forks": 49025, "defaultBranch": "master" }, { "id": 11730342, "name": "vue", "repo": "vuejs/vue", "description": "This is the repo for Vue 2. 
For Vue 3, go to https://github.com/vuejs/core", "createdAt": "2013-07-29T03:24:51Z", "updatedAt": "2025-10-28T10:39:45Z", "pushedAt": "2024-10-10T07:24:15Z", "stars": 209636, "watchers": 5786, "forks": 33795, "defaultBranch": "main" }, { "id": 2325298, "name": "linux", "repo": "torvalds/linux", "description": "Linux kernel source tree", "createdAt": "2011-09-04T22:48:12Z", "updatedAt": "2025-10-28T12:39:23Z", "pushedAt": "2025-10-27T18:11:32Z", "stars": 205858, "watchers": 7743, "forks": 58047, "defaultBranch": "master" }, { "id": 19415064, "name": "computer-science", "repo": "ossu/computer-science", "description": "🎓 Path to a free self-taught education in Computer Science!", "createdAt": "2014-05-04T00:18:39Z", "updatedAt": "2025-10-28T12:41:20Z", "pushedAt": "2025-08-23T18:48:52Z", "stars": 196086, "watchers": 5936, "forks": 24474, "defaultBranch": "master" }, { "id": 126577260, "name": "javascript-algorithms", "repo": "trekhleb/javascript-algorithms", "description": "📝 Algorithms and data structures implemented in JavaScript with explanations and links to further readings", "createdAt": "2018-03-24T07:47:04Z", "updatedAt": "2025-10-28T12:37:32Z", "pushedAt": "2025-10-22T15:03:29Z", "stars": 193744, "watchers": 4268, "forks": 30929, "defaultBranch": "master" }, { "id": 45717250, "name": "tensorflow", "repo": "tensorflow/tensorflow", "description": "An Open Source Machine Learning Framework for Everyone", "createdAt": "2015-11-07T01:19:20Z", "updatedAt": "2025-10-28T11:56:54Z", "pushedAt": "2025-10-28T12:37:04Z", "stars": 192240, "watchers": 7431, "forks": 74932, "defaultBranch": "master" }, { "id": 138393139, "name": "the-book-of-secret-knowledge", "repo": "trimstray/the-book-of-secret-knowledge", "description": "A collection of inspiring lists, manuals, cheatsheets, blogs, hacks, one-liners, cli/web tools and more.", "createdAt": "2018-06-23T10:43:14Z", "updatedAt": "2025-10-28T12:40:20Z", "pushedAt": "2024-11-19T14:00:38Z", "stars": 191487, "watchers": 2678, 
"forks": 11764, "defaultBranch": "master" }, { "id": 14440270, "name": "You-Dont-Know-JS", "repo": "getify/You-Dont-Know-JS", "description": "A book series (2 published editions) on the JS language.", "createdAt": "2013-11-16T02:37:24Z", "updatedAt": "2025-10-28T11:34:43Z", "pushedAt": "2025-05-20T14:22:36Z", "stars": 183653, "watchers": 5803, "forks": 33671, "defaultBranch": "2nd-ed" }, { "id": 121395510, "name": "CS-Notes", "repo": "CyC2018/CS-Notes", "description": ":books: 技术面试必备基础知识、Leetcode、计算机操作系统、计算机网络、系统设计", "createdAt": "2018-02-13T14:56:24Z", "updatedAt": "2025-10-28T11:56:57Z", "pushedAt": "2024-08-21T09:40:10Z", "stars": 182661, "watchers": 5249, "forks": 51249, "defaultBranch": "master" }, { "id": 291137, "name": "ohmyzsh", "repo": "ohmyzsh/ohmyzsh", "description": "🙃 A delightful community-driven (with 2,400+ contributors) framework for managing your zsh configuration. Includes 300+ optional plugins (rails, git, macOS, hub, docker, homebrew, node, php, python, etc), 140+ themes to spice up your morning, and an auto-update tool that makes it easy to keep up with the latest updates from the community.", "createdAt": "2009-08-28T18:15:37Z", "updatedAt": "2025-10-28T12:39:19Z", "pushedAt": "2025-10-27T18:37:07Z", "stars": 182331, "watchers": 2620, "forks": 26261, "defaultBranch": "master" }, { "id": 614765452, "name": "AutoGPT", "repo": "Significant-Gravitas/AutoGPT", "description": "AutoGPT is the vision of accessible AI for everyone, to use and to build on. 
Our mission is to provide the tools, so that you can focus on what matters.", "createdAt": "2023-03-16T09:21:07Z", "updatedAt": "2025-10-28T12:01:03Z", "pushedAt": "2025-10-28T11:50:06Z", "stars": 179337, "watchers": 1547, "forks": 46094, "defaultBranch": "master" }, { "id": 41881900, "name": "vscode", "repo": "microsoft/vscode", "description": "Visual Studio Code", "createdAt": "2015-09-03T20:23:38Z", "updatedAt": "2025-10-28T12:22:53Z", "pushedAt": "2025-10-28T12:33:55Z", "stars": 177962, "watchers": 3366, "forks": 35810, "defaultBranch": "main" }, { "id": 123458551, "name": "Python-100-Days", "repo": "jackfrued/Python-100-Days", "description": "Python - 100天从新手到大师", "createdAt": "2018-03-01T16:05:52Z", "updatedAt": "2025-10-28T12:40:38Z", "pushedAt": "2025-03-28T10:29:23Z", "stars": 173818, "watchers": 6098, "forks": 54782, "defaultBranch": "master" }, { "id": 2126244, "name": "bootstrap", "repo": "twbs/bootstrap", "description": "The most popular HTML, CSS, and JavaScript framework for developing responsive, mobile first projects on the web.", "createdAt": "2011-07-29T21:19:00Z", "updatedAt": "2025-10-28T12:25:19Z", "pushedAt": "2025-10-28T10:02:33Z", "stars": 173612, "watchers": 6680, "forks": 79159, "defaultBranch": "main" }, { "id": 31792824, "name": "flutter", "repo": "flutter/flutter", "description": "Flutter makes it easy and fast to build beautiful apps for mobile and beyond", "createdAt": "2015-03-06T22:54:58Z", "updatedAt": "2025-10-28T12:35:50Z", "pushedAt": "2025-10-28T12:35:51Z", "stars": 173572, "watchers": 3481, "forks": 29419, "defaultBranch": "master" }, { "id": 1062897, "name": "gitignore", "repo": "github/gitignore", "description": "A collection of useful .gitignore templates", "createdAt": "2010-11-08T20:17:14Z", "updatedAt": "2025-10-28T12:36:17Z", "pushedAt": "2025-09-10T18:42:03Z", "stars": 170327, "watchers": 3367, "forks": 82996, "defaultBranch": "main" }, { "id": 35955666, "name": "the-art-of-command-line", "repo": 
"jlevy/the-art-of-command-line", "description": "Master the command line, in one page", "createdAt": "2015-05-20T15:11:03Z", "updatedAt": "2025-10-28T10:16:58Z", "pushedAt": "2024-06-25T18:13:44Z", "stars": 158603, "watchers": 2812, "forks": 14753, "defaultBranch": "master" }, { "id": 527591471, "name": "stable-diffusion-webui", "repo": "AUTOMATIC1111/stable-diffusion-webui", "description": "Stable Diffusion web UI", "createdAt": "2022-08-22T14:05:26Z", "updatedAt": "2025-10-28T12:41:21Z", "pushedAt": "2025-10-07T20:06:10Z", "stars": 157629, "watchers": 1156, "forks": 29254, "defaultBranch": "master" }, { "id": 21540759, "name": "awesome-go", "repo": "avelino/awesome-go", "description": "A curated list of awesome Go frameworks, libraries and software", "createdAt": "2014-07-06T13:42:15Z", "updatedAt": "2025-10-28T12:41:20Z", "pushedAt": "2025-10-22T12:15:14Z", "stars": 155912, "watchers": 2820, "forks": 12712, "defaultBranch": "main" }, { "id": 658928958, "name": "ollama", "repo": "ollama/ollama", "description": "Get up and running with OpenAI gpt-oss, DeepSeek-R1, Gemma 3 and other models.", "createdAt": "2023-06-26T19:39:32Z", "updatedAt": "2025-10-28T12:05:06Z", "pushedAt": "2025-10-28T08:16:13Z", "stars": 154883, "watchers": 876, "forks": 13480, "defaultBranch": "main" }, { "id": 233472199, "name": "Microsoft-Activation-Scripts", "repo": "massgravel/Microsoft-Activation-Scripts", "description": "Open-source Windows and Office activator featuring HWID, Ohook, TSforge, KMS38, and Online KMS activation methods, along with advanced troubleshooting.", "createdAt": "2020-01-12T23:03:34Z", "updatedAt": "2025-10-28T12:40:24Z", "pushedAt": "2025-09-30T22:22:59Z", "stars": 154022, "watchers": 1319, "forks": 14869, "defaultBranch": "master" }, { "id": 132464395, "name": "JavaGuide", "repo": "Snailclimb/JavaGuide", "description": "「Java学习+面试指南」一份涵盖大部分 Java 程序员所需要掌握的核心知识。准备 Java 面试,首选 JavaGuide!", "createdAt": "2018-05-07T13:27:00Z", "updatedAt": "2025-10-28T12:01:53Z", 
"pushedAt": "2025-10-27T11:09:05Z", "stars": 152325, "watchers": 4469, "forks": 46021, "defaultBranch": "main" }, { "id": 193215554, "name": "n8n", "repo": "n8n-io/n8n", "description": "Fair-code workflow automation platform with native AI capabilities. Combine visual building with custom code, self-host or cloud, 400+ integrations.", "createdAt": "2019-06-22T09:24:21Z", "updatedAt": "2025-10-28T12:41:23Z", "pushedAt": "2025-10-28T12:34:50Z", "stars": 152300, "watchers": 889, "forks": 48578, "defaultBranch": "master" }, { "id": 155220641, "name": "transformers", "repo": "huggingface/transformers", "description": "🤗 Transformers: the model-definition framework for state-of-the-art machine learning models in text, vision, audio, and multimodal models, for both inference and training. ", "createdAt": "2018-10-29T13:56:00Z", "updatedAt": "2025-10-28T12:41:10Z", "pushedAt": "2025-10-28T12:38:18Z", "stars": 151745, "watchers": 1167, "forks": 30971, "defaultBranch": "main" }, { "id": 6498492, "name": "javascript", "repo": "airbnb/javascript", "description": "JavaScript Style Guide", "createdAt": "2012-11-01T23:13:50Z", "updatedAt": "2025-10-28T11:07:36Z", "pushedAt": "2025-09-17T18:12:44Z", "stars": 147700, "watchers": 3702, "forks": 26795, "defaultBranch": "master" }, { "id": 1039520, "name": "youtube-dl", "repo": "ytdl-org/youtube-dl", "description": "Command-line program to download videos from YouTube.com and other video sites", "createdAt": "2010-10-31T14:35:07Z", "updatedAt": "2025-10-28T12:01:08Z", "pushedAt": "2025-10-18T10:02:28Z", "stars": 138581, "watchers": 2160, "forks": 10527, "defaultBranch": "master" }, { "id": 599320067, "name": "langflow", "repo": "langflow-ai/langflow", "description": "Langflow is a powerful tool for building and deploying AI-powered agents and workflows.", "createdAt": "2023-02-08T22:28:03Z", "updatedAt": "2025-10-28T12:04:14Z", "pushedAt": "2025-10-28T11:44:40Z", "stars": 136336, "watchers": 454, "forks": 7859, "defaultBranch": "main" 
}, { "id": 574523116, "name": "awesome-chatgpt-prompts", "repo": "f/awesome-chatgpt-prompts", "description": "This repo includes ChatGPT prompt curation to use ChatGPT and other LLM tools better.", "createdAt": "2022-12-05T13:54:13Z", "updatedAt": "2025-10-28T12:32:02Z", "pushedAt": "2025-10-14T17:23:13Z", "stars": 135843, "watchers": 1563, "forks": 18078, "defaultBranch": "main" }, { "id": 70107786, "name": "next.js", "repo": "vercel/next.js", "description": "The React Framework", "createdAt": "2016-10-05T23:32:51Z", "updatedAt": "2025-10-28T12:19:30Z", "pushedAt": "2025-10-28T12:22:48Z", "stars": 135333, "watchers": 1495, "forks": 29693, "defaultBranch": "canary" }, { "id": 307260205, "name": "yt-dlp", "repo": "yt-dlp/yt-dlp", "description": "A feature-rich command-line audio/video downloader", "createdAt": "2020-10-26T04:22:55Z", "updatedAt": "2025-10-28T12:38:42Z", "pushedAt": "2025-10-27T23:21:38Z", "stars": 132949, "watchers": 678, "forks": 10668, "defaultBranch": "master" }, { "id": 58028038, "name": "HelloGitHub", "repo": "521xueweihan/HelloGitHub", "description": ":octocat: 分享 GitHub 上有趣、入门级的开源项目。Share interesting, entry-level open source projects on GitHub.", "createdAt": "2016-05-04T06:24:11Z", "updatedAt": "2025-10-28T12:13:38Z", "pushedAt": "2025-10-28T00:14:25Z", "stars": 132365, "watchers": 4187, "forks": 10822, "defaultBranch": "master" }, { "id": 62607227, "name": "tech-interview-handbook", "repo": "yangshun/tech-interview-handbook", "description": "💯 Curated coding interview preparation materials for busy software engineers", "createdAt": "2016-07-05T05:00:48Z", "updatedAt": "2025-10-28T09:33:23Z", "pushedAt": "2025-08-27T00:17:33Z", "stars": 131430, "watchers": 2182, "forks": 15945, "defaultBranch": "main" }, { "id": 23096959, "name": "go", "repo": "golang/go", "description": "The Go programming language", "createdAt": "2014-08-19T04:33:40Z", "updatedAt": "2025-10-28T11:52:10Z", "pushedAt": "2025-10-28T06:29:46Z", "stars": 130554, "watchers": 
3347, "forks": 18419, "defaultBranch": "master" }, { "id": 111583593, "name": "scrcpy", "repo": "Genymobile/scrcpy", "description": "Display and control your Android device", "createdAt": "2017-11-21T18:00:27Z", "updatedAt": "2025-10-28T12:05:50Z", "pushedAt": "2025-10-27T08:59:41Z", "stars": 130304, "watchers": 1322, "forks": 12194, "defaultBranch": "master" }, { "id": 241576270, "name": "fucking-algorithm", "repo": "labuladong/fucking-algorithm", "description": "刷算法全靠套路,认准 labuladong 就够了!English version supported! Crack LeetCode, not only how, but also why. ", "createdAt": "2020-02-19T09:01:23Z", "updatedAt": "2025-10-28T08:35:53Z", "pushedAt": "2025-10-08T04:06:00Z", "stars": 129669, "watchers": 2283, "forks": 23452, "defaultBranch": "master" }, { "id": 112507086, "name": "30-seconds-of-code", "repo": "Chalarangelo/30-seconds-of-code", "description": "Coding articles to level up your development skills", "createdAt": "2017-11-29T17:35:03Z", "updatedAt": "2025-10-28T09:14:02Z", "pushedAt": "2025-10-22T12:51:11Z", "stars": 125639, "watchers": 2594, "forks": 12362, "defaultBranch": "master" }, { "id": 184456251, "name": "PowerToys", "repo": "microsoft/PowerToys", "description": "Microsoft PowerToys is a collection of utilities that help you customize Windows and streamline everyday tasks", "createdAt": "2019-05-01T17:44:02Z", "updatedAt": "2025-10-28T12:21:07Z", "pushedAt": "2025-10-28T10:55:13Z", "stars": 125271, "watchers": 1166, "forks": 7454, "defaultBranch": "main" }, { "id": 29028775, "name": "react-native", "repo": "facebook/react-native", "description": "A framework for building native applications using React", "createdAt": "2015-01-09T18:10:16Z", "updatedAt": "2025-10-28T12:36:00Z", "pushedAt": "2025-10-28T12:25:56Z", "stars": 124334, "watchers": 3563, "forks": 24916, "defaultBranch": "main" }, { "id": 9384267, "name": "electron", "repo": "electron/electron", "description": ":electron: Build cross-platform desktop apps with JavaScript, HTML, and CSS", 
"createdAt": "2013-04-12T01:47:36Z", "updatedAt": "2025-10-28T11:35:46Z", "pushedAt": "2025-10-28T09:28:32Z", "stars": 118860, "watchers": 2801, "forks": 16584, "defaultBranch": "main" }, { "id": 552661142, "name": "langchain", "repo": "langchain-ai/langchain", "description": "🦜🔗 Build context-aware reasoning applications", "createdAt": "2022-10-17T02:58:36Z", "updatedAt": "2025-10-28T12:37:33Z", "pushedAt": "2025-10-27T23:47:43Z", "stars": 118261, "watchers": 776, "forks": 19476, "defaultBranch": "master" }, { "id": 20580498, "name": "kubernetes", "repo": "kubernetes/kubernetes", "description": "Production-Grade Container Scheduling and Management", "createdAt": "2014-06-06T22:56:04Z", "updatedAt": "2025-10-28T12:19:38Z", "pushedAt": "2025-10-28T10:29:37Z", "stars": 118246, "watchers": 3189, "forks": 41587, "defaultBranch": "master" }, { "id": 561730219, "name": "hello-algo", "repo": "krahets/hello-algo", "description": "《Hello 算法》:动画图解、一键运行的数据结构与算法教程。支持 Python, Java, C++, C, C#, JS, Go, Swift, Rust, Ruby, Kotlin, TS, Dart 代码。简体版和繁体版同步更新,English version in translation", "createdAt": "2022-11-04T11:08:34Z", "updatedAt": "2025-10-28T12:30:36Z", "pushedAt": "2025-10-16T21:33:36Z", "stars": 118105, "watchers": 583, "forks": 14500, "defaultBranch": "main" }, { "id": 626805178, "name": "dify", "repo": "langgenius/dify", "description": "Production-ready platform for agentic workflow development.", "createdAt": "2023-04-12T07:40:24Z", "updatedAt": "2025-10-28T12:18:46Z", "pushedAt": "2025-10-28T10:48:12Z", "stars": 117486, "watchers": 698, "forks": 18151, "defaultBranch": "main" }, { "id": 14098069, "name": "free-programming-books-zh_CN", "repo": "justjavac/free-programming-books-zh_CN", "description": ":books: 免费的计算机编程类中文书籍,欢迎投稿", "createdAt": "2013-11-04T01:59:19Z", "updatedAt": "2025-10-28T09:19:09Z", "pushedAt": "2024-07-15T08:55:20Z", "stars": 115543, "watchers": 5859, "forks": 28362, "defaultBranch": "main" }, { "id": 32484381, "name": "free-for-dev", "repo": 
"ripienaar/free-for-dev", "description": "A list of SaaS, PaaS and IaaS offerings that have free tiers of interest to devops and infradev", "createdAt": "2015-03-18T21:06:26Z", "updatedAt": "2025-10-28T11:38:56Z", "pushedAt": "2025-10-23T04:49:00Z", "stars": 114128, "watchers": 1735, "forks": 11684, "defaultBranch": "master" }, { "id": 27193779, "name": "node", "repo": "nodejs/node", "description": "Node.js JavaScript runtime ✨🐢🚀✨", "createdAt": "2014-11-26T19:57:11Z", "updatedAt": "2025-10-28T12:34:32Z", "pushedAt": "2025-10-28T11:29:04Z", "stars": 114019, "watchers": 2963, "forks": 33580, "defaultBranch": "main" }, { "id": 701547123, "name": "open-webui", "repo": "open-webui/open-webui", "description": "User-friendly AI Interface (Supports Ollama, OpenAI API, ...)", "createdAt": "2023-10-06T22:08:27Z", "updatedAt": "2025-10-28T12:22:47Z", "pushedAt": "2025-10-28T08:46:37Z", "stars": 113575, "watchers": 515, "forks": 15783, "defaultBranch": "main" }, { "id": 808144141, "name": "FreeDomain", "repo": "DigitalPlatDev/FreeDomain", "description": "DigitalPlat FreeDomain: Free Domain For Everyone", "createdAt": "2024-05-30T13:23:00Z", "updatedAt": "2025-10-28T12:40:49Z", "pushedAt": "2025-09-25T12:12:01Z", "stars": 111985, "watchers": 120, "forks": 2068, "defaultBranch": "main" }, { "id": 943149, "name": "d3", "repo": "d3/d3", "description": "Bring data to life with SVG, Canvas and HTML. 
:bar_chart::chart_with_upwards_trend::tada:", "createdAt": "2010-09-27T17:22:42Z", "updatedAt": "2025-10-28T09:47:08Z", "pushedAt": "2025-07-27T11:30:40Z", "stars": 111693, "watchers": 3558, "forks": 22850, "defaultBranch": "main" }, { "id": 231283452, "name": "excalidraw", "repo": "excalidraw/excalidraw", "description": "Virtual whiteboard for sketching hand-drawn like diagrams", "createdAt": "2020-01-02T01:04:43Z", "updatedAt": "2025-10-28T12:38:34Z", "pushedAt": "2025-10-28T11:43:31Z", "stars": 109315, "watchers": 467, "forks": 11345, "defaultBranch": "master" }, { "id": 576201, "name": "three.js", "repo": "mrdoob/three.js", "description": "JavaScript 3D Library.", "createdAt": "2010-03-23T18:58:01Z", "updatedAt": "2025-10-28T12:07:59Z", "pushedAt": "2025-10-28T12:13:11Z", "stars": 109143, "watchers": 2518, "forks": 36054, "defaultBranch": "dev" }, { "id": 23088740, "name": "axios", "repo": "axios/axios", "description": "Promise based HTTP client for the browser and node.js", "createdAt": "2014-08-18T22:30:27Z", "updatedAt": "2025-10-28T12:10:56Z", "pushedAt": "2025-10-27T19:08:10Z", "stars": 108032, "watchers": 1169, "forks": 11371, "defaultBranch": "v1.x" }, { "id": 724712, "name": "rust", "repo": "rust-lang/rust", "description": "Empowering everyone to build reliable and efficient software.", "createdAt": "2010-06-16T20:39:03Z", "updatedAt": "2025-10-28T12:40:15Z", "pushedAt": "2025-10-28T11:12:51Z", "stars": 107478, "watchers": 1468, "forks": 13900, "defaultBranch": "master" }, { "id": 20929025, "name": "TypeScript", "repo": "microsoft/TypeScript", "description": "TypeScript is a superset of JavaScript that compiles to clean JavaScript output.", "createdAt": "2014-06-17T15:28:39Z", "updatedAt": "2025-10-28T12:19:23Z", "pushedAt": "2025-10-27T23:52:12Z", "stars": 106557, "watchers": 2148, "forks": 13086, "defaultBranch": "main" }, { "id": 133442384, "name": "deno", "repo": "denoland/deno", "description": "A modern runtime for JavaScript and TypeScript.", 
"createdAt": "2018-05-15T01:34:26Z", "updatedAt": "2025-10-28T12:27:16Z", "pushedAt": "2025-10-28T09:10:45Z", "stars": 104939, "watchers": 1398, "forks": 5754, "defaultBranch": "main" }, { "id": 103633984, "name": "nodebestpractices", "repo": "goldbergyoni/nodebestpractices", "description": ":white_check_mark: The Node.js best practices list (July 2024)", "createdAt": "2017-09-15T08:33:19Z", "updatedAt": "2025-10-28T11:50:28Z", "pushedAt": "2025-04-15T21:52:42Z", "stars": 104455, "watchers": 1944, "forks": 10625, "defaultBranch": "master" }, { "id": 63537249, "name": "create-react-app", "repo": "facebook/create-react-app", "description": "Set up a modern web app by running one command.", "createdAt": "2016-07-17T14:55:11Z", "updatedAt": "2025-10-28T12:35:24Z", "pushedAt": "2025-02-15T01:32:11Z", "stars": 103813, "watchers": 1891, "forks": 27148, "defaultBranch": "main" }, { "id": 206462776, "name": "GitHub-Chinese-Top-Charts", "repo": "GrowingGit/GitHub-Chinese-Top-Charts", "description": ":cn: GitHub中文排行榜,各语言分设「软件 | 资料」榜单,精准定位中文好项目。各取所需,高效学习。", "createdAt": "2019-09-05T03:01:56Z", "updatedAt": "2025-10-28T10:36:09Z", "pushedAt": "2024-10-12T06:51:36Z", "stars": 103358, "watchers": 2607, "forks": 13363, "defaultBranch": "master" }, { "id": 15634981, "name": "godot", "repo": "godotengine/godot", "description": "Godot Engine – Multi-platform 2D and 3D game engine", "createdAt": "2014-01-04T16:05:36Z", "updatedAt": "2025-10-28T11:39:26Z", "pushedAt": "2025-10-28T08:43:09Z", "stars": 102655, "watchers": 1493, "forks": 23457, "defaultBranch": "master" }, { "id": 299354207, "name": "rustdesk", "repo": "rustdesk/rustdesk", "description": "An open-source remote desktop application designed for self-hosting, as an alternative to TeamViewer.", "createdAt": "2020-09-28T15:36:08Z", "updatedAt": "2025-10-28T12:27:03Z", "pushedAt": "2025-10-28T12:25:33Z", "stars": 101531, "watchers": 548, "forks": 14850, "defaultBranch": "master" }, { "id": 655806940, "name": 
"generative-ai-for-beginners", "repo": "microsoft/generative-ai-for-beginners", "description": "21 Lessons, Get Started Building with Generative AI ", "createdAt": "2023-06-19T16:28:59Z", "updatedAt": "2025-10-28T12:25:17Z", "pushedAt": "2025-10-27T03:19:39Z", "stars": 101010, "watchers": 887, "forks": 53526, "defaultBranch": "main" }, { "id": 100060912, "name": "terminal", "repo": "microsoft/terminal", "description": "The new Windows Terminal and the original Windows console host, all in the same place!", "createdAt": "2017-08-11T18:38:22Z", "updatedAt": "2025-10-28T12:08:57Z", "pushedAt": "2025-10-28T03:04:50Z", "stars": 100746, "watchers": 1334, "forks": 8879, "defaultBranch": "main" }, { "id": 48378947, "name": "frp", "repo": "fatedier/frp", "description": "A fast reverse proxy to help you expose a local server behind a NAT or firewall to the internet.", "createdAt": "2015-12-21T15:24:59Z", "updatedAt": "2025-10-28T11:57:26Z", "pushedAt": "2025-10-28T09:52:35Z", "stars": 100048, "watchers": 1564, "forks": 14567, "defaultBranch": "dev" }, { "id": 908531752, "name": "DeepSeek-V3", "repo": "deepseek-ai/DeepSeek-V3", "description": null, "createdAt": "2024-12-26T09:52:40Z", "updatedAt": "2025-10-28T12:11:53Z", "pushedAt": "2025-08-28T03:24:37Z", "stars": 100020, "watchers": 752, "forks": 16313, "defaultBranch": "main" }, { "id": 55076063, "name": "Awesome-Hacking", "repo": "Hack-with-Github/Awesome-Hacking", "description": "A collection of various awesome lists for hackers, pentesters and security researchers", "createdAt": "2016-03-30T15:47:10Z", "updatedAt": "2025-10-28T12:11:25Z", "pushedAt": "2025-01-18T01:48:02Z", "stars": 99746, "watchers": 3932, "forks": 9633, "defaultBranch": "master" }, { "id": 15204860, "name": "papers-we-love", "repo": "papers-we-love/papers-we-love", "description": "Papers from the computer science community to read and discuss.", "createdAt": "2013-12-15T14:31:41Z", "updatedAt": "2025-10-28T12:35:57Z", "pushedAt": 
"2025-10-10T15:35:14Z", "stars": 99660, "watchers": 3159, "forks": 6144, "defaultBranch": "main" }, { "id": 24195339, "name": "angular", "repo": "angular/angular", "description": "Deliver web apps with confidence 🚀", "createdAt": "2014-09-18T16:12:01Z", "updatedAt": "2025-10-28T11:07:05Z", "pushedAt": "2025-10-28T10:04:30Z", "stars": 99174, "watchers": 2980, "forks": 26730, "defaultBranch": "main" }, { "id": 585146387, "name": "ui", "repo": "shadcn-ui/ui", "description": "A set of beautifully-designed, accessible components and a code distribution platform. Works with your favorite frameworks. Open Source. Open Code.", "createdAt": "2023-01-04T12:43:27Z", "updatedAt": "2025-10-28T12:32:30Z", "pushedAt": "2025-10-28T12:41:17Z", "stars": 98552, "watchers": 307, "forks": 7046, "defaultBranch": "main" }, { "id": 196701619, "name": "tauri", "repo": "tauri-apps/tauri", "description": "Build smaller, faster, and more secure desktop and mobile applications with a web frontend.", "createdAt": "2019-07-13T09:09:37Z", "updatedAt": "2025-10-28T12:32:07Z", "pushedAt": "2025-10-28T10:29:35Z", "stars": 98262, "watchers": 530, "forks": 3139, "defaultBranch": "dev" }, { "id": 157616880, "name": "iptv", "repo": "iptv-org/iptv", "description": "Collection of publicly available IPTV channels from all over the world", "createdAt": "2018-11-14T22:00:57Z", "updatedAt": "2025-10-28T12:32:18Z", "pushedAt": "2025-10-28T00:11:46Z", "stars": 98083, "watchers": 1952, "forks": 4199, "defaultBranch": "master" }, { "id": 23083156, "name": "material-ui", "repo": "mui/material-ui", "description": "Material UI: Comprehensive React component library that implements Google's Material Design. 
Free forever.", "createdAt": "2014-08-18T19:11:54Z", "updatedAt": "2025-10-28T08:02:20Z", "pushedAt": "2025-10-28T06:08:34Z", "stars": 96887, "watchers": 1312, "forks": 32696, "defaultBranch": "master" }, { "id": 34526884, "name": "ant-design", "repo": "ant-design/ant-design", "description": "An enterprise-class UI design language and React UI library", "createdAt": "2015-04-24T15:37:24Z", "updatedAt": "2025-10-28T11:00:38Z", "pushedAt": "2025-10-28T10:52:44Z", "stars": 96472, "watchers": 236, "forks": 53890, "defaultBranch": "master" }, { "id": 243950408, "name": "HowToCook", "repo": "Anduin2017/HowToCook", "description": "程序员在家做饭方法指南。Programmer's guide about how to cook at home (Simplified Chinese only).", "createdAt": "2020-02-29T10:43:49Z", "updatedAt": "2025-10-28T12:35:03Z", "pushedAt": "2025-10-28T11:30:11Z", "stars": 95425, "watchers": 488, "forks": 10651, "defaultBranch": "master" }, { "id": 33614304, "name": "thefuck", "repo": "nvbn/thefuck", "description": "Magnificent app which corrects your previous console command.", "createdAt": "2015-04-08T15:08:04Z", "updatedAt": "2025-10-28T12:34:25Z", "pushedAt": "2024-07-19T14:56:13Z", "stars": 94497, "watchers": 825, "forks": 3792, "defaultBranch": "master" }, { "id": 65600975, "name": "pytorch", "repo": "pytorch/pytorch", "description": "Tensors and Dynamic neural networks in Python with strong GPU acceleration", "createdAt": "2016-08-13T05:26:41Z", "updatedAt": "2025-10-28T12:25:28Z", "pushedAt": "2025-10-28T12:40:19Z", "stars": 94326, "watchers": 1770, "forks": 25678, "defaultBranch": "main" }, { "id": 74791366, "name": "clean-code-javascript", "repo": "ryanmcdermott/clean-code-javascript", "description": "Clean Code concepts adapted for JavaScript", "createdAt": "2016-11-25T22:25:41Z", "updatedAt": "2025-10-28T08:55:17Z", "pushedAt": "2024-07-29T07:24:37Z", "stars": 93959, "watchers": 1744, "forks": 12495, "defaultBranch": "master" }, { "id": 101296881, "name": "every-programmer-should-know", "repo": 
"mtdvio/every-programmer-should-know", "description": "A collection of (mostly) technical things every software developer should know about", "createdAt": "2017-08-24T13:18:26Z", "updatedAt": "2025-10-28T11:51:44Z", "pushedAt": "2025-10-22T15:21:18Z", "stars": 93832, "watchers": 2011, "forks": 8437, "defaultBranch": "master" }, { "id": 16408992, "name": "neovim", "repo": "neovim/neovim", "description": "Vim-fork focused on extensibility and usability", "createdAt": "2014-01-31T13:39:22Z", "updatedAt": "2025-10-28T12:38:30Z", "pushedAt": "2025-10-28T08:45:46Z", "stars": 93768, "watchers": 972, "forks": 6376, "defaultBranch": "master" }, { "id": 943398999, "name": "system-prompts-and-models-of-ai-tools", "repo": "x1xhlol/system-prompts-and-models-of-ai-tools", "description": "FULL Augment Code, Claude Code, Cluely, CodeBuddy, Comet, Cursor, Devin AI, Junie, Kiro, Leap.new, Lovable, Manus Agent Tools, NotionAI, Orchids.app, Perplexity, Poke, Qoder, Replit, Same.dev, Trae, Traycer AI, VSCode Agent, Warp.dev, Windsurf, Xcode, Z.ai Code, dia & v0. 
(And other Open Sourced) System Prompts, Internal Tools & AI Models", "createdAt": "2025-03-05T16:38:29Z", "updatedAt": "2025-10-28T12:37:42Z", "pushedAt": "2025-10-19T18:44:24Z", "stars": 93450, "watchers": 1183, "forks": 25250, "defaultBranch": "main" }, { "id": 22790488, "name": "java-design-patterns", "repo": "iluwatar/java-design-patterns", "description": "Design patterns implemented in Java", "createdAt": "2014-08-09T16:45:18Z", "updatedAt": "2025-10-28T11:55:32Z", "pushedAt": "2025-10-21T21:30:34Z", "stars": 93230, "watchers": 3717, "forks": 27312, "defaultBranch": "master" }, { "id": 90796663, "name": "puppeteer", "repo": "puppeteer/puppeteer", "description": "JavaScript API for Chrome and Firefox", "createdAt": "2017-05-09T22:16:13Z", "updatedAt": "2025-10-28T11:55:21Z", "pushedAt": "2025-10-28T11:35:29Z", "stars": 92732, "watchers": 1184, "forks": 9314, "defaultBranch": "main" }, { "id": 311525798, "name": "Web-Dev-For-Beginners", "repo": "microsoft/Web-Dev-For-Beginners", "description": "24 Lessons, 12 Weeks, Get Started as a Web Developer", "createdAt": "2020-11-10T02:44:00Z", "updatedAt": "2025-10-28T12:11:24Z", "pushedAt": "2025-10-27T13:01:13Z", "stars": 92494, "watchers": 2690, "forks": 14334, "defaultBranch": "main" }, { "id": 589831718, "name": "ComfyUI", "repo": "comfyanonymous/ComfyUI", "description": "The most powerful and modular diffusion model GUI, api and backend with a graph/nodes interface.", "createdAt": "2023-01-17T03:15:56Z", "updatedAt": "2025-10-28T12:38:44Z", "pushedAt": "2025-10-28T08:45:49Z", "stars": 92150, "watchers": 615, "forks": 10367, "defaultBranch": "master" }, { "id": 63539055, "name": "awesome-mac", "repo": "jaywcjlove/awesome-mac", "description": " Now we have become very big, Different from the original idea. 
Collect premium software in various categories.", "createdAt": "2016-07-17T15:33:47Z", "updatedAt": "2025-10-28T12:29:52Z", "pushedAt": "2025-10-27T17:27:24Z", "stars": 91942, "watchers": 1517, "forks": 6956, "defaultBranch": "master" }, { "id": 919443098, "name": "DeepSeek-R1", "repo": "deepseek-ai/DeepSeek-R1", "description": null, "createdAt": "2025-01-20T11:57:28Z", "updatedAt": "2025-10-28T12:33:45Z", "pushedAt": "2025-06-27T08:35:54Z", "stars": 91406, "watchers": 607, "forks": 11768, "defaultBranch": "main" }, { "id": 160919119, "name": "fastapi", "repo": "fastapi/fastapi", "description": "FastAPI framework, high performance, easy to learn, fast to code, ready for production", "createdAt": "2018-12-08T08:21:47Z", "updatedAt": "2025-10-28T11:31:45Z", "pushedAt": "2025-10-28T07:50:29Z", "stars": 91252, "watchers": 721, "forks": 8135, "defaultBranch": "master" }, { "id": 106017343, "name": "tailwindcss", "repo": "tailwindlabs/tailwindcss", "description": "A utility-first CSS framework for rapid UI development.", "createdAt": "2017-10-06T14:59:14Z", "updatedAt": "2025-10-28T12:25:13Z", "pushedAt": "2025-10-28T12:25:08Z", "stars": 90816, "watchers": 615, "forks": 4766, "defaultBranch": "main" } ] ================================================ FILE: benchmarks/package.json ================================================ { "name": "@toon/benchmarks", "type": "module", "private": true, "scripts": { "benchmark:tokens": "node scripts/token-efficiency-benchmark.ts", "benchmark:accuracy": "node --env-file=.env scripts/accuracy-benchmark.ts", "fetch:github-repos": "node scripts/fetch-github-repos.ts" }, "devDependencies": { "@ai-sdk/anthropic": "^3.0.58", "@ai-sdk/google": "^3.0.43", "@ai-sdk/openai": "^3.0.41", "@ai-sdk/provider": "^3.0.8", "@ai-sdk/xai": "^3.0.67", "@clack/prompts": "^1.1.0", "@faker-js/faker": "^10.3.0", "ai": "^6.0.116", "csv-stringify": "^6.6.0", "fast-xml-parser": "^5.4.2", "gpt-tokenizer": "^3.4.0", "ofetch": "^1.5.1", "p-map": "^7.0.4", 
"p-queue": "^9.1.0", "unstorage": "^1.17.4", "yaml": "^2.8.2" } } ================================================ FILE: benchmarks/results/accuracy/models/claude-haiku-4-5-20251001 ================================================ [{"questionId":"q1","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":7939,"outputTokens":6,"latencyMs":1488.7717919999996},{"questionId":"q1","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":4830,"outputTokens":6,"latencyMs":1424.8316250000007},{"questionId":"q1","format":"toon","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":3079,"outputTokens":6,"latencyMs":1200.6324999999997},{"questionId":"q1","format":"csv","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2925,"outputTokens":6,"latencyMs":1187.5327080000006},{"questionId":"q1","format":"xml","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":9424,"outputTokens":6,"latencyMs":1200.2132079999992},{"questionId":"q1","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":5830,"outputTokens":6,"latencyMs":1341.1957500000008},{"questionId":"q2","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7938,"outputTokens":4,"latencyMs":1182.2189579999995},{"questionId":"q2","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4829,"outputTokens":4,"latencyMs":1191.913125000001},{"questionId":"q2","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":3078,"outputTokens":4,"latencyMs":1393.1609
15999999},{"questionId":"q2","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2924,"outputTokens":4,"latencyMs":1192.7132500000007},{"questionId":"q2","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":9423,"outputTokens":4,"latencyMs":1360.8396249999987},{"questionId":"q2","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5829,"outputTokens":4,"latencyMs":1619.3704579999994},{"questionId":"q3","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":7943,"outputTokens":12,"latencyMs":1131.9942499999997},{"questionId":"q3","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":4834,"outputTokens":12,"latencyMs":1391.2939580000002},{"questionId":"q3","format":"toon","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":3083,"outputTokens":12,"latencyMs":1181.2237920000007},{"questionId":"q3","format":"csv","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2929,"outputTokens":12,"latencyMs":1098.3214580000003},{"questionId":"q3","format":"xml","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":9428,"outputTokens":12,"latencyMs":1468.2502499999991},{"questionId":"q3","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":5834,"outputTokens":12,"latencyMs":1044.1940839999988},{"que
stionId":"q4","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":7941,"outputTokens":5,"latencyMs":1145.9674579999992},{"questionId":"q4","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":4832,"outputTokens":5,"latencyMs":1282.7140419999996},{"questionId":"q4","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":3081,"outputTokens":5,"latencyMs":993.7237079999995},{"questionId":"q4","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":2927,"outputTokens":5,"latencyMs":1029.5127499999999},{"questionId":"q4","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":9426,"outputTokens":5,"latencyMs":1170.4240829999999},{"questionId":"q4","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":5832,"outputTokens":5,"latencyMs":1297.9727910000001},{"questionId":"q5","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":7936,"outputTokens":4,"latencyMs":1060.0110000000004},{"questionId":"q5","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":4827,"outputTokens":4,"latencyMs":932.3761250000007},{"questionId":"q5","format":"toon","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":3076,"outputTokens":4,"latencyMs":967.2909170000003},{"questionId":"q5","format":"csv","model":"claude-haiku-4-5-20251001","expected":"no","actual":"0","isCorrect":true,"inputTokens":2922,"outputTokens":5,"latencyMs":989.7537499999999},{"questionId":"q5","format":"xml","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect"
:true,"inputTokens":9421,"outputTokens":4,"latencyMs":1163.8299580000003},{"questionId":"q5","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"no","actual":"false","isCorrect":true,"inputTokens":5827,"outputTokens":4,"latencyMs":1012.1698340000003},{"questionId":"q6","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":7939,"outputTokens":6,"latencyMs":1043.6857499999987},{"questionId":"q6","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":4830,"outputTokens":6,"latencyMs":1741.9372920000005},{"questionId":"q6","format":"toon","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":3079,"outputTokens":6,"latencyMs":1061.195040999999},{"questionId":"q6","format":"csv","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2925,"outputTokens":6,"latencyMs":897.5309579999994},{"questionId":"q6","format":"xml","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":9424,"outputTokens":6,"latencyMs":1091.2617499999997},{"questionId":"q6","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":5830,"outputTokens":6,"latencyMs":1198.945791},{"questionId":"q7","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7939,"outputTokens":4,"latencyMs":1124.808833000001},{"questionId":"q7","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4830,"outputTokens":4,"latencyMs":1004.0825409999998},{"questionId":"q7","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputToken
s":3079,"outputTokens":4,"latencyMs":1026.612874999999},{"questionId":"q7","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2925,"outputTokens":4,"latencyMs":900.933500000001},{"questionId":"q7","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":9424,"outputTokens":4,"latencyMs":1537.3743749999994},{"questionId":"q7","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5830,"outputTokens":4,"latencyMs":1069.091042},{"questionId":"q8","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":7941,"outputTokens":12,"latencyMs":1463.4106250000004},{"questionId":"q8","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":4832,"outputTokens":12,"latencyMs":1045.6618749999998},{"questionId":"q8","format":"toon","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":3081,"outputTokens":12,"latencyMs":1144.8265419999989},{"questionId":"q8","format":"csv","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2927,"outputTokens":12,"latencyMs":1266.5881250000002},{"questionId":"q8","format":"xml","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":9426,"outputTokens":12,"latencyMs":1094.5647079999999},{"questionId":"q8","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":5832,"outpu
tTokens":12,"latencyMs":1037.4817500000008},{"questionId":"q9","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":7941,"outputTokens":5,"latencyMs":1064.6803340000006},{"questionId":"q9","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":4832,"outputTokens":5,"latencyMs":1234.0882500000007},{"questionId":"q9","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":3081,"outputTokens":5,"latencyMs":1072.323041999989},{"questionId":"q9","format":"csv","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":2927,"outputTokens":5,"latencyMs":1155.7975410000072},{"questionId":"q9","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":9426,"outputTokens":5,"latencyMs":1583.6992499999906},{"questionId":"q9","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":5832,"outputTokens":5,"latencyMs":1564.031124999994},{"questionId":"q10","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7938,"outputTokens":4,"latencyMs":1184.0812499999884},{"questionId":"q10","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4829,"outputTokens":4,"latencyMs":1815.7772499999992},{"questionId":"q10","format":"toon","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":3078,"outputTokens":4,"latencyMs":1103.1678749999992},{"questionId":"q10","format":"csv","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"1","isCorrect":true,"inputTokens":2924,"outputTokens":5,"latencyMs":1059.914082999996},{"questionId":"q10","format":"xml","model":"claude-haiku-4-5-20251001",
"expected":"yes","actual":"true","isCorrect":true,"inputTokens":9423,"outputTokens":4,"latencyMs":1852.225999999995},{"questionId":"q10","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5829,"outputTokens":4,"latencyMs":1238.4458329999907},{"questionId":"q11","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":7938,"outputTokens":6,"latencyMs":1264.0889580000076},{"questionId":"q11","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":4829,"outputTokens":6,"latencyMs":984.5701249999984},{"questionId":"q11","format":"toon","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":3078,"outputTokens":6,"latencyMs":1400.2183340000047},{"questionId":"q11","format":"csv","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2924,"outputTokens":6,"latencyMs":891.1542500000069},{"questionId":"q11","format":"xml","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":9423,"outputTokens":6,"latencyMs":1176.492333000002},{"questionId":"q11","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":5829,"outputTokens":6,"latencyMs":1310.523291999998},{"questionId":"q12","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":7935,"outputTokens":4,"latencyMs":1027.5788330000069},{"questionId":"q12","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4826,"outputTokens":4,"latencyMs":992.2010420000006},{"questionId":"q12","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Operations
","actual":"Operations","isCorrect":true,"inputTokens":3075,"outputTokens":4,"latencyMs":1090.145749999996},{"questionId":"q12","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2921,"outputTokens":4,"latencyMs":867.523457999996},{"questionId":"q12","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":9420,"outputTokens":4,"latencyMs":1272.2972080000036},{"questionId":"q12","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5826,"outputTokens":4,"latencyMs":1223.0700840000063},{"questionId":"q13","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":7934,"outputTokens":5,"latencyMs":1089.1235420000012},{"questionId":"q13","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":4825,"outputTokens":5,"latencyMs":992.0438330000034},{"questionId":"q13","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":3074,"outputTokens":5,"latencyMs":1052.5001670000056},{"questionId":"q13","format":"csv","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":2920,"outputTokens":5,"latencyMs":893.6204160000052},{"questionId":"q13","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":9419,"outputTokens":5,"latencyMs":1324.1672920000128},{"questionId":"q13","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":5825,"outputTokens":5,"latencyMs":1415.1354579999897},{"questionId":"q14","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"13","isCorrect":false,"inputTokens":7934,"o
utputTokens":5,"latencyMs":1215.046291000006},{"questionId":"q14","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":4825,"outputTokens":5,"latencyMs":1065.2862919999898},{"questionId":"q14","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":3074,"outputTokens":5,"latencyMs":973.8248339999991},{"questionId":"q14","format":"csv","model":"claude-haiku-4-5-20251001","expected":"17","actual":"14","isCorrect":false,"inputTokens":2920,"outputTokens":5,"latencyMs":1048.0387499999924},{"questionId":"q14","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":9419,"outputTokens":5,"latencyMs":4246.3034999999945},{"questionId":"q14","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":5825,"outputTokens":5,"latencyMs":3089.0154579999944},{"questionId":"q15","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"13","isCorrect":false,"inputTokens":7934,"outputTokens":5,"latencyMs":1041.0287499999977},{"questionId":"q15","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":4825,"outputTokens":5,"latencyMs":1126.9441249999945},{"questionId":"q15","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"12","isCorrect":false,"inputTokens":3074,"outputTokens":5,"latencyMs":1174.3665829999954},{"questionId":"q15","format":"csv","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":2920,"outputTokens":5,"latencyMs":1421.4708329999994},{"questionId":"q15","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":9419,"outputTokens":5,"latencyMs":1255.9135000000097},{"questionId":"q15","format":"yaml","model":"claude-haik
u-4-5-20251001","expected":"17","actual":"14","isCorrect":false,"inputTokens":5825,"outputTokens":5,"latencyMs":1288.064916000003},{"questionId":"q16","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"91","actual":"85","isCorrect":false,"inputTokens":7939,"outputTokens":5,"latencyMs":1213.8851250000007},{"questionId":"q16","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"91","actual":"78","isCorrect":false,"inputTokens":4830,"outputTokens":5,"latencyMs":1034.356375000003},{"questionId":"q16","format":"toon","model":"claude-haiku-4-5-20251001","expected":"91","actual":"85","isCorrect":false,"inputTokens":3079,"outputTokens":5,"latencyMs":1150.7799589999922},{"questionId":"q16","format":"csv","model":"claude-haiku-4-5-20251001","expected":"91","actual":"76","isCorrect":false,"inputTokens":2925,"outputTokens":5,"latencyMs":927.4066250000033},{"questionId":"q16","format":"xml","model":"claude-haiku-4-5-20251001","expected":"91","actual":"92","isCorrect":false,"inputTokens":9424,"outputTokens":5,"latencyMs":989.8042080000014},{"questionId":"q16","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"91","actual":"91","isCorrect":true,"inputTokens":5830,"outputTokens":5,"latencyMs":1249.2909999999974},{"questionId":"q17","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"67","actual":"57","isCorrect":false,"inputTokens":7939,"outputTokens":5,"latencyMs":1313.7873749999999},{"questionId":"q17","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"67","actual":"60","isCorrect":false,"inputTokens":4830,"outputTokens":5,"latencyMs":1045.1794999999984},{"questionId":"q17","format":"toon","model":"claude-haiku-4-5-20251001","expected":"67","actual":"46","isCorrect":false,"inputTokens":3079,"outputTokens":5,"latencyMs":985.7277500000055},{"questionId":"q17","format":"csv","model":"claude-haiku-4-5-20251001","expected":"67","actual":"42","isCorrect":false,"inputTokens":2925,"outputTokens":5
,"latencyMs":964.684500000003},{"questionId":"q17","format":"xml","model":"claude-haiku-4-5-20251001","expected":"67","actual":"58","isCorrect":false,"inputTokens":9424,"outputTokens":5,"latencyMs":1365.7662500000006},{"questionId":"q17","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"67","actual":"61","isCorrect":false,"inputTokens":5830,"outputTokens":5,"latencyMs":1559.340124999988},{"questionId":"q18","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"41","actual":"34","isCorrect":false,"inputTokens":7939,"outputTokens":5,"latencyMs":1418.1319579999981},{"questionId":"q18","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":4830,"outputTokens":5,"latencyMs":1229.3413750000036},{"questionId":"q18","format":"toon","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":3079,"outputTokens":5,"latencyMs":1237.7892920000013},{"questionId":"q18","format":"csv","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":2925,"outputTokens":5,"latencyMs":1149.4620410000061},{"questionId":"q18","format":"xml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"34","isCorrect":false,"inputTokens":9424,"outputTokens":5,"latencyMs":1498.104582999993},{"questionId":"q18","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"28","isCorrect":false,"inputTokens":5830,"outputTokens":5,"latencyMs":1437.632666999998},{"questionId":"q19","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":7935,"outputTokens":5,"latencyMs":1284.2168329999986},{"questionId":"q19","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":4826,"outputTokens":5,"latencyMs":1304.1590419999993},{"questionId":"q19","format":"toon","model":"claude-haiku-4-5-2
0251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":3075,"outputTokens":5,"latencyMs":1151.4296670000185},{"questionId":"q19","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":2921,"outputTokens":5,"latencyMs":1547.9670420000039},{"questionId":"q19","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9420,"outputTokens":5,"latencyMs":1110.2993750000169},{"questionId":"q19","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":5826,"outputTokens":5,"latencyMs":1029.0622079999885},{"questionId":"q20","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"99857.49","isCorrect":false,"inputTokens":7936,"outputTokens":8,"latencyMs":1178.934208000006},{"questionId":"q20","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"100657.68","isCorrect":false,"inputTokens":4827,"outputTokens":8,"latencyMs":1070.5215410000092},{"questionId":"q20","format":"toon","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"97474.27","isCorrect":false,"inputTokens":3076,"outputTokens":8,"latencyMs":1914.8881249999831},{"questionId":"q20","format":"csv","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"98208.5","isCorrect":false,"inputTokens":2922,"outputTokens":8,"latencyMs":1134.84874999999},{"questionId":"q20","format":"xml","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"100591.83","isCorrect":false,"inputTokens":9421,"outputTokens":8,"latencyMs":1201.0446670000092},{"questionId":"q20","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"96503","actual":"100560.31","isCorrect":false,"inputTokens":5827,"outputTokens":8,"latencyMs":1365.4649169999902},{"questionId":"q21","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"78","actual":"78","isCor
rect":true,"inputTokens":7933,"outputTokens":5,"latencyMs":1057.5927919999813},{"questionId":"q21","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"78","actual":"73","isCorrect":false,"inputTokens":4824,"outputTokens":5,"latencyMs":984.6357499999867},{"questionId":"q21","format":"toon","model":"claude-haiku-4-5-20251001","expected":"78","actual":"80","isCorrect":false,"inputTokens":3073,"outputTokens":5,"latencyMs":961.1789580000041},{"questionId":"q21","format":"csv","model":"claude-haiku-4-5-20251001","expected":"78","actual":"75","isCorrect":false,"inputTokens":2919,"outputTokens":5,"latencyMs":1046.232457999984},{"questionId":"q21","format":"xml","model":"claude-haiku-4-5-20251001","expected":"78","actual":"76","isCorrect":false,"inputTokens":9418,"outputTokens":5,"latencyMs":1062.490832999989},{"questionId":"q21","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"78","actual":"76","isCorrect":false,"inputTokens":5824,"outputTokens":5,"latencyMs":1386.1599170000118},{"questionId":"q22","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":7933,"outputTokens":5,"latencyMs":1155.2419999999984},{"questionId":"q22","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4824,"outputTokens":5,"latencyMs":984.3614579999994},{"questionId":"q22","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":3073,"outputTokens":5,"latencyMs":2339.569790999987},{"questionId":"q22","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"22","isCorrect":true,"inputTokens":2919,"outputTokens":5,"latencyMs":1645.8104999999923},{"questionId":"q22","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":9418,"outputTokens":5,"latencyMs":1110.3421669999952},{"questionId":"q22","format":
"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":5824,"outputTokens":5,"latencyMs":1158.1035830000183},{"questionId":"q23","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"12","actual":"11","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1850.1329160000023},{"questionId":"q23","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"12","actual":"9","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1923.266666999989},{"questionId":"q23","format":"toon","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1092.6574580000015},{"questionId":"q23","format":"csv","model":"claude-haiku-4-5-20251001","expected":"12","actual":"10","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":993.8929580000113},{"questionId":"q23","format":"xml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"11","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1253.2862920000043},{"questionId":"q23","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"10","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1106.7149579999968},{"questionId":"q24","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1006.815042000002},{"questionId":"q24","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1115.0337080000027},{"questionId":"q24","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1048.2592920000025},{"questionId":"q24","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputToken
s":2927,"outputTokens":5,"latencyMs":910.7634580000013},{"questionId":"q24","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":119476.61179200001},{"questionId":"q24","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":982.0880420000176},{"questionId":"q25","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1282.5784159999748},{"questionId":"q25","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1083.9630830000096},{"questionId":"q25","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":927.612374999997},{"questionId":"q25","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":954.8622909999976},{"questionId":"q25","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":4318.050334},{"questionId":"q25","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1295.8421249999956},{"questionId":"q26","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1169.7239170000248},{"questionId":"q26","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1160.2663750000065},{"questionId":"q26","format":"toon","model":"claude-haiku-
4-5-20251001","expected":"12","actual":"6","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1324.3053749999963},{"questionId":"q26","format":"csv","model":"claude-haiku-4-5-20251001","expected":"12","actual":"6","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1237.4985830000078},{"questionId":"q26","format":"xml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"8","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":2209.9241660000116},{"questionId":"q26","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"7","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1174.278707999998},{"questionId":"q27","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1188.8322920000064},{"questionId":"q27","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1392.668875000003},{"questionId":"q27","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1144.9836670000223},{"questionId":"q27","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1185.1800420000218},{"questionId":"q27","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"7","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1109.5572499999835},{"questionId":"q27","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"6","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1004.8929999999818},{"questionId":"q28","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"63","actual":"72","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1536
.55349999998},{"questionId":"q28","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"63","actual":"67","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1200.943041999999},{"questionId":"q28","format":"toon","model":"claude-haiku-4-5-20251001","expected":"63","actual":"72","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1219.1552500000107},{"questionId":"q28","format":"csv","model":"claude-haiku-4-5-20251001","expected":"63","actual":"62","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1005.1289589999942},{"questionId":"q28","format":"xml","model":"claude-haiku-4-5-20251001","expected":"63","actual":"62","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1041.4887500000186},{"questionId":"q28","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"63","actual":"68","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1143.78916700001},{"questionId":"q29","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"53","actual":"54","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":1129.352041999984},{"questionId":"q29","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"53","actual":"56","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1346.812042000005},{"questionId":"q29","format":"toon","model":"claude-haiku-4-5-20251001","expected":"53","actual":"57","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1013.5596249999944},{"questionId":"q29","format":"csv","model":"claude-haiku-4-5-20251001","expected":"53","actual":"45","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1137.8311660000181},{"questionId":"q29","format":"xml","model":"claude-haiku-4-5-20251001","expected":"53","actual":"57","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1281.3305829999736},{"questionId":"q29","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"53","act
ual":"62","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1146.0332919999782},{"questionId":"q30","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"39","actual":"28","isCorrect":false,"inputTokens":7941,"outputTokens":5,"latencyMs":962.1130420000118},{"questionId":"q30","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"39","actual":"34","isCorrect":false,"inputTokens":4832,"outputTokens":5,"latencyMs":1271.9399580000027},{"questionId":"q30","format":"toon","model":"claude-haiku-4-5-20251001","expected":"39","actual":"33","isCorrect":false,"inputTokens":3081,"outputTokens":5,"latencyMs":1231.0171670000127},{"questionId":"q30","format":"csv","model":"claude-haiku-4-5-20251001","expected":"39","actual":"27","isCorrect":false,"inputTokens":2927,"outputTokens":5,"latencyMs":1907.3603749999893},{"questionId":"q30","format":"xml","model":"claude-haiku-4-5-20251001","expected":"39","actual":"32","isCorrect":false,"inputTokens":9426,"outputTokens":5,"latencyMs":1237.180583999987},{"questionId":"q30","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"39","actual":"34","isCorrect":false,"inputTokens":5832,"outputTokens":5,"latencyMs":1330.3151660000149},{"questionId":"q31","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":7942,"outputTokens":5,"latencyMs":1211.030208000011},{"questionId":"q31","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":4833,"outputTokens":5,"latencyMs":1125.4293749999779},{"questionId":"q31","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":3082,"outputTokens":5,"latencyMs":1125.0806660000235},{"questionId":"q31","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":2928,"outputTokens":5,"latencyMs":1203.0037089999823},{"q
uestionId":"q31","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"8","isCorrect":false,"inputTokens":9427,"outputTokens":5,"latencyMs":1321.5858330000192},{"questionId":"q31","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":5833,"outputTokens":5,"latencyMs":1174.450708999997},{"questionId":"q32","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":7942,"outputTokens":5,"latencyMs":1131.7181249999849},{"questionId":"q32","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":4833,"outputTokens":5,"latencyMs":1292.2494589999842},{"questionId":"q32","format":"toon","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":3082,"outputTokens":5,"latencyMs":1056.2060000000056},{"questionId":"q32","format":"csv","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":2928,"outputTokens":5,"latencyMs":914.0282920000027},{"questionId":"q32","format":"xml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":9427,"outputTokens":5,"latencyMs":1058.9597080000094},{"questionId":"q32","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":5833,"outputTokens":5,"latencyMs":1138.2416660000163},{"questionId":"q33","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"15","actual":"9","isCorrect":false,"inputTokens":7942,"outputTokens":5,"latencyMs":1159.8052090000128},{"questionId":"q33","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"15","actual":"8","isCorrect":false,"inputTokens":4833,"outputTokens":5,"latencyMs":1179.3050000000221},{"questionId":"q33","format":"toon","model":"claude-haiku-4-5-20251001","expected":"15","actual":"7","isCorrect":fal
se,"inputTokens":3082,"outputTokens":5,"latencyMs":2072.5861670000013},{"questionId":"q33","format":"csv","model":"claude-haiku-4-5-20251001","expected":"15","actual":"8","isCorrect":false,"inputTokens":2928,"outputTokens":5,"latencyMs":1516.0497909999976},{"questionId":"q33","format":"xml","model":"claude-haiku-4-5-20251001","expected":"15","actual":"9","isCorrect":false,"inputTokens":9427,"outputTokens":5,"latencyMs":1098.749375000014},{"questionId":"q33","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"15","actual":"9","isCorrect":false,"inputTokens":5833,"outputTokens":5,"latencyMs":1028.6647499999963},{"questionId":"q34","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"12","actual":"14","isCorrect":false,"inputTokens":7935,"outputTokens":5,"latencyMs":1599.851416999998},{"questionId":"q34","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"12","actual":"11","isCorrect":false,"inputTokens":4826,"outputTokens":5,"latencyMs":1247.702500000014},{"questionId":"q34","format":"toon","model":"claude-haiku-4-5-20251001","expected":"12","actual":"14","isCorrect":false,"inputTokens":3075,"outputTokens":5,"latencyMs":1222.0808750000142},{"questionId":"q34","format":"csv","model":"claude-haiku-4-5-20251001","expected":"12","actual":"13","isCorrect":false,"inputTokens":2921,"outputTokens":5,"latencyMs":1043.5218340000138},{"questionId":"q34","format":"xml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"13","isCorrect":false,"inputTokens":9420,"outputTokens":5,"latencyMs":1324.776125000004},{"questionId":"q34","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"12","actual":"13","isCorrect":false,"inputTokens":5826,"outputTokens":5,"latencyMs":1299.7890419999894},{"questionId":"q35","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"13","isCorrect":false,"inputTokens":7935,"outputTokens":5,"latencyMs":1194.0861659999937},{"questionId":"q35","format":"json-com
pact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":4826,"outputTokens":5,"latencyMs":1254.9089580000145},{"questionId":"q35","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":3075,"outputTokens":5,"latencyMs":1161.8742499999935},{"questionId":"q35","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":2921,"outputTokens":5,"latencyMs":1073.0098749999888},{"questionId":"q35","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"12","isCorrect":false,"inputTokens":9420,"outputTokens":5,"latencyMs":2201.6162919999915},{"questionId":"q35","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":5826,"outputTokens":5,"latencyMs":1558.6932920000108},{"questionId":"q36","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":13071,"outputTokens":7,"latencyMs":1283.411334000004},{"questionId":"q36","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":8032,"outputTokens":7,"latencyMs":1105.1879999999946},{"questionId":"q36","format":"toon","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":8349,"outputTokens":7,"latencyMs":1409.0812499999884},{"questionId":"q36","format":"xml","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":14576,"outputTokens":7,"latencyMs":1401.9225830000069},{"questionId":"q36","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":9474,"outputTokens":7,"latencyMs":1155.835207999975},{"questionId":"q37","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"shipped","actual
":"shipped","isCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1765.247250000015},{"questionId":"q37","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1487.960958999989},{"questionId":"q37","format":"toon","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1007.5285840000142},{"questionId":"q37","format":"xml","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1259.9838750000054},{"questionId":"q37","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1105.1101250000065},{"questionId":"q38","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":13071,"outputTokens":7,"latencyMs":1630.5366249999788},{"questionId":"q38","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":8032,"outputTokens":7,"latencyMs":1085.6638749999984},{"questionId":"q38","format":"toon","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":8349,"outputTokens":7,"latencyMs":1176.3057919999992},{"questionId":"q38","format":"xml","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":14576,"outputTokens":7,"latencyMs":1291.9421250000014},{"questionId":"q38","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":9474,"outputTokens":7,"latencyMs":1621.5995840000105},{"questionId":"q39","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","i
sCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1089.3430830000143},{"questionId":"q39","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1120.2911670000176},{"questionId":"q39","format":"toon","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1409.0022079999908},{"questionId":"q39","format":"xml","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1679.7288340000086},{"questionId":"q39","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1494.9637079999957},{"questionId":"q40","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":13071,"outputTokens":8,"latencyMs":1279.6186250000028},{"questionId":"q40","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":8032,"outputTokens":8,"latencyMs":1300.0924999999988},{"questionId":"q40","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":8349,"outputTokens":8,"latencyMs":1365.5876669999852},{"questionId":"q40","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":14576,"outputTokens":8,"latencyMs":1800.0563749999856},{"questionId":"q40","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":9474,"outputTokens":8,"latencyMs":1211.8039999999746},{"questionId":"q41","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"pend
ing","actual":"pending","isCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1131.8700000000244},{"questionId":"q41","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1012.6772080000082},{"questionId":"q41","format":"toon","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1297.319542000012},{"questionId":"q41","format":"xml","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1182.1698329999927},{"questionId":"q41","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1186.7259579999954},{"questionId":"q42","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":13071,"outputTokens":8,"latencyMs":1212.6475830000127},{"questionId":"q42","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":8032,"outputTokens":8,"latencyMs":1013.4181670000253},{"questionId":"q42","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":8349,"outputTokens":8,"latencyMs":1341.400834},{"questionId":"q42","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":14576,"outputTokens":8,"latencyMs":1428.899792000011},{"questionId":"q42","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":9474,"outputTokens":8,"latencyMs":1146.0207080000255},{"questionId":"q43","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"cancelled",
"actual":"cancelled","isCorrect":true,"inputTokens":13071,"outputTokens":4,"latencyMs":1413.8295410000137},{"questionId":"q43","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8032,"outputTokens":4,"latencyMs":1371.2625839999819},{"questionId":"q43","format":"toon","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8349,"outputTokens":4,"latencyMs":1177.8167500000272},{"questionId":"q43","format":"xml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14576,"outputTokens":4,"latencyMs":1314.6559169999964},{"questionId":"q43","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9474,"outputTokens":4,"latencyMs":1084.5133339999884},{"questionId":"q44","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":13072,"outputTokens":11,"latencyMs":1845.7391250000219},{"questionId":"q44","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":8033,"outputTokens":11,"latencyMs":1215.7845419999794},{"questionId":"q44","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":8350,"outputTokens":11,"latencyMs":1371.353415999969},{"questionId":"q44","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":14577,"outputTokens":11,"latencyMs":1405.688749999972},{"questionId":"q44","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Debbie O'Kon I","actual":"Debbie O'Kon 
I","isCorrect":true,"inputTokens":9475,"outputTokens":11,"latencyMs":1195.1462080000201},{"questionId":"q45","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":13072,"outputTokens":16,"latencyMs":1444.1725420000148},{"questionId":"q45","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":8033,"outputTokens":16,"latencyMs":1851.9470420000143},{"questionId":"q45","format":"toon","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":8350,"outputTokens":16,"latencyMs":1421.1135419999482},{"questionId":"q45","format":"xml","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":14577,"outputTokens":16,"latencyMs":2023.5860419999808},{"questionId":"q45","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":9475,"outputTokens":16,"latencyMs":1497.3693329999805},{"questionId":"q46","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":13072,"outputTokens":10,"latencyMs":1659.9631250000093},{"questionId":"q46","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":8033,"outputTokens":10,"latencyMs":918.8577919999952},{"questionId":"q46","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":8350,"outputTokens":10,"latencyMs":1379.7277499999618},{"questionId":"q46
","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":14577,"outputTokens":10,"latencyMs":1363.199666999979},{"questionId":"q46","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":9475,"outputTokens":10,"latencyMs":1301.9265000000014},{"questionId":"q47","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":13071,"outputTokens":5,"latencyMs":1366.802708000003},{"questionId":"q47","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":8032,"outputTokens":5,"latencyMs":1218.0884169999626},{"questionId":"q47","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":8349,"outputTokens":5,"latencyMs":1183.3402910000295},{"questionId":"q47","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":14576,"outputTokens":5,"latencyMs":1063.356374999974},{"questionId":"q47","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":9474,"outputTokens":5,"latencyMs":1065.5065830000094},{"questionId":"q48","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":13072,"outputTokens":9,"latencyMs":1751.0218330000062},{"questionId":"q48","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":8033,"outputTokens":9,"latencyMs":1108.9815420000232},{"questionId":"q48","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty 
Senger","isCorrect":true,"inputTokens":8350,"outputTokens":9,"latencyMs":1787.948665999982},{"questionId":"q48","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":14577,"outputTokens":9,"latencyMs":1295.9337500000256},{"questionId":"q48","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":9475,"outputTokens":9,"latencyMs":1736.0960830000113},{"questionId":"q49","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":13072,"outputTokens":13,"latencyMs":1199.4784590000054},{"questionId":"q49","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":8033,"outputTokens":13,"latencyMs":1075.0446249999804},{"questionId":"q49","format":"toon","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":8350,"outputTokens":13,"latencyMs":1981.9005830000388},{"questionId":"q49","format":"xml","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":14577,"outputTokens":13,"latencyMs":1281.1696249999804},{"questionId":"q49","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":9475,"outputTokens":13,"latencyMs":1370.431249999965},{"questionId":"q50","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":13072,"outputTokens":10,"latencyMs":1113.5716249999823},{"questionId":"q50","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect"
:true,"inputTokens":8033,"outputTokens":10,"latencyMs":1239.1278750000056},{"questionId":"q50","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":8350,"outputTokens":10,"latencyMs":1279.7286249999888},{"questionId":"q50","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":14577,"outputTokens":10,"latencyMs":1407.9125830000266},{"questionId":"q50","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":9475,"outputTokens":10,"latencyMs":1706.669125000015},{"questionId":"q51","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":13071,"outputTokens":5,"latencyMs":1270.9225829999777},{"questionId":"q51","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":8032,"outputTokens":5,"latencyMs":1130.0672090000007},{"questionId":"q51","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":8349,"outputTokens":5,"latencyMs":1345.5521249999874},{"questionId":"q51","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":14576,"outputTokens":5,"latencyMs":1226.1289170000236},{"questionId":"q51","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":9474,"outputTokens":5,"latencyMs":1119.2856669999892},{"questionId":"q52","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":13067,"outputTokens":5,"latencyMs":1196.8826250000275},{"questionId":"q52","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":8028,"outputTokens":5,"latencyMs"
:1126.873499999987},{"questionId":"q52","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":8345,"outputTokens":5,"latencyMs":980.0860000000102},{"questionId":"q52","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"9","isCorrect":false,"inputTokens":14572,"outputTokens":5,"latencyMs":1064.6709169999813},{"questionId":"q52","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"9","isCorrect":false,"inputTokens":9470,"outputTokens":5,"latencyMs":1166.8697909999755},{"questionId":"q53","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":13067,"outputTokens":5,"latencyMs":1113.0990410000086},{"questionId":"q53","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":8028,"outputTokens":5,"latencyMs":1303.7009589999798},{"questionId":"q53","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":8345,"outputTokens":5,"latencyMs":1238.1067499999772},{"questionId":"q53","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":14572,"outputTokens":5,"latencyMs":1333.5686250000144},{"questionId":"q53","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":9470,"outputTokens":5,"latencyMs":1225.3654999999562},{"questionId":"q54","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"8","isCorrect":false,"inputTokens":13067,"outputTokens":5,"latencyMs":1399.3465410000063},{"questionId":"q54","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":8028,"outputTokens":5,"latencyMs":1113.1435410000267},{"questionId":"q54","format":"toon","model":"claude-haiku-4-5-20251001","expect
ed":"10","actual":"6","isCorrect":false,"inputTokens":8345,"outputTokens":5,"latencyMs":1724.1194590000086},{"questionId":"q54","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":14572,"outputTokens":5,"latencyMs":1181.9311669999734},{"questionId":"q54","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"9","isCorrect":false,"inputTokens":9470,"outputTokens":5,"latencyMs":1049.9266670000507},{"questionId":"q55","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"42861.75","isCorrect":false,"inputTokens":13067,"outputTokens":8,"latencyMs":1439.9581250000047},{"questionId":"q55","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"40558.41","isCorrect":false,"inputTokens":8028,"outputTokens":8,"latencyMs":1148.0986250000424},{"questionId":"q55","format":"toon","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"47834.79","isCorrect":false,"inputTokens":8345,"outputTokens":8,"latencyMs":1271.2527080000145},{"questionId":"q55","format":"xml","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"38565.28","isCorrect":false,"inputTokens":14572,"outputTokens":8,"latencyMs":1461.959582999989},{"questionId":"q55","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"34904.81","actual":"41847.94","isCorrect":false,"inputTokens":9470,"outputTokens":8,"latencyMs":1480.8175409999676},{"questionId":"q56","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"756.89","isCorrect":false,"inputTokens":13065,"outputTokens":7,"latencyMs":1130.103833000001},{"questionId":"q56","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"822.94","isCorrect":false,"inputTokens":8026,"outputTokens":7,"latencyMs":1252.784875000012},{"questionId":"q56","format":"toon","model":"claude-haiku-4-5-20251001","expected":"698.10","actu
al":"791.84","isCorrect":false,"inputTokens":8343,"outputTokens":7,"latencyMs":1113.3430420000223},{"questionId":"q56","format":"xml","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"766.89","isCorrect":false,"inputTokens":14570,"outputTokens":7,"latencyMs":2177.307124999992},{"questionId":"q56","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"698.10","actual":"779.77","isCorrect":false,"inputTokens":9468,"outputTokens":7,"latencyMs":1105.3537500000093},{"questionId":"q57","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":13066,"outputTokens":5,"latencyMs":1392.8512499999488},{"questionId":"q57","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8027,"outputTokens":5,"latencyMs":1126.0595420000027},{"questionId":"q57","format":"toon","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8344,"outputTokens":5,"latencyMs":1194.2017089999863},{"questionId":"q57","format":"xml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":14571,"outputTokens":5,"latencyMs":1171.6571669999976},{"questionId":"q57","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":9469,"outputTokens":5,"latencyMs":1206.3512079999782},{"questionId":"q58","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":13065,"outputTokens":8,"latencyMs":1393.6799589999719},{"questionId":"q58","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":8026,"outputTokens":8,"latencyMs":1519.2332499999902},{"questionId":"q58","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":8343,"
outputTokens":8,"latencyMs":2102.0287920000264},{"questionId":"q58","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":14570,"outputTokens":8,"latencyMs":1612.3956250000047},{"questionId":"q58","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":9468,"outputTokens":8,"latencyMs":1793.879041999986},{"questionId":"q59","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"43","actual":"49","isCorrect":false,"inputTokens":13069,"outputTokens":5,"latencyMs":1420.1437499999884},{"questionId":"q59","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"43","actual":"47","isCorrect":false,"inputTokens":8030,"outputTokens":5,"latencyMs":1007.740334000031},{"questionId":"q59","format":"toon","model":"claude-haiku-4-5-20251001","expected":"43","actual":"46","isCorrect":false,"inputTokens":8347,"outputTokens":5,"latencyMs":1120.1873340000166},{"questionId":"q59","format":"xml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"47","isCorrect":false,"inputTokens":14574,"outputTokens":5,"latencyMs":1570.4214159999974},{"questionId":"q59","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"49","isCorrect":false,"inputTokens":9472,"outputTokens":5,"latencyMs":1261.9378329999745},{"questionId":"q60","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"37","actual":"41","isCorrect":false,"inputTokens":13069,"outputTokens":5,"latencyMs":1163.1469999999972},{"questionId":"q60","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"37","actual":"35","isCorrect":false,"inputTokens":8030,"outputTokens":5,"latencyMs":1140.09945899999},{"questionId":"q60","format":"toon","model":"claude-haiku-4-5-20251001","expected":"37","actual":"38","isCorrect":false,"inputTokens":8347,"outputTokens":5,"latencyMs":1130.5380000000005},{"questionId":"q60","forma
t":"xml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"38","isCorrect":false,"inputTokens":14574,"outputTokens":5,"latencyMs":1129.3633750000154},{"questionId":"q60","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"36","isCorrect":false,"inputTokens":9472,"outputTokens":5,"latencyMs":1117.113416999986},{"questionId":"q61","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"28","actual":"31","isCorrect":false,"inputTokens":13069,"outputTokens":5,"latencyMs":1169.0952919999836},{"questionId":"q61","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"28","actual":"34","isCorrect":false,"inputTokens":8030,"outputTokens":5,"latencyMs":996.319291999971},{"questionId":"q61","format":"toon","model":"claude-haiku-4-5-20251001","expected":"28","actual":"32","isCorrect":false,"inputTokens":8347,"outputTokens":5,"latencyMs":1430.8997499999823},{"questionId":"q61","format":"xml","model":"claude-haiku-4-5-20251001","expected":"28","actual":"31","isCorrect":false,"inputTokens":14574,"outputTokens":5,"latencyMs":1613.020166000002},{"questionId":"q61","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"28","actual":"34","isCorrect":false,"inputTokens":9472,"outputTokens":5,"latencyMs":1436.6957920000423},{"questionId":"q62","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":13073,"outputTokens":5,"latencyMs":1080.898833000043},{"questionId":"q62","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":8034,"outputTokens":5,"latencyMs":1029.815249999985},{"questionId":"q62","format":"toon","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputTokens":8351,"outputTokens":5,"latencyMs":1269.5112920000101},{"questionId":"q62","format":"xml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"7","isCorrect":false,"inputToke
ns":14578,"outputTokens":5,"latencyMs":2015.9492079999764},{"questionId":"q62","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":9476,"outputTokens":5,"latencyMs":1004.2100409999839},{"questionId":"q63","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6","actual":"8","isCorrect":false,"inputTokens":13073,"outputTokens":5,"latencyMs":1069.4494169999962},{"questionId":"q63","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":8034,"outputTokens":5,"latencyMs":1287.5518329999759},{"questionId":"q63","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":8351,"outputTokens":5,"latencyMs":1275.4342079999624},{"questionId":"q63","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":14578,"outputTokens":5,"latencyMs":1156.013666999992},{"questionId":"q63","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"7","isCorrect":false,"inputTokens":9476,"outputTokens":5,"latencyMs":1259.9707500000368},{"questionId":"q64","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":13073,"outputTokens":5,"latencyMs":1450.3827080000192},{"questionId":"q64","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":8034,"outputTokens":5,"latencyMs":1151.1852920000092},{"questionId":"q64","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":8351,"outputTokens":5,"latencyMs":1510.7578340000473},{"questionId":"q64","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10","actual":"6","isCorrect":false,"inputTokens":14578,"outputTokens":5,"latencyMs":1430.823583999998},{"questionId":"q64","format":"yaml","model":"claude
-haiku-4-5-20251001","expected":"10","actual":"7","isCorrect":false,"inputTokens":9476,"outputTokens":5,"latencyMs":1582.9399579999736},{"questionId":"q65","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"9","actual":"10","isCorrect":false,"inputTokens":13073,"outputTokens":5,"latencyMs":1475.282125000027},{"questionId":"q65","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"9","actual":"7","isCorrect":false,"inputTokens":8034,"outputTokens":5,"latencyMs":1132.6659169999766},{"questionId":"q65","format":"toon","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":8351,"outputTokens":5,"latencyMs":1348.3977080000332},{"questionId":"q65","format":"xml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"7","isCorrect":false,"inputTokens":14578,"outputTokens":5,"latencyMs":1487.2882499999832},{"questionId":"q65","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":9476,"outputTokens":5,"latencyMs":1363.7185419999878},{"questionId":"q66","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":13074,"outputTokens":5,"latencyMs":1189.3430410000146},{"questionId":"q66","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":8035,"outputTokens":5,"latencyMs":1351.1734999999753},{"questionId":"q66","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":8352,"outputTokens":5,"latencyMs":1177.2700830000103},{"questionId":"q66","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":14579,"outputTokens":5,"latencyMs":1242.0754999999772},{"questionId":"q66","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":9477,"outputTokens":5,"late
ncyMs":1276.0302499999525},{"questionId":"q67","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":13074,"outputTokens":5,"latencyMs":1344.6504580000183},{"questionId":"q67","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":8035,"outputTokens":5,"latencyMs":1176.157292000018},{"questionId":"q67","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":8352,"outputTokens":5,"latencyMs":20701.426457999973},{"questionId":"q67","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":14579,"outputTokens":5,"latencyMs":2367.8421249999665},{"questionId":"q67","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"6","isCorrect":false,"inputTokens":9477,"outputTokens":5,"latencyMs":1153.2113339999923},{"questionId":"q68","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":13074,"outputTokens":5,"latencyMs":1657.7776670000167},{"questionId":"q68","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"4","isCorrect":false,"inputTokens":8035,"outputTokens":5,"latencyMs":1173.5819999999949},{"questionId":"q68","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":8352,"outputTokens":5,"latencyMs":1232.1174580000225},{"questionId":"q68","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":14579,"outputTokens":5,"latencyMs":1349.014124999987},{"questionId":"q68","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":9477,"outputTokens":5,"latencyMs":1086.6965840000194},{"questionId":"q69","format":"json-pretty","model":"claude-haiku-4-5-20251001","expect
ed":"20","actual":"23","isCorrect":false,"inputTokens":13077,"outputTokens":5,"latencyMs":1153.7664579999982},{"questionId":"q69","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"20","actual":"18","isCorrect":false,"inputTokens":8038,"outputTokens":5,"latencyMs":1132.6050000000396},{"questionId":"q69","format":"toon","model":"claude-haiku-4-5-20251001","expected":"20","actual":"23","isCorrect":false,"inputTokens":8355,"outputTokens":5,"latencyMs":1340.9702089999919},{"questionId":"q69","format":"xml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"21","isCorrect":false,"inputTokens":14582,"outputTokens":5,"latencyMs":1389.9804579999764},{"questionId":"q69","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"23","isCorrect":false,"inputTokens":9480,"outputTokens":5,"latencyMs":1181.636083999998},{"questionId":"q70","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"19","actual":"15","isCorrect":false,"inputTokens":13077,"outputTokens":5,"latencyMs":1226.459166000015},{"questionId":"q70","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"19","actual":"23","isCorrect":false,"inputTokens":8038,"outputTokens":5,"latencyMs":1483.1681670000544},{"questionId":"q70","format":"toon","model":"claude-haiku-4-5-20251001","expected":"19","actual":"18","isCorrect":false,"inputTokens":8355,"outputTokens":5,"latencyMs":1059.183416999993},{"questionId":"q70","format":"xml","model":"claude-haiku-4-5-20251001","expected":"19","actual":"19","isCorrect":true,"inputTokens":14582,"outputTokens":5,"latencyMs":1345.0946669999976},{"questionId":"q70","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"19","actual":"24","isCorrect":false,"inputTokens":9480,"outputTokens":5,"latencyMs":1219.1354169999831},{"questionId":"q71","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":4146,"outputTokens":6,"latencyMs":
1277.316541999986},{"questionId":"q71","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":2457,"outputTokens":6,"latencyMs":2851.0970419999794},{"questionId":"q71","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1603,"outputTokens":6,"latencyMs":1085.2489160000114},{"questionId":"q71","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1511,"outputTokens":6,"latencyMs":1086.2770000000019},{"questionId":"q71","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":4848,"outputTokens":6,"latencyMs":1117.6914580000448},{"questionId":"q71","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":3177,"outputTokens":6,"latencyMs":1230.9872919999762},{"questionId":"q72","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":4146,"outputTokens":8,"latencyMs":1094.448000000033},{"questionId":"q72","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":2457,"outputTokens":8,"latencyMs":1426.7915410000132},{"questionId":"q72","format":"toon","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1603,"outputTokens":8,"latencyMs":900.4749999999767},{"questionId":"q72","format":"csv","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1511,"outputTokens":8,"latencyMs":1126.0100419999799},{"questionId":"q72","format":"xml","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":4848,"outputTokens":8,"latencyMs":1176.668249999988},{"questionId":"
q72","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":3177,"outputTokens":8,"latencyMs":999.8163330000243},{"questionId":"q73","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4147,"outputTokens":7,"latencyMs":1333.652624999988},{"questionId":"q73","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":2458,"outputTokens":7,"latencyMs":1092.060541999992},{"questionId":"q73","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1604,"outputTokens":7,"latencyMs":1007.1116670000483},{"questionId":"q73","format":"csv","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1512,"outputTokens":7,"latencyMs":1013.337332999974},{"questionId":"q73","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4849,"outputTokens":7,"latencyMs":1650.107040999981},{"questionId":"q73","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3178,"outputTokens":7,"latencyMs":1439.3979579999577},{"questionId":"q74","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":4148,"outputTokens":5,"latencyMs":1417.6148329999996},{"questionId":"q74","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":2459,"outputTokens":5,"latencyMs":1242.6064170000027},{"questionId":"q74","format":"toon","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":1605,"outputTokens":5,"latencyMs":1100.431958000001},{"questionId":"q74","format":"csv","model":"claude-haiku-4-5-20251001","expected":"32",
"actual":"32","isCorrect":true,"inputTokens":1513,"outputTokens":5,"latencyMs":986.0364159999881},{"questionId":"q74","format":"xml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":4850,"outputTokens":5,"latencyMs":1172.132042000012},{"questionId":"q74","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":3179,"outputTokens":5,"latencyMs":1304.2830830000457},{"questionId":"q75","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":4146,"outputTokens":6,"latencyMs":1242.7824169999803},{"questionId":"q75","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":2457,"outputTokens":6,"latencyMs":1163.7357910000137},{"questionId":"q75","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1603,"outputTokens":6,"latencyMs":1151.549875000026},{"questionId":"q75","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1511,"outputTokens":6,"latencyMs":1063.787249999994},{"questionId":"q75","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":4848,"outputTokens":6,"latencyMs":1115.4328329999698},{"questionId":"q75","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":3177,"outputTokens":6,"latencyMs":1137.960791999998},{"questionId":"q76","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":4146,"outputTokens":8,"latencyMs":1201.6026249999995},{"questionId":"q76","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":2457,"outputTokens":8,"
latencyMs":1124.2640000000247},{"questionId":"q76","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1603,"outputTokens":8,"latencyMs":927.5135000000009},{"questionId":"q76","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1511,"outputTokens":8,"latencyMs":1195.1520000000019},{"questionId":"q76","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":4848,"outputTokens":8,"latencyMs":1325.3894999999902},{"questionId":"q76","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":3177,"outputTokens":8,"latencyMs":1116.2845840000082},{"questionId":"q77","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":4147,"outputTokens":7,"latencyMs":1075.8688749999856},{"questionId":"q77","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":2458,"outputTokens":7,"latencyMs":1041.189167000004},{"questionId":"q77","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1604,"outputTokens":7,"latencyMs":861.4979169999715},{"questionId":"q77","format":"csv","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1512,"outputTokens":7,"latencyMs":1134.7716669999645},{"questionId":"q77","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":4849,"outputTokens":7,"latencyMs":1177.7597500000265},{"questionId":"q77","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":3178,"outputTokens":7,"latencyMs":1119.7470000000321},{"questionId":"q78","format":"json-
pretty","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":4148,"outputTokens":5,"latencyMs":996.1894999999786},{"questionId":"q78","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":2459,"outputTokens":5,"latencyMs":1080.7052919999696},{"questionId":"q78","format":"toon","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":1605,"outputTokens":5,"latencyMs":935.0501249999506},{"questionId":"q78","format":"csv","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":1513,"outputTokens":5,"latencyMs":1056.5405419999734},{"questionId":"q78","format":"xml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":4850,"outputTokens":5,"latencyMs":1489.8931659999653},{"questionId":"q78","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"25","isCorrect":true,"inputTokens":3179,"outputTokens":5,"latencyMs":79487.06733300001},{"questionId":"q79","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":4146,"outputTokens":6,"latencyMs":1313.9647910000058},{"questionId":"q79","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":2457,"outputTokens":6,"latencyMs":1343.1313749999972},{"questionId":"q79","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1603,"outputTokens":6,"latencyMs":1334.5621249999967},{"questionId":"q79","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1511,"outputTokens":6,"latencyMs":1067.4220000000205},{"questionId":"q79","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"in
putTokens":4848,"outputTokens":6,"latencyMs":1150.077583999955},{"questionId":"q79","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":3177,"outputTokens":6,"latencyMs":981.6363750000019},{"questionId":"q80","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":4143,"outputTokens":5,"latencyMs":1220.5045420000097},{"questionId":"q80","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":2454,"outputTokens":5,"latencyMs":1120.3278329999885},{"questionId":"q80","format":"toon","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1600,"outputTokens":5,"latencyMs":1426.4953749999986},{"questionId":"q80","format":"csv","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1508,"outputTokens":5,"latencyMs":941.414792000025},{"questionId":"q80","format":"xml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"59","isCorrect":false,"inputTokens":4845,"outputTokens":5,"latencyMs":1227.888500000001},{"questionId":"q80","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":3174,"outputTokens":5,"latencyMs":1216.2199170000385},{"questionId":"q81","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"367840","isCorrect":false,"inputTokens":4144,"outputTokens":6,"latencyMs":1537.3239579999936},{"questionId":"q81","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"340858","isCorrect":false,"inputTokens":2455,"outputTokens":6,"latencyMs":1150.2108749999898},{"questionId":"q81","format":"toon","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"326657","isCorrect":false,"inputTokens":1601,"outputTokens":6,"latencyMs":804.5737499999814},{"questionI
d":"q81","format":"csv","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"370775","isCorrect":false,"inputTokens":1509,"outputTokens":6,"latencyMs":840.2609170000069},{"questionId":"q81","format":"xml","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"341506","isCorrect":false,"inputTokens":4846,"outputTokens":6,"latencyMs":1314.695624999993},{"questionId":"q81","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"328320","actual":"176916","isCorrect":false,"inputTokens":3175,"outputTokens":6,"latencyMs":1555.4265829999931},{"questionId":"q82","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1434","isCorrect":false,"inputTokens":4145,"outputTokens":6,"latencyMs":1159.8954170000507},{"questionId":"q82","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1389","isCorrect":false,"inputTokens":2456,"outputTokens":6,"latencyMs":1061.4540839999681},{"questionId":"q82","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1577","isCorrect":false,"inputTokens":1602,"outputTokens":6,"latencyMs":1123.5626250000205},{"questionId":"q82","format":"csv","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1357","isCorrect":false,"inputTokens":1510,"outputTokens":6,"latencyMs":992.4902909999946},{"questionId":"q82","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1347","isCorrect":false,"inputTokens":4847,"outputTokens":6,"latencyMs":1159.3279999999795},{"questionId":"q82","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1791","actual":"1325","isCorrect":false,"inputTokens":3176,"outputTokens":6,"latencyMs":1181.7296659999993},{"questionId":"q83","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"343045.86","isCorrect":false,"inputTokens":4142,"outputTokens":8,"latencyMs":1219.903667000006},{"questionId":"q83","format":"json-compact","model":"cl
aude-haiku-4-5-20251001","expected":"311695.88","actual":"412256.91","isCorrect":false,"inputTokens":2453,"outputTokens":8,"latencyMs":1145.0830829999177},{"questionId":"q83","format":"toon","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"343945.11","isCorrect":false,"inputTokens":1599,"outputTokens":8,"latencyMs":1223.68987500004},{"questionId":"q83","format":"csv","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"390626.67","isCorrect":false,"inputTokens":1507,"outputTokens":8,"latencyMs":1040.1668749999953},{"questionId":"q83","format":"xml","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"382889.24","isCorrect":false,"inputTokens":4844,"outputTokens":8,"latencyMs":1409.154291999992},{"questionId":"q83","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"311695.88","actual":"356170.34","isCorrect":false,"inputTokens":3173,"outputTokens":8,"latencyMs":956.8994170000078},{"questionId":"q84","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4140,"outputTokens":7,"latencyMs":1256.6058330000378},{"questionId":"q84","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.537","isCorrect":false,"inputTokens":2451,"outputTokens":7,"latencyMs":1293.1294590000762},{"questionId":"q84","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.5263","isCorrect":true,"inputTokens":1597,"outputTokens":8,"latencyMs":854.5393750000512},{"questionId":"q84","format":"csv","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.5246","isCorrect":false,"inputTokens":1505,"outputTokens":8,"latencyMs":1119.7116249999963},{"questionId":"q84","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4842,"outputTokens":7,"latencyMs":1132.9079159999965},{"questionId":"q84","format":"yaml","model":"claude-haiku-4-5-2025100
1","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3171,"outputTokens":7,"latencyMs":1521.2640829999},{"questionId":"q85","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":4145,"outputTokens":5,"latencyMs":1171.5495420000516},{"questionId":"q85","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"24","isCorrect":false,"inputTokens":2456,"outputTokens":5,"latencyMs":1003.8035830000881},{"questionId":"q85","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":1602,"outputTokens":5,"latencyMs":972.5325840000296},{"questionId":"q85","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"18","isCorrect":false,"inputTokens":1510,"outputTokens":5,"latencyMs":905.6272500000196},{"questionId":"q85","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":4847,"outputTokens":5,"latencyMs":1082.7218340000836},{"questionId":"q85","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"20","isCorrect":false,"inputTokens":3176,"outputTokens":5,"latencyMs":997.8206250000512},{"questionId":"q86","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"42","actual":"25","isCorrect":false,"inputTokens":4145,"outputTokens":5,"latencyMs":1060.1338329999708},{"questionId":"q86","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"42","actual":"33","isCorrect":false,"inputTokens":2456,"outputTokens":5,"latencyMs":1412.0989999999292},{"questionId":"q86","format":"toon","model":"claude-haiku-4-5-20251001","expected":"42","actual":"42","isCorrect":true,"inputTokens":1602,"outputTokens":5,"latencyMs":980.2765409999993},{"questionId":"q86","format":"csv","model":"claude-haiku-4-5-20251001","expected":"42","actual":"41","isCorrect":false,"inputTokens":1510,"outputTokens":5,"latencyMs"
:943.2066659999546},{"questionId":"q86","format":"xml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"24","isCorrect":false,"inputTokens":4847,"outputTokens":5,"latencyMs":1598.437624999904},{"questionId":"q86","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"31","isCorrect":false,"inputTokens":3176,"outputTokens":5,"latencyMs":939.6132920000236},{"questionId":"q87","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"20","actual":"15","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1164.4256670000032},{"questionId":"q87","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"20","actual":"18","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1529.9007079999428},{"questionId":"q87","format":"toon","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":1610,"outputTokens":5,"latencyMs":1043.3165000000736},{"questionId":"q87","format":"csv","model":"claude-haiku-4-5-20251001","expected":"20","actual":"17","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1014.4737080000341},{"questionId":"q87","format":"xml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"15","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1299.7567919999128},{"questionId":"q87","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"20","isCorrect":true,"inputTokens":3184,"outputTokens":5,"latencyMs":1036.0580000000773},{"questionId":"q88","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"14","actual":"11","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1351.0695419999538},{"questionId":"q88","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"14","actual":"13","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1451.123499999987},{"questionId":"q88","format":"toon","model":"claude-haiku-4-5-20251001","expe
cted":"14","actual":"11","isCorrect":false,"inputTokens":1610,"outputTokens":5,"latencyMs":1011.8816250000382},{"questionId":"q88","format":"csv","model":"claude-haiku-4-5-20251001","expected":"14","actual":"9","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1116.2810419999296},{"questionId":"q88","format":"xml","model":"claude-haiku-4-5-20251001","expected":"14","actual":"9","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1202.6905839999672},{"questionId":"q88","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"14","actual":"10","isCorrect":false,"inputTokens":3184,"outputTokens":5,"latencyMs":1217.9919999999693},{"questionId":"q89","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":4151,"outputTokens":5,"latencyMs":1086.266249999986},{"questionId":"q89","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":2462,"outputTokens":5,"latencyMs":1425.2647910000524},{"questionId":"q89","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1608,"outputTokens":5,"latencyMs":1165.0251670000143},{"questionId":"q89","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":1516,"outputTokens":5,"latencyMs":1302.6017080000602},{"questionId":"q89","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4853,"outputTokens":5,"latencyMs":1207.5639170000795},{"questionId":"q89","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"13","isCorrect":false,"inputTokens":3182,"outputTokens":5,"latencyMs":1003.2787090000929},{"questionId":"q90","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4151,"outputTokens":5,"latencyMs":1314.302208000
0956},{"questionId":"q90","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":2462,"outputTokens":5,"latencyMs":2278.123583000037},{"questionId":"q90","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"13","isCorrect":false,"inputTokens":1608,"outputTokens":5,"latencyMs":1040.3857919999864},{"questionId":"q90","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1516,"outputTokens":5,"latencyMs":1609.7861250001006},{"questionId":"q90","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4853,"outputTokens":5,"latencyMs":1177.3617499999236},{"questionId":"q90","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"13","isCorrect":false,"inputTokens":3182,"outputTokens":5,"latencyMs":1123.888500000001},{"questionId":"q91","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"20","actual":"13","isCorrect":false,"inputTokens":4151,"outputTokens":5,"latencyMs":2513.7377080000006},{"questionId":"q91","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":2462,"outputTokens":5,"latencyMs":1089.2179999999935},{"questionId":"q91","format":"toon","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":1608,"outputTokens":5,"latencyMs":1062.1664580000797},{"questionId":"q91","format":"csv","model":"claude-haiku-4-5-20251001","expected":"20","actual":"18","isCorrect":false,"inputTokens":1516,"outputTokens":5,"latencyMs":1236.656958000036},{"questionId":"q91","format":"xml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"16","isCorrect":false,"inputTokens":4853,"outputTokens":5,"latencyMs":1146.2815420000115},{"questionId":"q91","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"20","actual":"
14","isCorrect":false,"inputTokens":3182,"outputTokens":5,"latencyMs":1096.0875419999938},{"questionId":"q92","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"32","actual":"25","isCorrect":false,"inputTokens":4152,"outputTokens":5,"latencyMs":987.8946670000441},{"questionId":"q92","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"32","actual":"23","isCorrect":false,"inputTokens":2463,"outputTokens":5,"latencyMs":1220.6643329999642},{"questionId":"q92","format":"toon","model":"claude-haiku-4-5-20251001","expected":"32","actual":"32","isCorrect":true,"inputTokens":1609,"outputTokens":5,"latencyMs":937.5257920000004},{"questionId":"q92","format":"csv","model":"claude-haiku-4-5-20251001","expected":"32","actual":"24","isCorrect":false,"inputTokens":1517,"outputTokens":5,"latencyMs":2063.006832999992},{"questionId":"q92","format":"xml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"26","isCorrect":false,"inputTokens":4854,"outputTokens":5,"latencyMs":1468.0255830000388},{"questionId":"q92","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"32","actual":"24","isCorrect":false,"inputTokens":3183,"outputTokens":5,"latencyMs":966.7985420000041},{"questionId":"q93","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"9","actual":"11","isCorrect":false,"inputTokens":4152,"outputTokens":5,"latencyMs":1278.894708000007},{"questionId":"q93","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":2463,"outputTokens":5,"latencyMs":1107.823999999906},{"questionId":"q93","format":"toon","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":1609,"outputTokens":5,"latencyMs":907.0621670000255},{"questionId":"q93","format":"csv","model":"claude-haiku-4-5-20251001","expected":"9","actual":"10","isCorrect":false,"inputTokens":1517,"outputTokens":5,"latencyMs":1304.2223330000415},{"questionId":"q9
3","format":"xml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"10","isCorrect":false,"inputTokens":4854,"outputTokens":5,"latencyMs":1006.5030419999966},{"questionId":"q93","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"9","actual":"8","isCorrect":false,"inputTokens":3183,"outputTokens":5,"latencyMs":1042.183209000039},{"questionId":"q94","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1480.0997500000522},{"questionId":"q94","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1601.9094999999506},{"questionId":"q94","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":1610,"outputTokens":5,"latencyMs":1208.4925000000512},{"questionId":"q94","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1313.0127909999574},{"questionId":"q94","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"17","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1156.5654589999467},{"questionId":"q94","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":3184,"outputTokens":5,"latencyMs":1046.6653750000987},{"questionId":"q95","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"22","actual":"15","isCorrect":false,"inputTokens":4153,"outputTokens":5,"latencyMs":1009.8270000000484},{"questionId":"q95","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":2464,"outputTokens":5,"latencyMs":1112.5791250000475},{"questionId":"q95","format":"toon","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":
false,"inputTokens":1610,"outputTokens":5,"latencyMs":1165.6955840000883},{"questionId":"q95","format":"csv","model":"claude-haiku-4-5-20251001","expected":"22","actual":"16","isCorrect":false,"inputTokens":1518,"outputTokens":5,"latencyMs":1050.5519169999752},{"questionId":"q95","format":"xml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":4855,"outputTokens":5,"latencyMs":1023.872166999965},{"questionId":"q95","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"22","actual":"14","isCorrect":false,"inputTokens":3184,"outputTokens":5,"latencyMs":1117.4546669999836},{"questionId":"q96","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":17482,"outputTokens":6,"latencyMs":1361.8696670000209},{"questionId":"q96","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":12617,"outputTokens":6,"latencyMs":1247.9789170000004},{"questionId":"q96","format":"toon","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":9380,"outputTokens":6,"latencyMs":1211.6023749999003},{"questionId":"q96","format":"csv","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":9198,"outputTokens":6,"latencyMs":1307.9147920000833},{"questionId":"q96","format":"xml","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":19872,"outputTokens":6,"latencyMs":1437.3064170000143},{"questionId":"q96","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":14557,"outputTokens":6,"latencyMs":1455.2815420000115},{"questionId":"q97","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17481,"outputTokens":6,"latencyM
s":1353.6013749999693},{"questionId":"q97","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":12616,"outputTokens":6,"latencyMs":1534.4137919999193},{"questionId":"q97","format":"toon","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":9379,"outputTokens":6,"latencyMs":2213.0383339999244},{"questionId":"q97","format":"csv","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":9197,"outputTokens":6,"latencyMs":1201.597165999934},{"questionId":"q97","format":"xml","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":19871,"outputTokens":6,"latencyMs":1513.969000000041},{"questionId":"q97","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":14556,"outputTokens":6,"latencyMs":1353.5847500000382},{"questionId":"q98","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17476,"outputTokens":6,"latencyMs":1397.7247079999652},{"questionId":"q98","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":12611,"outputTokens":6,"latencyMs":1268.1517080001067},{"questionId":"q98","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":9374,"outputTokens":6,"latencyMs":1237.637166999979},{"questionId":"q98","format":"csv","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":9192,"outputTokens":6,"latencyMs":1559.2043330000015},{"questionId":"q98","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":19866,"outputTokens":6,"latencyMs":1554.237124999985},{"questionId":"q98","format":"yaml",
"model":"claude-haiku-4-5-20251001","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":14551,"outputTokens":6,"latencyMs":4180.1094579999335},{"questionId":"q99","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":17481,"outputTokens":4,"latencyMs":1550.9610000000102},{"questionId":"q99","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":12616,"outputTokens":4,"latencyMs":1546.9043330000713},{"questionId":"q99","format":"toon","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":9379,"outputTokens":4,"latencyMs":1625.3927080000285},{"questionId":"q99","format":"csv","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":9197,"outputTokens":4,"latencyMs":1535.6835000000428},{"questionId":"q99","format":"xml","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":19871,"outputTokens":4,"latencyMs":2670.4785830000183},{"questionId":"q99","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"master","actual":"master","isCorrect":true,"inputTokens":14556,"outputTokens":4,"latencyMs":1569.8371250000782},{"questionId":"q100","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17476,"outputTokens":6,"latencyMs":1596.1594999999506},{"questionId":"q100","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":12611,"outputTokens":6,"latencyMs":1386.254958000034},{"questionId":"q100","format":"toon","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":9374,"outputTokens":6,"latencyMs":1336.2282079999568},{"questionId":"q100","format":"csv","model":"claude-h
aiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":9192,"outputTokens":6,"latencyMs":1287.5360420000507},{"questionId":"q100","format":"xml","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":19866,"outputTokens":6,"latencyMs":1648.8853339999914},{"questionId":"q100","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":14551,"outputTokens":6,"latencyMs":1202.9672089999076},{"questionId":"q101","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17481,"outputTokens":6,"latencyMs":1287.3107910000253},{"questionId":"q101","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":12616,"outputTokens":6,"latencyMs":1320.3634160000365},{"questionId":"q101","format":"toon","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":9379,"outputTokens":6,"latencyMs":1191.4255419999827},{"questionId":"q101","format":"csv","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":9197,"outputTokens":6,"latencyMs":1206.390000000014},{"questionId":"q101","format":"xml","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":19871,"outputTokens":6,"latencyMs":1499.8067499999888},{"questionId":"q101","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":14556,"outputTokens":6,"latencyMs":1127.3515840000473},{"questionId":"q102","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":17483,"outputTokens":5,"latencyMs":1328.8301249999786},{"questionId":"q102","format":"json-compact","model":"claude-haiku-4-5-20251001","expec
ted":"678","actual":"678","isCorrect":true,"inputTokens":12618,"outputTokens":5,"latencyMs":1138.0201249999227},{"questionId":"q102","format":"toon","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":9381,"outputTokens":5,"latencyMs":980.3800830000546},{"questionId":"q102","format":"csv","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":9199,"outputTokens":5,"latencyMs":1157.8958750000456},{"questionId":"q102","format":"xml","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":19873,"outputTokens":5,"latencyMs":1360.5095420000143},{"questionId":"q102","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"678","actual":"678","isCorrect":true,"inputTokens":14558,"outputTokens":5,"latencyMs":1273.2398329999996},{"questionId":"q103","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":17477,"outputTokens":4,"latencyMs":1274.5509169999277},{"questionId":"q103","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":12612,"outputTokens":4,"latencyMs":1256.6803749999963},{"questionId":"q103","format":"toon","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":9375,"outputTokens":4,"latencyMs":1433.376416000072},{"questionId":"q103","format":"csv","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":9193,"outputTokens":4,"latencyMs":1417.8876250000903},{"questionId":"q103","format":"xml","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":19867,"outputTokens":4,"latencyMs":1639.6358750000363},{"questionId":"q103","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"main","actual":"main","isCorrect":true,"inputTokens":14552,"outputTok
ens":4,"latencyMs":1522.2459160001017},{"questionId":"q104","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17484,"outputTokens":6,"latencyMs":1278.353458999889},{"questionId":"q104","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":12619,"outputTokens":6,"latencyMs":1228.4452499999898},{"questionId":"q104","format":"toon","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":9382,"outputTokens":6,"latencyMs":1285.1270830000285},{"questionId":"q104","format":"csv","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":9200,"outputTokens":6,"latencyMs":1185.8712910000468},{"questionId":"q104","format":"xml","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":19874,"outputTokens":6,"latencyMs":1733.4342500000494},{"questionId":"q104","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":14559,"outputTokens":6,"latencyMs":1182.3504170000087},{"questionId":"q105","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17480,"outputTokens":6,"latencyMs":1328.0590000000084},{"questionId":"q105","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":12615,"outputTokens":6,"latencyMs":1337.2227919999277},{"questionId":"q105","format":"toon","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":9378,"outputTokens":6,"latencyMs":1245.6462499999907},{"questionId":"q105","format":"csv","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":9196,"outputTokens":6,"latencyM
s":1152.3198330000741},{"questionId":"q105","format":"xml","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":19870,"outputTokens":6,"latencyMs":1417.1319579998963},{"questionId":"q105","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":14555,"outputTokens":6,"latencyMs":1587.597666000016},{"questionId":"q106","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17485,"outputTokens":6,"latencyMs":1286.3247500000289},{"questionId":"q106","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":12620,"outputTokens":6,"latencyMs":1243.2615000000224},{"questionId":"q106","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":9383,"outputTokens":6,"latencyMs":1291.9809159999713},{"questionId":"q106","format":"csv","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":9201,"outputTokens":6,"latencyMs":1398.1902080000145},{"questionId":"q106","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":19875,"outputTokens":6,"latencyMs":1624.34620800009},{"questionId":"q106","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":14560,"outputTokens":6,"latencyMs":1721.1688750000903},{"questionId":"q107","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":17473,"outputTokens":5,"latencyMs":1369.6887080000015},{"questionId":"q107","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":12608,"outputTokens":5,"latencyMs":1290.1797500000102},{"questionId":"q107","for
mat":"toon","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9371,"outputTokens":5,"latencyMs":1811.6780829998897},{"questionId":"q107","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9189,"outputTokens":5,"latencyMs":1179.5881659999723},{"questionId":"q107","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":19863,"outputTokens":5,"latencyMs":2173.616832999978},{"questionId":"q107","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":14548,"outputTokens":5,"latencyMs":1352.0613330000779},{"questionId":"q108","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"11475937","isCorrect":false,"inputTokens":17476,"outputTokens":7,"latencyMs":2116.313165999949},{"questionId":"q108","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"13947885","isCorrect":false,"inputTokens":12611,"outputTokens":7,"latencyMs":1385.7955830000574},{"questionId":"q108","format":"toon","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"17348748","isCorrect":false,"inputTokens":9374,"outputTokens":7,"latencyMs":1142.456750000012},{"questionId":"q108","format":"csv","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"10737833","isCorrect":false,"inputTokens":9192,"outputTokens":7,"latencyMs":1214.3351249999832},{"questionId":"q108","format":"xml","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"10524881","isCorrect":false,"inputTokens":19866,"outputTokens":7,"latencyMs":1302.2744999999413},{"questionId":"q108","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"15413563","actual":"10959626","isCorrect":false,"inputTokens":14551,"outputTokens":7,"latencyMs":1313.938542000018},{"questionId":"q109","format":"json-pretty","m
odel":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1327621","isCorrect":false,"inputTokens":17477,"outputTokens":7,"latencyMs":1437.121167000034},{"questionId":"q109","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1453895","isCorrect":false,"inputTokens":12612,"outputTokens":7,"latencyMs":1452.4355000000214},{"questionId":"q109","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1429822","isCorrect":false,"inputTokens":9375,"outputTokens":7,"latencyMs":1270.1647080000257},{"questionId":"q109","format":"csv","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1359722","isCorrect":false,"inputTokens":9193,"outputTokens":7,"latencyMs":1112.9368749998976},{"questionId":"q109","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1264743","isCorrect":false,"inputTokens":19867,"outputTokens":7,"latencyMs":1466.141500000027},{"questionId":"q109","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2528243","actual":"1434047","isCorrect":false,"inputTokens":14552,"outputTokens":7,"latencyMs":1131.1252089999616},{"questionId":"q110","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"130896","isCorrect":false,"inputTokens":17475,"outputTokens":6,"latencyMs":1628.9582499999087},{"questionId":"q110","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"137824","isCorrect":false,"inputTokens":12610,"outputTokens":6,"latencyMs":1084.4367499999935},{"questionId":"q110","format":"toon","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"130658","isCorrect":false,"inputTokens":9373,"outputTokens":6,"latencyMs":1364.9022910000058},{"questionId":"q110","format":"csv","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"130686.16","isCorrect":false,"inputTokens":9191,"outputTokens":8,"latencyMs":1165.2718750000931},{"questionId":"q110","format":
"xml","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"132689","isCorrect":false,"inputTokens":19865,"outputTokens":6,"latencyMs":1335.3207089999923},{"questionId":"q110","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"154136","actual":"132673","isCorrect":false,"inputTokens":14550,"outputTokens":6,"latencyMs":1393.0692500000587},{"questionId":"q111","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":2100.8017080000136},{"questionId":"q111","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"41","actual":"25","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1400.2827919999836},{"questionId":"q111","format":"toon","model":"claude-haiku-4-5-20251001","expected":"41","actual":"21","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1358.0510830000276},{"questionId":"q111","format":"csv","model":"claude-haiku-4-5-20251001","expected":"41","actual":"27","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":1191.0352500000736},{"questionId":"q111","format":"xml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"26","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":3264.8267500000075},{"questionId":"q111","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"41","actual":"27","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1273.1958330000052},{"questionId":"q112","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"53","actual":"56","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1519.110915999976},{"questionId":"q112","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"53","actual":"56","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1058.4677910000319},{"questionId":"q112","format":"toon","model":"claude-haiku-4-5-20251001","expected":"53","
actual":"57","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1207.4402080000145},{"questionId":"q112","format":"csv","model":"claude-haiku-4-5-20251001","expected":"53","actual":"53","isCorrect":true,"inputTokens":9193,"outputTokens":5,"latencyMs":1156.1299169999547},{"questionId":"q112","format":"xml","model":"claude-haiku-4-5-20251001","expected":"53","actual":"57","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1534.4429999999702},{"questionId":"q112","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"53","actual":"46","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1102.8952910000226},{"questionId":"q113","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1363.827082999982},{"questionId":"q113","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1098.9746250000317},{"questionId":"q113","format":"toon","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1091.201000000001},{"questionId":"q113","format":"csv","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":985.0388750000857},{"questionId":"q113","format":"xml","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1350.483332999982},{"questionId":"q113","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"77","actual":"100","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1329.111082999967},{"questionId":"q114","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"37","actual":"41","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1212.4
24457999994},{"questionId":"q114","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1182.8672079999233},{"questionId":"q114","format":"toon","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1758.9869160000235},{"questionId":"q114","format":"csv","model":"claude-haiku-4-5-20251001","expected":"37","actual":"45","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":1223.081125000026},{"questionId":"q114","format":"xml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1448.7562919999473},{"questionId":"q114","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"50","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1141.3254169999855},{"questionId":"q115","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"16","actual":"5","isCorrect":false,"inputTokens":17477,"outputTokens":5,"latencyMs":1641.206375000067},{"questionId":"q115","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"16","actual":"31","isCorrect":false,"inputTokens":12612,"outputTokens":5,"latencyMs":1152.969541999977},{"questionId":"q115","format":"toon","model":"claude-haiku-4-5-20251001","expected":"16","actual":"15","isCorrect":false,"inputTokens":9375,"outputTokens":5,"latencyMs":1277.7170410000253},{"questionId":"q115","format":"csv","model":"claude-haiku-4-5-20251001","expected":"16","actual":"21","isCorrect":false,"inputTokens":9193,"outputTokens":5,"latencyMs":1170.6205830000108},{"questionId":"q115","format":"xml","model":"claude-haiku-4-5-20251001","expected":"16","actual":"3","isCorrect":false,"inputTokens":19867,"outputTokens":5,"latencyMs":1850.4669170000125},{"questionId":"q115","format":"yaml","model":"claude-haiku-4-5-20251001","exp
ected":"16","actual":"3","isCorrect":false,"inputTokens":14552,"outputTokens":5,"latencyMs":1451.0770000000484},{"questionId":"q116","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"49","actual":"42","isCorrect":false,"inputTokens":17478,"outputTokens":5,"latencyMs":1355.7432499999413},{"questionId":"q116","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"49","actual":"56","isCorrect":false,"inputTokens":12613,"outputTokens":5,"latencyMs":1275.323333999957},{"questionId":"q116","format":"toon","model":"claude-haiku-4-5-20251001","expected":"49","actual":"47","isCorrect":false,"inputTokens":9376,"outputTokens":5,"latencyMs":1957.939083000063},{"questionId":"q116","format":"csv","model":"claude-haiku-4-5-20251001","expected":"49","actual":"29","isCorrect":false,"inputTokens":9194,"outputTokens":5,"latencyMs":1257.4775420000078},{"questionId":"q116","format":"xml","model":"claude-haiku-4-5-20251001","expected":"49","actual":"25","isCorrect":false,"inputTokens":19868,"outputTokens":5,"latencyMs":1747.7625409999164},{"questionId":"q116","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"49","actual":"29","isCorrect":false,"inputTokens":14553,"outputTokens":5,"latencyMs":1300.0107079999289},{"questionId":"q117","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"23","actual":"26","isCorrect":false,"inputTokens":17478,"outputTokens":5,"latencyMs":1249.3837920000078},{"questionId":"q117","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"23","actual":"20","isCorrect":false,"inputTokens":12613,"outputTokens":5,"latencyMs":1169.6777500000317},{"questionId":"q117","format":"toon","model":"claude-haiku-4-5-20251001","expected":"23","actual":"22","isCorrect":false,"inputTokens":9376,"outputTokens":5,"latencyMs":1138.845290999976},{"questionId":"q117","format":"csv","model":"claude-haiku-4-5-20251001","expected":"23","actual":"22","isCorrect":false,"inputTokens":9194,"outputTokens":5,
"latencyMs":1190.8722499998985},{"questionId":"q117","format":"xml","model":"claude-haiku-4-5-20251001","expected":"23","actual":"17","isCorrect":false,"inputTokens":19868,"outputTokens":5,"latencyMs":1297.84612500004},{"questionId":"q117","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"23","actual":"14","isCorrect":false,"inputTokens":14553,"outputTokens":5,"latencyMs":1273.5267079999903},{"questionId":"q118","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"10","isCorrect":false,"inputTokens":17478,"outputTokens":5,"latencyMs":1501.3266669999575},{"questionId":"q118","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4","actual":"12","isCorrect":false,"inputTokens":12613,"outputTokens":5,"latencyMs":1335.36412500008},{"questionId":"q118","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4","actual":"15","isCorrect":false,"inputTokens":9376,"outputTokens":5,"latencyMs":1168.816125000012},{"questionId":"q118","format":"csv","model":"claude-haiku-4-5-20251001","expected":"4","actual":"31","isCorrect":false,"inputTokens":9194,"outputTokens":5,"latencyMs":1292.2094170000637},{"questionId":"q118","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"16","isCorrect":false,"inputTokens":19868,"outputTokens":5,"latencyMs":1603.7261660000077},{"questionId":"q118","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"10","isCorrect":false,"inputTokens":14553,"outputTokens":5,"latencyMs":1265.9923339999514},{"questionId":"q119","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"57","actual":"8","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":1319.7199580000015},{"questionId":"q119","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"57","actual":"23","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1675.4872079999186},{"questionId":"q119","format":"toon","model":"claude-ha
iku-4-5-20251001","expected":"57","actual":"12","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1093.7844170000171},{"questionId":"q119","format":"csv","model":"claude-haiku-4-5-20251001","expected":"57","actual":"15","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1534.8674590000883},{"questionId":"q119","format":"xml","model":"claude-haiku-4-5-20251001","expected":"57","actual":"15","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1421.0654590000631},{"questionId":"q119","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"57","actual":"15","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1133.124291999964},{"questionId":"q120","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"43","actual":"38","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":3100.8175000000047},{"questionId":"q120","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"43","actual":"37","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1586.5317499999655},{"questionId":"q120","format":"toon","model":"claude-haiku-4-5-20251001","expected":"43","actual":"37","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1087.2246670000022},{"questionId":"q120","format":"csv","model":"claude-haiku-4-5-20251001","expected":"43","actual":"32","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1252.717082999996},{"questionId":"q120","format":"xml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"21","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1433.415833999985},{"questionId":"q120","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"43","actual":"41","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1497.922416999936},{"questionId":"q121","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"25","actual":"13","isCorrect":false,"inputTokens":17486,"out
putTokens":5,"latencyMs":1229.641583000077},{"questionId":"q121","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"25","actual":"15","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1479.924708000035},{"questionId":"q121","format":"toon","model":"claude-haiku-4-5-20251001","expected":"25","actual":"22","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1331.8733749999665},{"questionId":"q121","format":"csv","model":"claude-haiku-4-5-20251001","expected":"25","actual":"18","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1499.8951249999227},{"questionId":"q121","format":"xml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"12","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1506.3811669999268},{"questionId":"q121","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"10","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1207.6717090000166},{"questionId":"q122","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6","actual":"8","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":1919.7641669999575},{"questionId":"q122","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6","actual":"18","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":2294.538125000079},{"questionId":"q122","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6","actual":"15","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1619.9265840000007},{"questionId":"q122","format":"csv","model":"claude-haiku-4-5-20251001","expected":"6","actual":"8","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":2120.3911249999655},{"questionId":"q122","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"17","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1503.0869999999413},{"questionId":"q122","format":"yaml","model":"clau
de-haiku-4-5-20251001","expected":"6","actual":"13","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1234.3564580000238},{"questionId":"q123","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"8","isCorrect":false,"inputTokens":17486,"outputTokens":5,"latencyMs":1434.706542000058},{"questionId":"q123","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"3","isCorrect":false,"inputTokens":12621,"outputTokens":5,"latencyMs":1561.3982910000486},{"questionId":"q123","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"12","isCorrect":false,"inputTokens":9384,"outputTokens":5,"latencyMs":1187.2939999999944},{"questionId":"q123","format":"csv","model":"claude-haiku-4-5-20251001","expected":"1","actual":"4","isCorrect":false,"inputTokens":9202,"outputTokens":5,"latencyMs":1084.5471249999246},{"questionId":"q123","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"3","isCorrect":false,"inputTokens":19876,"outputTokens":5,"latencyMs":1870.1685000000289},{"questionId":"q123","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"4","isCorrect":false,"inputTokens":14561,"outputTokens":5,"latencyMs":1385.7719580000266},{"questionId":"q124","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":7783,"outputTokens":4,"latencyMs":1268.9927909999387},{"questionId":"q124","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":5221,"outputTokens":4,"latencyMs":1421.6205409999238},{"questionId":"q124","format":"toon","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6241,"outputTokens":4,"latencyMs":1310.4610000000102},{"questionId":"q124","format":"xml","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,
"inputTokens":8788,"outputTokens":4,"latencyMs":1550.1076250000624},{"questionId":"q124","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6187,"outputTokens":4,"latencyMs":1150.0919170000125},{"questionId":"q125","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":7783,"outputTokens":7,"latencyMs":1214.324041999993},{"questionId":"q125","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5221,"outputTokens":7,"latencyMs":1421.8747919999296},{"questionId":"q125","format":"toon","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6241,"outputTokens":7,"latencyMs":1067.4452499999898},{"questionId":"q125","format":"xml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":8788,"outputTokens":7,"latencyMs":1218.4350420000264},{"questionId":"q125","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6187,"outputTokens":7,"latencyMs":1223.642499999958},{"questionId":"q126","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":7784,"outputTokens":5,"latencyMs":1130.6049170000479},{"questionId":"q126","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":5222,"outputTokens":5,"latencyMs":1027.4643330000108},{"questionId":"q126","format":"toon","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":6242,"outputTokens":5,"latencyMs":1244.4015420000069},{"questionId":"q126","format":"xml","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,
"inputTokens":8789,"outputTokens":5,"latencyMs":1162.2130830000388},{"questionId":"q126","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"424","actual":"424","isCorrect":true,"inputTokens":6188,"outputTokens":5,"latencyMs":1949.5525829999242},{"questionId":"q127","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":7784,"outputTokens":6,"latencyMs":999.1413750000065},{"questionId":"q127","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":5222,"outputTokens":6,"latencyMs":1027.876125000068},{"questionId":"q127","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":6242,"outputTokens":6,"latencyMs":1985.8047080000397},{"questionId":"q127","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":8789,"outputTokens":6,"latencyMs":1117.451000000001},{"questionId":"q127","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":6188,"outputTokens":6,"latencyMs":1347.6159579999512},{"questionId":"q128","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":7783,"outputTokens":4,"latencyMs":1317.608249999932},{"questionId":"q128","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":5221,"outputTokens":4,"latencyMs":1256.9298749999143},{"questionId":"q128","format":"toon","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6241,"outputTokens":4,"latencyMs":1083.470083000022},{"questionId":"q128","format":"xml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":8788,"outputTokens":4,"latencyMs":1015.118333
0001077},{"questionId":"q128","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6187,"outputTokens":4,"latencyMs":955.9129999999423},{"questionId":"q129","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":7783,"outputTokens":7,"latencyMs":1433.7668750000885},{"questionId":"q129","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":5221,"outputTokens":7,"latencyMs":1191.4290000000037},{"questionId":"q129","format":"toon","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":6241,"outputTokens":7,"latencyMs":1086.0024169998942},{"questionId":"q129","format":"xml","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":8788,"outputTokens":7,"latencyMs":1344.1925419999752},{"questionId":"q129","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":6187,"outputTokens":7,"latencyMs":1327.7757920000004},{"questionId":"q130","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":7784,"outputTokens":5,"latencyMs":975.5557089999784},{"questionId":"q130","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":5222,"outputTokens":5,"latencyMs":1581.8543330000248},{"questionId":"q130","format":"toon","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":6242,"outputTokens":5,"latencyMs":1049.4542499999516},{"questionId":"q130","format":"xml","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":8789,"outputTokens":5,"latencyMs":107
8.1500829999568},{"questionId":"q130","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"435","actual":"435","isCorrect":true,"inputTokens":6188,"outputTokens":5,"latencyMs":1060.7320830000099},{"questionId":"q131","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":7784,"outputTokens":5,"latencyMs":1025.7722079999512},{"questionId":"q131","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":5222,"outputTokens":5,"latencyMs":1314.0469169999706},{"questionId":"q131","format":"toon","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":6242,"outputTokens":5,"latencyMs":1069.5918330000713},{"questionId":"q131","format":"xml","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":8789,"outputTokens":5,"latencyMs":1142.9486249999609},{"questionId":"q131","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"408","actual":"408","isCorrect":true,"inputTokens":6188,"outputTokens":5,"latencyMs":992.1041250000708},{"questionId":"q132","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":7783,"outputTokens":4,"latencyMs":1161.393875000067},{"questionId":"q132","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":5221,"outputTokens":4,"latencyMs":939.8532920000143},{"questionId":"q132","format":"toon","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6241,"outputTokens":4,"latencyMs":1038.753625000012},{"questionId":"q132","format":"xml","model":"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":8788,"outputTokens":4,"latencyMs":1365.075458999956},{"questionId":"q132","format":"yaml","model"
:"claude-haiku-4-5-20251001","expected":"error","actual":"error","isCorrect":true,"inputTokens":6187,"outputTokens":4,"latencyMs":1184.1347500000848},{"questionId":"q133","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":7783,"outputTokens":7,"latencyMs":1443.2086669999408},{"questionId":"q133","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5221,"outputTokens":7,"latencyMs":1285.2082500000251},{"questionId":"q133","format":"toon","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6241,"outputTokens":7,"latencyMs":1422.8267079999205},{"questionId":"q133","format":"xml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":8788,"outputTokens":7,"latencyMs":1320.150208999985},{"questionId":"q133","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6187,"outputTokens":7,"latencyMs":1196.3562910000328},{"questionId":"q134","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":7767,"outputTokens":5,"latencyMs":1288.936875000014},{"questionId":"q134","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"75","actual":"70","isCorrect":false,"inputTokens":5205,"outputTokens":5,"latencyMs":1696.469999999972},{"questionId":"q134","format":"toon","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":6225,"outputTokens":5,"latencyMs":1488.3868329999968},{"questionId":"q134","format":"xml","model":"claude-haiku-4-5-20251001","expected":"75","actual":"100","isCorrect":false,"inputTokens":8772,"outputTokens":5,"latencyMs":1240.0713750000577},{"questionId":"q134","format":"yaml","model":"clau
de-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":6171,"outputTokens":5,"latencyMs":1378.1943749999627},{"questionId":"q135","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2384.74","isCorrect":false,"inputTokens":7768,"outputTokens":8,"latencyMs":1541.0560830000322},{"questionId":"q135","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2458.48","isCorrect":false,"inputTokens":5206,"outputTokens":8,"latencyMs":1120.7529169999762},{"questionId":"q135","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2405.09","isCorrect":false,"inputTokens":6226,"outputTokens":8,"latencyMs":1157.566583000007},{"questionId":"q135","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2445.96","isCorrect":false,"inputTokens":8773,"outputTokens":8,"latencyMs":1159.1351250000298},{"questionId":"q135","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2453.41","actual":"2414.48","isCorrect":false,"inputTokens":6172,"outputTokens":8,"latencyMs":1258.330958000035},{"questionId":"q136","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"29","actual":"33","isCorrect":false,"inputTokens":7768,"outputTokens":5,"latencyMs":1139.8584170000395},{"questionId":"q136","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"29","actual":"32","isCorrect":false,"inputTokens":5206,"outputTokens":5,"latencyMs":1128.9782079999568},{"questionId":"q136","format":"toon","model":"claude-haiku-4-5-20251001","expected":"29","actual":"30","isCorrect":false,"inputTokens":6226,"outputTokens":5,"latencyMs":1076.437042000005},{"questionId":"q136","format":"xml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"31","isCorrect":false,"inputTokens":8773,"outputTokens":5,"latencyMs":1214.0489590000361},{"questionId":"q136","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2
9","actual":"26","isCorrect":false,"inputTokens":6172,"outputTokens":5,"latencyMs":1147.1827920000069},{"questionId":"q137","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":7768,"outputTokens":5,"latencyMs":1060.8192499999423},{"questionId":"q137","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":5206,"outputTokens":5,"latencyMs":1000.7439579999773},{"questionId":"q137","format":"toon","model":"claude-haiku-4-5-20251001","expected":"17","actual":"15","isCorrect":false,"inputTokens":6226,"outputTokens":5,"latencyMs":1444.9082920000656},{"questionId":"q137","format":"xml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":8773,"outputTokens":5,"latencyMs":1364.0987090000417},{"questionId":"q137","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"17","actual":"16","isCorrect":false,"inputTokens":6172,"outputTokens":5,"latencyMs":1289.65149999992},{"questionId":"q138","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"29","actual":"24","isCorrect":false,"inputTokens":7768,"outputTokens":5,"latencyMs":1276.8952499999432},{"questionId":"q138","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"29","actual":"24","isCorrect":false,"inputTokens":5206,"outputTokens":5,"latencyMs":1270.1233340000035},{"questionId":"q138","format":"toon","model":"claude-haiku-4-5-20251001","expected":"29","actual":"24","isCorrect":false,"inputTokens":6226,"outputTokens":5,"latencyMs":1226.6909589999123},{"questionId":"q138","format":"xml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"25","isCorrect":false,"inputTokens":8773,"outputTokens":5,"latencyMs":1133.3242500000633},{"questionId":"q138","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"28","isCorrect":false,"inputTokens":6172,"outputTokens":5,"latencyMs":2
179.776416000095},{"questionId":"q139","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":7771,"outputTokens":5,"latencyMs":1117.800791000016},{"questionId":"q139","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":5209,"outputTokens":5,"latencyMs":1069.6763750000391},{"questionId":"q139","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":6229,"outputTokens":5,"latencyMs":2063.5446249999804},{"questionId":"q139","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"12","isCorrect":false,"inputTokens":8776,"outputTokens":5,"latencyMs":1202.8583749999525},{"questionId":"q139","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"9","isCorrect":false,"inputTokens":6175,"outputTokens":5,"latencyMs":1061.2812919999706},{"questionId":"q140","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"18","actual":"15","isCorrect":false,"inputTokens":7771,"outputTokens":5,"latencyMs":1386.753832999966},{"questionId":"q140","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"18","actual":"13","isCorrect":false,"inputTokens":5209,"outputTokens":5,"latencyMs":1034.8489169999957},{"questionId":"q140","format":"toon","model":"claude-haiku-4-5-20251001","expected":"18","actual":"15","isCorrect":false,"inputTokens":6229,"outputTokens":5,"latencyMs":1134.6799170000013},{"questionId":"q140","format":"xml","model":"claude-haiku-4-5-20251001","expected":"18","actual":"20","isCorrect":false,"inputTokens":8776,"outputTokens":5,"latencyMs":1083.533999999985},{"questionId":"q140","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"18","actual":"15","isCorrect":false,"inputTokens":6175,"outputTokens":5,"latencyMs":1075.4867920000106},{"questionId":"q141","format":"json-pretty","model":"claude-haiku-4-
5-20251001","expected":"33","actual":"33","isCorrect":true,"inputTokens":7775,"outputTokens":5,"latencyMs":1596.7704580000136},{"questionId":"q141","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"33","actual":"33","isCorrect":true,"inputTokens":5213,"outputTokens":5,"latencyMs":1052.1449169999687},{"questionId":"q141","format":"toon","model":"claude-haiku-4-5-20251001","expected":"33","actual":"40","isCorrect":false,"inputTokens":6233,"outputTokens":5,"latencyMs":1162.8800829999382},{"questionId":"q141","format":"xml","model":"claude-haiku-4-5-20251001","expected":"33","actual":"37","isCorrect":false,"inputTokens":8780,"outputTokens":5,"latencyMs":1121.927708000061},{"questionId":"q141","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"33","actual":"35","isCorrect":false,"inputTokens":6179,"outputTokens":5,"latencyMs":1078.549040999962},{"questionId":"q142","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"42","actual":"27","isCorrect":false,"inputTokens":7772,"outputTokens":5,"latencyMs":1063.4963330000173},{"questionId":"q142","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"42","actual":"25","isCorrect":false,"inputTokens":5210,"outputTokens":5,"latencyMs":1157.562166999909},{"questionId":"q142","format":"toon","model":"claude-haiku-4-5-20251001","expected":"42","actual":"28","isCorrect":false,"inputTokens":6230,"outputTokens":5,"latencyMs":1122.3327499999432},{"questionId":"q142","format":"xml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"30","isCorrect":false,"inputTokens":8777,"outputTokens":5,"latencyMs":1094.998041999992},{"questionId":"q142","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"42","actual":"28","isCorrect":false,"inputTokens":6176,"outputTokens":5,"latencyMs":1123.9057500000345},{"questionId":"q143","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"25","actual":"24","isCorrect":false,"inputTokens":7769,"outputTo
kens":5,"latencyMs":1498.7087079999037},{"questionId":"q143","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"25","actual":"30","isCorrect":false,"inputTokens":5207,"outputTokens":5,"latencyMs":1361.3783330000006},{"questionId":"q143","format":"toon","model":"claude-haiku-4-5-20251001","expected":"25","actual":"38","isCorrect":false,"inputTokens":6227,"outputTokens":5,"latencyMs":1208.8371250000782},{"questionId":"q143","format":"xml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"39","isCorrect":false,"inputTokens":8774,"outputTokens":5,"latencyMs":1436.0557080000872},{"questionId":"q143","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"25","actual":"31","isCorrect":false,"inputTokens":6173,"outputTokens":5,"latencyMs":1439.8235000000568},{"questionId":"q144","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"29","actual":"30","isCorrect":false,"inputTokens":7774,"outputTokens":5,"latencyMs":1211.4848749999655},{"questionId":"q144","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"29","actual":"30","isCorrect":false,"inputTokens":5212,"outputTokens":5,"latencyMs":1158.1638749999693},{"questionId":"q144","format":"toon","model":"claude-haiku-4-5-20251001","expected":"29","actual":"43","isCorrect":false,"inputTokens":6232,"outputTokens":5,"latencyMs":1706.8831250000512},{"questionId":"q144","format":"xml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"38","isCorrect":false,"inputTokens":8779,"outputTokens":5,"latencyMs":1376.859208000009},{"questionId":"q144","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"29","actual":"31","isCorrect":false,"inputTokens":6178,"outputTokens":5,"latencyMs":1091.6000830000266},{"questionId":"q145","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"8","isCorrect":false,"inputTokens":7774,"outputTokens":5,"latencyMs":1214.2384999999776},{"questionId":"q145","format":"json-compact",
"model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":5212,"outputTokens":5,"latencyMs":1169.8784999999916},{"questionId":"q145","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4","actual":"3","isCorrect":false,"inputTokens":6232,"outputTokens":5,"latencyMs":1438.8369160000002},{"questionId":"q145","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"5","isCorrect":false,"inputTokens":8779,"outputTokens":5,"latencyMs":1235.8554580000928},{"questionId":"q145","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"5","isCorrect":false,"inputTokens":6178,"outputTokens":5,"latencyMs":1283.048208000022},{"questionId":"q146","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":7777,"outputTokens":5,"latencyMs":1316.5421670000069},{"questionId":"q146","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":5215,"outputTokens":5,"latencyMs":1155.4379169999156},{"questionId":"q146","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"8","isCorrect":false,"inputTokens":6235,"outputTokens":5,"latencyMs":973.9992499999935},{"questionId":"q146","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":8782,"outputTokens":5,"latencyMs":1181.0223749999423},{"questionId":"q146","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":6181,"outputTokens":5,"latencyMs":1189.6436249999097},{"questionId":"q147","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"6","isCorrect":false,"inputTokens":7777,"outputTokens":5,"latencyMs":1120.4472499999683},{"questionId":"q147","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"4","isCorrect":false,"inputTokens":5215,"out
putTokens":5,"latencyMs":1058.7860420000507},{"questionId":"q147","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"5","isCorrect":false,"inputTokens":6235,"outputTokens":5,"latencyMs":1214.7113749999553},{"questionId":"q147","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"7","isCorrect":false,"inputTokens":8782,"outputTokens":5,"latencyMs":2272.7187089999206},{"questionId":"q147","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"3","isCorrect":false,"inputTokens":6181,"outputTokens":5,"latencyMs":1153.4284159999806},{"questionId":"q148","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":7777,"outputTokens":5,"latencyMs":1250.0174589999951},{"questionId":"q148","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":5215,"outputTokens":5,"latencyMs":1667.0216249999357},{"questionId":"q148","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":6235,"outputTokens":5,"latencyMs":1195.937874999945},{"questionId":"q148","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"5","isCorrect":false,"inputTokens":8782,"outputTokens":5,"latencyMs":1373.8929169999901},{"questionId":"q148","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"4","isCorrect":false,"inputTokens":6181,"outputTokens":5,"latencyMs":1219.8368749999208},{"questionId":"q149","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"7","isCorrect":false,"inputTokens":7776,"outputTokens":5,"latencyMs":1261.3687080000527},{"questionId":"q149","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4","actual":"6","isCorrect":false,"inputTokens":5214,"outputTokens":5,"latencyMs":1096.3175000000047},{"questionId":"q149","format":"toon","model":"claude-haiku
-4-5-20251001","expected":"4","actual":"7","isCorrect":false,"inputTokens":6234,"outputTokens":5,"latencyMs":1464.7897079999093},{"questionId":"q149","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"6","isCorrect":false,"inputTokens":8781,"outputTokens":5,"latencyMs":1314.154084000038},{"questionId":"q149","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"6","isCorrect":false,"inputTokens":6180,"outputTokens":5,"latencyMs":1648.5650410000235},{"questionId":"q150","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":7776,"outputTokens":5,"latencyMs":1204.6717499999795},{"questionId":"q150","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":5214,"outputTokens":5,"latencyMs":1045.0933749999385},{"questionId":"q150","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":6234,"outputTokens":5,"latencyMs":1071.8020000000251},{"questionId":"q150","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":8781,"outputTokens":5,"latencyMs":1080.7611669999314},{"questionId":"q150","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"8","isCorrect":false,"inputTokens":6180,"outputTokens":5,"latencyMs":1317.7093749999767},{"questionId":"q151","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":1244,"outputTokens":4,"latencyMs":929.0065000000177},{"questionId":"q151","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":785,"outputTokens":4,"latencyMs":1663.7153750000289},{"questionId":"q151","format":"toon","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isC
orrect":true,"inputTokens":879,"outputTokens":4,"latencyMs":731.0117499999469},{"questionId":"q151","format":"xml","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":1314,"outputTokens":4,"latencyMs":883.2615830000723},{"questionId":"q151","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"development","actual":"development","isCorrect":true,"inputTokens":899,"outputTokens":4,"latencyMs":933.3807080000406},{"questionId":"q152","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1242,"outputTokens":8,"latencyMs":1309.4723340000492},{"questionId":"q152","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":783,"outputTokens":8,"latencyMs":1349.013917000033},{"questionId":"q152","format":"toon","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":877,"outputTokens":8,"latencyMs":2834.810291999951},{"questionId":"q152","format":"xml","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1312,"outputTokens":8,"latencyMs":1079.063041999936},{"questionId":"q152","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":897,"outputTokens":8,"latencyMs":2015.544249999919},{"questionId":"q153","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1242,"outputTokens":6,"latencyMs":1069.0238330001011},{"questionId":"q153","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":783,"outputTokens":6,"latencyMs":980.3079999999609},{"questionId":"q153","format":"toon","m
odel":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":877,"outputTokens":6,"latencyMs":1044.9361250000075},{"questionId":"q153","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1312,"outputTokens":6,"latencyMs":2413.873292000033},{"questionId":"q153","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":897,"outputTokens":6,"latencyMs":873.3523750000168},{"questionId":"q154","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":942.2453750000568},{"questionId":"q154","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":887.3804999999702},{"questionId":"q154","format":"toon","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":875.3322920000646},{"questionId":"q154","format":"xml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":980.2426670000423},{"questionId":"q154","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"37","actual":"37","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":1153.6150420000777},{"questionId":"q155","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1242,"outputTokens":6,"latencyMs":1241.1024170001037},{"questionId":"q155","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":783,"outputTokens":6,"latencyMs":1081.2751660000067},{"questionId":"q155","format":"toon","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect
":true,"inputTokens":877,"outputTokens":6,"latencyMs":771.7234170000302},{"questionId":"q155","format":"xml","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1312,"outputTokens":6,"latencyMs":825.690082999994},{"questionId":"q155","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":897,"outputTokens":6,"latencyMs":752.5629590000026},{"questionId":"q156","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":1425.01554199995},{"questionId":"q156","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":1021.5733329999493},{"questionId":"q156","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":888.623041999992},{"questionId":"q156","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":920.717166999937},{"questionId":"q156","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":799.5221250000177},{"questionId":"q157","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1244,"outputTokens":6,"latencyMs":955.9951249998994},{"questionId":"q157","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":785,"outputTokens":6,"latencyMs":794.2132920000004},{"questionId":"q157","format":"toon","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":879,"outputTokens":6,"latencyMs":981.5377080000471},{"questionId":"q15
7","format":"xml","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1314,"outputTokens":6,"latencyMs":1138.1192919999594},{"questionId":"q157","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":899,"outputTokens":6,"latencyMs":856.9616249999963},{"questionId":"q158","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":1242,"outputTokens":4,"latencyMs":838.4199159999844},{"questionId":"q158","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":783,"outputTokens":4,"latencyMs":1141.6517499999609},{"questionId":"q158","format":"toon","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":877,"outputTokens":4,"latencyMs":1043.2275830000872},{"questionId":"q158","format":"xml","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":1312,"outputTokens":4,"latencyMs":1111.2398749999702},{"questionId":"q158","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"real","actual":"real","isCorrect":true,"inputTokens":897,"outputTokens":4,"latencyMs":863.2164999999804},{"questionId":"q159","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1243,"outputTokens":6,"latencyMs":880.7821669999976},{"questionId":"q159","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":784,"outputTokens":6,"latencyMs":895.2262499999488},{"questionId":"q159","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":878,"outputTokens":6,"latencyMs":1003.9393329999875},{"questionId":"q159","format":"xml","model":"claude-haiku-4-5-20251001","expe
cted":"3600","actual":"3600","isCorrect":true,"inputTokens":1313,"outputTokens":6,"latencyMs":907.4466250000987},{"questionId":"q159","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":898,"outputTokens":6,"latencyMs":1019.0216660000151},{"questionId":"q160","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1244,"outputTokens":9,"latencyMs":1012.7389589999802},{"questionId":"q160","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":785,"outputTokens":9,"latencyMs":1003.1793339999858},{"questionId":"q160","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":879,"outputTokens":9,"latencyMs":916.7758330000797},{"questionId":"q160","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1314,"outputTokens":9,"latencyMs":1208.6454590000212},{"questionId":"q160","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":899,"outputTokens":9,"latencyMs":923.5587499999674},{"questionId":"q161","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":826.9563749999506},{"questionId":"q161","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":753.3855420000618},{"questionId":"q161","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":1089.936457999982},{"questionId":"q161","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1314,"outputTokens
":5,"latencyMs":795.5758750000969},{"questionId":"q161","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":872.4575829999521},{"questionId":"q162","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":887.2722920000087},{"questionId":"q162","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":900.1268749999581},{"questionId":"q162","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":812.1885420000181},{"questionId":"q162","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":839.9153749999823},{"questionId":"q162","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":871.6134580000071},{"questionId":"q163","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1243,"outputTokens":5,"latencyMs":902.4298330000602},{"questionId":"q163","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":784,"outputTokens":5,"latencyMs":889.4039999999804},{"questionId":"q163","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":878,"outputTokens":5,"latencyMs":1126.9705829999875},{"questionId":"q163","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1313,"outputTokens":5,"latencyMs":958.2488329999615},{"questionId":"q163","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","act
ual":"2","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":1385.0525420000777},{"questionId":"q164","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1243,"outputTokens":5,"latencyMs":1185.5424160000402},{"questionId":"q164","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":784,"outputTokens":5,"latencyMs":883.8527500000782},{"questionId":"q164","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":878,"outputTokens":5,"latencyMs":1052.9344580001198},{"questionId":"q164","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":1313,"outputTokens":5,"latencyMs":911.154957999941},{"questionId":"q164","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2","actual":"2","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":1121.9202919998206},{"questionId":"q165","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":879.8078749999404},{"questionId":"q165","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":1288.6490829999093},{"questionId":"q165","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":786.6004580000881},{"questionId":"q165","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":974.0200000000186},{"questionId":"q165","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":1045.2869170000777},{"questionId":"q166","format":"
json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1247,"outputTokens":5,"latencyMs":824.5860000001267},{"questionId":"q166","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"0","isCorrect":false,"inputTokens":788,"outputTokens":5,"latencyMs":974.435499999905},{"questionId":"q166","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":882,"outputTokens":5,"latencyMs":804.1913339998573},{"questionId":"q166","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1317,"outputTokens":5,"latencyMs":1016.0534169999883},{"questionId":"q166","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":902,"outputTokens":5,"latencyMs":962.7041670000181},{"questionId":"q167","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1243,"outputTokens":5,"latencyMs":1005.984332999913},{"questionId":"q167","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":784,"outputTokens":5,"latencyMs":1055.2427499999758},{"questionId":"q167","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":878,"outputTokens":5,"latencyMs":945.2598330001347},{"questionId":"q167","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1313,"outputTokens":5,"latencyMs":1155.7412080001086},{"questionId":"q167","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":814.8985409999732},{"questionId":"q168","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":1245,"outputToken
s":5,"latencyMs":1001.1619170000777},{"questionId":"q168","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":786,"outputTokens":5,"latencyMs":973.6378339999355},{"questionId":"q168","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":880,"outputTokens":5,"latencyMs":906.6011659998912},{"questionId":"q168","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":1315,"outputTokens":5,"latencyMs":5450.461332999868},{"questionId":"q168","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"5","isCorrect":true,"inputTokens":900,"outputTokens":5,"latencyMs":893.4252499998547},{"questionId":"q169","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":1247,"outputTokens":5,"latencyMs":913.8683329999913},{"questionId":"q169","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":788,"outputTokens":5,"latencyMs":1247.810541999992},{"questionId":"q169","format":"toon","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":882,"outputTokens":5,"latencyMs":927.667708999943},{"questionId":"q169","format":"xml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"9","isCorrect":false,"inputTokens":1317,"outputTokens":5,"latencyMs":1024.6397919999436},{"questionId":"q169","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"8","actual":"8","isCorrect":true,"inputTokens":902,"outputTokens":5,"latencyMs":808.8294999999925},{"questionId":"q170","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":1246,"outputTokens":5,"latencyMs":948.8401660001837},{"questionId":"q170","format":"json-compact","model":"claude-haiku-4-5-20251001","expect
ed":"5","actual":"6","isCorrect":false,"inputTokens":787,"outputTokens":5,"latencyMs":807.7621660002042},{"questionId":"q170","format":"toon","model":"claude-haiku-4-5-20251001","expected":"5","actual":"7","isCorrect":false,"inputTokens":881,"outputTokens":5,"latencyMs":995.4957500000019},{"questionId":"q170","format":"xml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":1316,"outputTokens":5,"latencyMs":948.4754999999423},{"questionId":"q170","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"5","actual":"6","isCorrect":false,"inputTokens":901,"outputTokens":5,"latencyMs":915.8271250000689},{"questionId":"q171","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1248,"outputTokens":5,"latencyMs":923.2111250001471},{"questionId":"q171","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":789,"outputTokens":5,"latencyMs":780.6261670000385},{"questionId":"q171","format":"toon","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":883,"outputTokens":5,"latencyMs":923.018459000159},{"questionId":"q171","format":"xml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":1318,"outputTokens":5,"latencyMs":993.0462080000434},{"questionId":"q171","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"3","actual":"3","isCorrect":true,"inputTokens":903,"outputTokens":5,"latencyMs":1245.815000000177},{"questionId":"q172","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1249,"outputTokens":5,"latencyMs":1095.3125},{"questionId":"q172","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":790,"outputTokens":5,"latencyMs":709.230874999892},{"questionId":"q172","format":
"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":884,"outputTokens":5,"latencyMs":1246.6792499998119},{"questionId":"q172","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1319,"outputTokens":5,"latencyMs":870.7104170001112},{"questionId":"q172","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":904,"outputTokens":5,"latencyMs":974.5198749999981},{"questionId":"q173","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1250,"outputTokens":5,"latencyMs":985.8480829999316},{"questionId":"q173","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":791,"outputTokens":5,"latencyMs":1002.6233339998871},{"questionId":"q173","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":885,"outputTokens":5,"latencyMs":1006.6666669999249},{"questionId":"q173","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1320,"outputTokens":5,"latencyMs":1024.3824579999782},{"questionId":"q173","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":905,"outputTokens":5,"latencyMs":2409.9704589999747},{"questionId":"q174","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1246,"outputTokens":5,"latencyMs":1009.0951249999925},{"questionId":"q174","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"2","isCorrect":false,"inputTokens":787,"outputTokens":5,"latencyMs":916.1399580000434},{"questionId":"q174","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":881,"outputTokens":
5,"latencyMs":1165.979916999815},{"questionId":"q174","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1316,"outputTokens":5,"latencyMs":864.8515409999527},{"questionId":"q174","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"2","isCorrect":false,"inputTokens":901,"outputTokens":5,"latencyMs":916.4436249998398},{"questionId":"q175","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1251,"outputTokens":5,"latencyMs":1029.9872919998597},{"questionId":"q175","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":792,"outputTokens":5,"latencyMs":1144.0288329999894},{"questionId":"q175","format":"toon","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":886,"outputTokens":5,"latencyMs":822.7032080001663},{"questionId":"q175","format":"xml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":1321,"outputTokens":5,"latencyMs":1031.0812089999672},{"questionId":"q175","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"0","actual":"0","isCorrect":true,"inputTokens":906,"outputTokens":5,"latencyMs":937.500250000041},{"questionId":"q176","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1244,"outputTokens":5,"latencyMs":911.2006250000559},{"questionId":"q176","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":785,"outputTokens":5,"latencyMs":791.9767080000602},{"questionId":"q176","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":879,"outputTokens":5,"latencyMs":817.9461670001037},{"questionId":"q176","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actu
al":"1","isCorrect":true,"inputTokens":1314,"outputTokens":5,"latencyMs":800.3201659999322},{"questionId":"q176","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":899,"outputTokens":5,"latencyMs":1166.106958999997},{"questionId":"q177","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1254,"outputTokens":5,"latencyMs":1153.844291999936},{"questionId":"q177","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":795,"outputTokens":5,"latencyMs":772.196958999848},{"questionId":"q177","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":889,"outputTokens":5,"latencyMs":761.2225830000825},{"questionId":"q177","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1324,"outputTokens":5,"latencyMs":1030.3278750001919},{"questionId":"q177","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":909,"outputTokens":5,"latencyMs":956.0868329999503},{"questionId":"q178","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1249,"outputTokens":5,"latencyMs":1061.2030420000665},{"questionId":"q178","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":790,"outputTokens":5,"latencyMs":888.081167000113},{"questionId":"q178","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":884,"outputTokens":5,"latencyMs":1051.8333749999292},{"questionId":"q178","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1319,"outputTokens":5,"latencyMs":799.0097499999683},{"questionId":"q178","format":"yaml
","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":904,"outputTokens":5,"latencyMs":905.2197910000104},{"questionId":"q179","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1247,"outputTokens":5,"latencyMs":988.6873749999795},{"questionId":"q179","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":788,"outputTokens":5,"latencyMs":723.2364169999491},{"questionId":"q179","format":"toon","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":882,"outputTokens":5,"latencyMs":728.2359160000924},{"questionId":"q179","format":"xml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":1317,"outputTokens":5,"latencyMs":977.7162500000559},{"questionId":"q179","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"1","actual":"1","isCorrect":true,"inputTokens":902,"outputTokens":5,"latencyMs":1027.893291000044},{"questionId":"q180","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":7935,"outputTokens":5,"latencyMs":1120.7492909999564},{"questionId":"q180","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":4826,"outputTokens":5,"latencyMs":1454.5087910001166},{"questionId":"q180","format":"toon","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":3075,"outputTokens":5,"latencyMs":1035.1915830001235},{"questionId":"q180","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":2921,"outputTokens":5,"latencyMs":931.953375000041},{"questionId":"q180","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9420,"ou
tputTokens":5,"latencyMs":941.3573339998256},{"questionId":"q180","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":5826,"outputTokens":5,"latencyMs":986.0300420001149},{"questionId":"q181","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7941,"outputTokens":20,"latencyMs":1113.5572500000708},{"questionId":"q181","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4832,"outputTokens":20,"latencyMs":993.531415999867},{"questionId":"q181","format":"toon","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":3081,"outputTokens":20,"latencyMs":949.0614160001278},{"questionId":"q181","format":"csv","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2927,"outputTokens":20,"latencyMs":1220.1684170002118},{"questionId":"q181","format":"xml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":9426,"outputTokens":20,"latencyMs":2677.9535000000615},{"questionId":"q181","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5832,"outputTokens":20,"latencyMs":1150.8704589998815
},{"questionId":"q182","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":7938,"outputTokens":4,"latencyMs":1143.8894159998745},{"questionId":"q182","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":4829,"outputTokens":4,"latencyMs":939.9587090001442},{"questionId":"q182","format":"toon","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":3078,"outputTokens":4,"latencyMs":1002.7564159999602},{"questionId":"q182","format":"csv","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":2924,"outputTokens":4,"latencyMs":1499.9509160001762},{"questionId":"q182","format":"xml","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":9423,"outputTokens":4,"latencyMs":1206.003415999934},{"questionId":"q182","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"email","actual":"email","isCorrect":true,"inputTokens":5829,"outputTokens":4,"latencyMs":1156.9789579999633},{"questionId":"q183","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":7939,"outputTokens":4,"latencyMs":1133.3445830000564},{"questionId":"q183","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4830,"outputTokens":4,"latencyMs":1083.2715829999652},{"questionId":"q183","format":"toon","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":3079,"outputTokens":4,"latencyMs":1054.9281669999473},{"questionId":"q183","format":"csv","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2925,"outputTokens":4,"latencyMs":1052.3783750000875},{"questionId":"q183","format":"xml","model":"claude-haiku
-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":9424,"outputTokens":4,"latencyMs":1340.8406249999534},{"questionId":"q183","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5830,"outputTokens":4,"latencyMs":1075.3891660000663},{"questionId":"q184","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":7939,"outputTokens":12,"latencyMs":2124.266124999849},{"questionId":"q184","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":4830,"outputTokens":12,"latencyMs":1068.5028750000056},{"questionId":"q184","format":"toon","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":3079,"outputTokens":12,"latencyMs":1167.4424999998882},{"questionId":"q184","format":"csv","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":2925,"outputTokens":12,"latencyMs":1302.0248749998864},{"questionId":"q184","format":"xml","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":9424,"outputTokens":12,"latencyMs":1186.8695000000298},{"questionId":"q184","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"Mrs. Sherri Ritchie","actual":"Mrs. 
Sherri Ritchie","isCorrect":true,"inputTokens":5830,"outputTokens":12,"latencyMs":1270.5893329998944},{"questionId":"q185","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":7936,"outputTokens":5,"latencyMs":1134.64995799982},{"questionId":"q185","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":4827,"outputTokens":5,"latencyMs":1194.1974160000682},{"questionId":"q185","format":"toon","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":3076,"outputTokens":5,"latencyMs":1283.6654169999529},{"questionId":"q185","format":"csv","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":2922,"outputTokens":5,"latencyMs":1176.9219159998465},{"questionId":"q185","format":"xml","model":"claude-haiku-4-5-20251001","expected":"7","actual":"7","isCorrect":true,"inputTokens":9421,"outputTokens":5,"latencyMs":1174.4477500000503},{"questionId":"q185","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"7","actual":"6","isCorrect":false,"inputTokens":5827,"outputTokens":5,"latencyMs":1267.2275419998914},{"questionId":"q186","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":13066,"outputTokens":5,"latencyMs":1317.5971669999417},{"questionId":"q186","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8027,"outputTokens":5,"latencyMs":1233.12991600018},{"questionId":"q186","format":"toon","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":8344,"outputTokens":5,"latencyMs":1202.2103329999372},{"questionId":"q186","format":"xml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":14571,"outputTokens":5,"latencyMs":1269.4371249999385},{"qu
estionId":"q186","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"50","actual":"50","isCorrect":true,"inputTokens":9469,"outputTokens":5,"latencyMs":1300.4245419998188},{"questionId":"q187","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":13075,"outputTokens":21,"latencyMs":1696.230416999897},{"questionId":"q187","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8036,"outputTokens":21,"latencyMs":1352.8969999998808},{"questionId":"q187","format":"toon","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8353,"outputTokens":21,"latencyMs":1110.8835419998504},{"questionId":"q187","format":"xml","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":14580,"outputTokens":21,"latencyMs":1534.4549169999082},{"questionId":"q187","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":9478,"outputTokens":21,"latencyMs":1164.3688749999274},{"questionId":"q188","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":13071,"outputTokens":5,"latencyMs":1336.527957999846},{"questionId":"q188","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"4","actual
":"4","isCorrect":true,"inputTokens":8032,"outputTokens":5,"latencyMs":1103.1468749998603},{"questionId":"q188","format":"toon","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":8349,"outputTokens":5,"latencyMs":1129.3212500000373},{"questionId":"q188","format":"xml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":14576,"outputTokens":5,"latencyMs":1354.4471249999478},{"questionId":"q188","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"4","actual":"4","isCorrect":true,"inputTokens":9474,"outputTokens":5,"latencyMs":1144.939916000003},{"questionId":"q189","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":13075,"outputTokens":11,"latencyMs":1416.0433340000454},{"questionId":"q189","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8036,"outputTokens":11,"latencyMs":1131.6960839999374},{"questionId":"q189","format":"toon","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8353,"outputTokens":11,"latencyMs":1228.2068749999162},{"questionId":"q189","format":"xml","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":14580,"outputTokens":11,"latencyMs":1415.090707999887},{"questionId":"q189","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":9478,"outputTokens":11,"latencyMs":1161.2281250001397},{"questionId":"q190","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":13070,"outputTokens":4,"lat
encyMs":2074.3852499998175},{"questionId":"q190","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8031,"outputTokens":4,"latencyMs":1235.8749160000589},{"questionId":"q190","format":"toon","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8348,"outputTokens":4,"latencyMs":1108.9501659998205},{"questionId":"q190","format":"xml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14575,"outputTokens":4,"latencyMs":1624.1051250000019},{"questionId":"q190","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9473,"outputTokens":4,"latencyMs":1303.3145830000285},{"questionId":"q191","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":13076,"outputTokens":10,"latencyMs":1195.093499999959},{"questionId":"q191","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8037,"outputTokens":10,"latencyMs":1125.0091250000987},{"questionId":"q191","format":"toon","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8354,"outputTokens":10,"latencyMs":1002.9523330000229},{"questionId":"q191","format":"xml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":14581,"outputTokens":10,"latencyMs":1123.1422500000335},{"questionId":"q191","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":9479,"outputTokens":10,"latencyMs":1390.2729579999577},{"questionId":"q192","form
at":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":4142,"outputTokens":5,"latencyMs":1418.0205409999471},{"questionId":"q192","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":2453,"outputTokens":5,"latencyMs":1059.0376249998808},{"questionId":"q192","format":"toon","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1599,"outputTokens":5,"latencyMs":1186.7520419999491},{"questionId":"q192","format":"csv","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":1507,"outputTokens":5,"latencyMs":1620.152291999897},{"questionId":"q192","format":"xml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":4844,"outputTokens":5,"latencyMs":1310.0510420000646},{"questionId":"q192","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"60","actual":"60","isCorrect":true,"inputTokens":3173,"outputTokens":5,"latencyMs":1221.876249999972},{"questionId":"q193","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4147,"outputTokens":17,"latencyMs":1487.5997919999063},{"questionId":"q193","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2458,"outputTokens":17,"latencyMs":1157.4452500001062},{"questionId":"q193","format":"toon","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1604,"outputTokens":17,"latencyMs":1520.7116659998428},{"questionId":"q193","format"
:"csv","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1512,"outputTokens":17,"latencyMs":1203.7664580000564},{"questionId":"q193","format":"xml","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4849,"outputTokens":17,"latencyMs":1226.0437910000328},{"questionId":"q193","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3178,"outputTokens":17,"latencyMs":977.1910840000492},{"questionId":"q194","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":4145,"outputTokens":6,"latencyMs":1073.1372919999994},{"questionId":"q194","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":2456,"outputTokens":6,"latencyMs":1266.1767920001876},{"questionId":"q194","format":"toon","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1602,"outputTokens":4,"latencyMs":1342.251207999885},{"questionId":"q194","format":"csv","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":1510,"outputTokens":6,"latencyMs":1179.91620899993},{"questionId":"q194","format":"xml","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":4847,"outputTokens":6,"latencyMs":1468.4425409999676},{"questionId":"q194","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"revenue","actual":"bounceRate","isCorrect":false,"inputTokens":3176,"outputTokens":6,"la
tencyMs":900.0507499999367},{"questionId":"q195","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4146,"outputTokens":10,"latencyMs":1087.1228330000304},{"questionId":"q195","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2457,"outputTokens":10,"latencyMs":1056.3863329999149},{"questionId":"q195","format":"toon","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1603,"outputTokens":10,"latencyMs":1167.88495899993},{"questionId":"q195","format":"csv","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1511,"outputTokens":10,"latencyMs":984.4359160000458},{"questionId":"q195","format":"xml","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4848,"outputTokens":10,"latencyMs":1138.7140420000069},{"questionId":"q195","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3177,"outputTokens":10,"latencyMs":1117.2731670001522},{"questionId":"q196","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":4142,"outputTokens":5,"latencyMs":1177.2991659999825},{"questionId":"q196","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":2453,"outputTokens":5,"latencyMs":965.145041000098},{"questionId":"q196","format":"toon","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":1599,"outputTokens":5,"latencyMs":1326.4698749999516},{"questionId":"q196","format":"csv","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":1507,"outputTokens":5,"latency
Ms":1118.3482919998933},{"questionId":"q196","format":"xml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":4844,"outputTokens":5,"latencyMs":1536.1617910000496},{"questionId":"q196","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"6","actual":"6","isCorrect":true,"inputTokens":3173,"outputTokens":5,"latencyMs":930.9039169999305},{"questionId":"q197","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":17473,"outputTokens":5,"latencyMs":1286.1768749998882},{"questionId":"q197","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":12608,"outputTokens":5,"latencyMs":1507.926083000144},{"questionId":"q197","format":"toon","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9371,"outputTokens":5,"latencyMs":1098.5005830000155},{"questionId":"q197","format":"csv","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":9189,"outputTokens":5,"latencyMs":11181.550583999837},{"questionId":"q197","format":"xml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":19863,"outputTokens":5,"latencyMs":1288.1963339999784},{"questionId":"q197","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"100","actual":"100","isCorrect":true,"inputTokens":14548,"outputTokens":5,"latencyMs":1512.4048330001533},{"questionId":"q198","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17479,"outputTokens":35,"latencyMs":1934.8035840000957},{"questionId":"q198","format":"json-compact","model":"claude-haiku-4-5-20251001"
,"expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":12614,"outputTokens":35,"latencyMs":1396.143374999985},{"questionId":"q198","format":"toon","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":9377,"outputTokens":35,"latencyMs":1337.9257079998497},{"questionId":"q198","format":"csv","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":9195,"outputTokens":35,"latencyMs":1317.0077919999603},{"questionId":"q198","format":"xml","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":19869,"outputTokens":35,"latencyMs":2428.95404099999},{"questionId":"q198","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":14554,"outputTokens":35,"latencyMs":1332.3844579998404},{"questionId":"q199","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":17477,"outputTokens":6,"latencyMs":1435.5708749999758},{"questionId":"q199","format":"json-compact","model":"clau
de-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":12612,"outputTokens":6,"latencyMs":1323.1899170000106},{"questionId":"q199","format":"toon","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":9375,"outputTokens":6,"latencyMs":1109.1495000000577},{"questionId":"q199","format":"csv","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":9193,"outputTokens":6,"latencyMs":1622.9887500000186},{"questionId":"q199","format":"xml","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":19867,"outputTokens":6,"latencyMs":1395.366083000088},{"questionId":"q199","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":14552,"outputTokens":6,"latencyMs":1273.513874999946},{"questionId":"q200","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17477,"outputTokens":7,"latencyMs":1235.034042000072},{"questionId":"q200","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":12612,"outputTokens":7,"latencyMs":1452.7786670001224},{"questionId":"q200","format":"toon","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":9375,"outputTokens":7,"latencyMs":1009.8154169998597},{"questionId":"q200","format":"csv","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":9193,"outputTokens":7,"latencyMs":1215.5075840000063},{"questionId":"q200","format":"xml","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":19867,"outputTokens":7,"latencyMs":1331.6306249999907},{"questionId":"q
200","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":14552,"outputTokens":7,"latencyMs":1234.5180420000106},{"questionId":"q201","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":17474,"outputTokens":5,"latencyMs":1365.1134999999776},{"questionId":"q201","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"11","actual":"10","isCorrect":false,"inputTokens":12609,"outputTokens":5,"latencyMs":1297.5984169999138},{"questionId":"q201","format":"toon","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":9372,"outputTokens":5,"latencyMs":1234.3033330000471},{"questionId":"q201","format":"csv","model":"claude-haiku-4-5-20251001","expected":"11","actual":"11","isCorrect":true,"inputTokens":9190,"outputTokens":5,"latencyMs":1500.0707079998683},{"questionId":"q201","format":"xml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"12","isCorrect":false,"inputTokens":19864,"outputTokens":5,"latencyMs":1370.718208000064},{"questionId":"q201","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"11","actual":"13","isCorrect":false,"inputTokens":14549,"outputTokens":5,"latencyMs":1321.9931669998914},{"questionId":"q202","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":7767,"outputTokens":5,"latencyMs":1315.0292080000509},{"questionId":"q202","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"75","actual":"70","isCorrect":false,"inputTokens":5205,"outputTokens":5,"latencyMs":918.2240830000956},{"questionId":"q202","format":"toon","model":"claude-haiku-4-5-20251001","expected":"75","actual":"75","isCorrect":true,"inputTokens":6225,"outputTokens":5,"latencyMs":993.3855000000913},{"questionId":"q202","format":"xml","model":"claude-haiku-4-5-20251001","expected":"
75","actual":"100","isCorrect":false,"inputTokens":8772,"outputTokens":5,"latencyMs":998.8321670000441},{"questionId":"q202","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"75","actual":"74","isCorrect":false,"inputTokens":6171,"outputTokens":5,"latencyMs":1022.2879580000881},{"questionId":"q203","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7777,"outputTokens":18,"latencyMs":1319.1331670000218},{"questionId":"q203","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":5215,"outputTokens":26,"latencyMs":1044.1837079999968},{"questionId":"q203","format":"toon","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":6235,"outputTokens":26,"latencyMs":1229.3462499999441},{"questionId":"q203","format":"xml","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":8782,"outputTokens":26,"latencyMs":1153.167082999833},{"questionId":"q203","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":6181,"outputTokens":26,"latencyMs":1140.1483329997864},{"questionId":"q204","format":"json-pretty","model":"claude
-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":7771,"outputTokens":4,"latencyMs":1057.0545830000192},{"questionId":"q204","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":5209,"outputTokens":4,"latencyMs":1038.1022499999963},{"questionId":"q204","format":"toon","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6229,"outputTokens":4,"latencyMs":951.9464999998454},{"questionId":"q204","format":"xml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":8776,"outputTokens":4,"latencyMs":1045.921832999913},{"questionId":"q204","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"info","actual":"info","isCorrect":true,"inputTokens":6175,"outputTokens":4,"latencyMs":1206.6149999999907},{"questionId":"q205","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1685,"outputTokens":4,"latencyMs":781.0249159999657},{"questionId":"q205","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1057,"outputTokens":4,"latencyMs":865.4773750000168},{"questionId":"q205","format":"toon","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":745,"outputTokens":4,"latencyMs":847.1748330001719},{"questionId":"q205","format":"csv","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":688,"outputTokens":4,"latencyMs":1022.7747920001857},{"questionId":"q205","format":"xml","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":1967,"outputTokens":4,"latencyMs":788.3179999999702},{"questionId":"q205","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"YES","actual":"YES","isCorrect":true,"inputTo
kens":1257,"outputTokens":4,"latencyMs":746.1863339999691},{"questionId":"q206","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1459,"outputTokens":4,"latencyMs":1047.0943330000155},{"questionId":"q206","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":924,"outputTokens":4,"latencyMs":919.9484170000069},{"questionId":"q206","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":666,"outputTokens":4,"latencyMs":907.2270830001216},{"questionId":"q206","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":612,"outputTokens":4,"latencyMs":845.8464999999851},{"questionId":"q206","format":"xml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1696,"outputTokens":4,"latencyMs":946.0020830000285},{"questionId":"q206","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1094,"outputTokens":4,"latencyMs":920.9464169999119},{"questionId":"q207","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1925,"outputTokens":4,"latencyMs":1408.2781249999534},{"questionId":"q207","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1204,"outputTokens":4,"latencyMs":1325.2302080001682},{"questionId":"q207","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":838,"outputTokens":4,"latencyMs":939.0019169999287},{"questionId":"q207","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":778,"outputTokens":4,"latencyMs":822.6255419999361},{"questionId":"q207","format":"xml","m
odel":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":2255,"outputTokens":4,"latencyMs":857.5038749999367},{"questionId":"q207","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1434,"outputTokens":4,"latencyMs":850.0120409999508},{"questionId":"q208","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1676,"outputTokens":4,"latencyMs":913.5827909999061},{"questionId":"q208","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1052,"outputTokens":4,"latencyMs":1083.7319999998435},{"questionId":"q208","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1280,"outputTokens":4,"latencyMs":1061.7704580000136},{"questionId":"q208","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":685,"outputTokens":4,"latencyMs":736.6914170000236},{"questionId":"q208","format":"xml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1957,"outputTokens":4,"latencyMs":909.4639159999788},{"questionId":"q208","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1250,"outputTokens":4,"latencyMs":1399.6871670000255},{"questionId":"q209","format":"json-pretty","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1619,"outputTokens":4,"latencyMs":856.7064160001464},{"questionId":"q209","format":"json-compact","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1007,"outputTokens":4,"latencyMs":794.2346660001203},{"questionId":"q209","format":"toon","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":123
3,"outputTokens":4,"latencyMs":1252.9911249999423},{"questionId":"q209","format":"csv","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":472,"outputTokens":4,"latencyMs":1058.458375000162},{"questionId":"q209","format":"xml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1897,"outputTokens":4,"latencyMs":972.7352500001434},{"questionId":"q209","format":"yaml","model":"claude-haiku-4-5-20251001","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1203,"outputTokens":4,"latencyMs":910.6434579999186}] ================================================ FILE: benchmarks/results/accuracy/models/gemini-3-flash-preview ================================================ [{"questionId":"q1","format":"json-pretty","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":7954,"outputTokens":239,"latencyMs":3625.455249999999},{"questionId":"q1","format":"json-compact","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":4744,"outputTokens":202,"latencyMs":2635.1330420000004},{"questionId":"q1","format":"toon","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":3387,"outputTokens":259,"latencyMs":2909.5863329999993},{"questionId":"q1","format":"csv","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":3239,"outputTokens":295,"latencyMs":3192.4188330000006},{"questionId":"q1","format":"xml","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":9141,"outputTokens":250,"latencyMs":3300.681708},{"questionId":"q1","format":"yaml","model":"gemini-3-flash-preview","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":5794,"outputTokens":189,"latencyMs":2525.3320000000003},{"questionId":"q2","format":"json-pretty","model":"gemini-3-fl
ash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7954,"outputTokens":192,"latencyMs":2552.432},{"questionId":"q2","format":"json-compact","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4744,"outputTokens":168,"latencyMs":4588.639333},{"questionId":"q2","format":"toon","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":3387,"outputTokens":357,"latencyMs":3214.033292},{"questionId":"q2","format":"csv","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":3239,"outputTokens":265,"latencyMs":3336.27775},{"questionId":"q2","format":"xml","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":9141,"outputTokens":271,"latencyMs":3296.883},{"questionId":"q2","format":"yaml","model":"gemini-3-flash-preview","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5794,"outputTokens":186,"latencyMs":1830.6979580000007},{"questionId":"q3","format":"json-pretty","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":7954,"outputTokens":206,"latencyMs":1814.1679999999997},{"questionId":"q3","format":"json-compact","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":4744,"outputTokens":156,"latencyMs":1816.5712080000012},{"questionId":"q3","format":"toon","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":3387,"outputTokens":467,"latencyMs":3315.0727499999994},{"questionId":"q3","format":"csv","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":3239,"outputTok
ens":317,"latencyMs":2661.6989170000015},{"questionId":"q3","format":"xml","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":9141,"outputTokens":275,"latencyMs":3456.165792},{"questionId":"q3","format":"yaml","model":"gemini-3-flash-preview","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":5794,"outputTokens":203,"latencyMs":1919.311416999999},{"questionId":"q4","format":"json-pretty","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":7955,"outputTokens":174,"latencyMs":2340.148166000001},{"questionId":"q4","format":"json-compact","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":4745,"outputTokens":118,"latencyMs":1578.8430000000008},{"questionId":"q4","format":"toon","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":3388,"outputTokens":245,"latencyMs":2972.7989159999997},{"questionId":"q4","format":"csv","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":3240,"outputTokens":220,"latencyMs":2409.4155420000006},{"questionId":"q4","format":"xml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":9142,"outputTokens":202,"latencyMs":2596.7662090000013},{"questionId":"q4","format":"yaml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":5795,"outputTokens":167,"latencyMs":1865.9715830000005},{"questionId":"q5","format":"json-pretty","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7952,"outputTokens":240,"latencyMs":2259.9251249999998},{"questionId":"q5","format":"json-compact","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4742,"outputTokens":162,"latencyMs":2379.21933300
0001},{"questionId":"q5","format":"toon","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":3385,"outputTokens":249,"latencyMs":3043.0672090000007},{"questionId":"q5","format":"csv","model":"gemini-3-flash-preview","expected":"yes","actual":"1","isCorrect":true,"inputTokens":3237,"outputTokens":274,"latencyMs":2661.173041999995},{"questionId":"q5","format":"xml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":9139,"outputTokens":202,"latencyMs":2767.9813749999885},{"questionId":"q5","format":"yaml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5792,"outputTokens":231,"latencyMs":2197.264167000001},{"questionId":"q6","format":"json-pretty","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":7954,"outputTokens":251,"latencyMs":2468.244166999997},{"questionId":"q6","format":"json-compact","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":4744,"outputTokens":127,"latencyMs":1833.99820799999},{"questionId":"q6","format":"toon","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":3387,"outputTokens":477,"latencyMs":3602.21712500001},{"questionId":"q6","format":"csv","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":3239,"outputTokens":346,"latencyMs":3165.0912090000056},{"questionId":"q6","format":"xml","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":9141,"outputTokens":297,"latencyMs":3294.3596249999973},{"questionId":"q6","format":"yaml","model":"gemini-3-flash-preview","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":5794,"outputTokens":172,"latencyMs":1667.5077500000043},{"questionId":"q7","format":"json-pretty","model":"gemini-3-flash-preview","expecte
d":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7956,"outputTokens":268,"latencyMs":2391.997583000004},{"questionId":"q7","format":"json-compact","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4746,"outputTokens":217,"latencyMs":2120.4972500000003},{"questionId":"q7","format":"toon","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":3389,"outputTokens":585,"latencyMs":3673.572166999991},{"questionId":"q7","format":"csv","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":3241,"outputTokens":383,"latencyMs":3456.103833000001},{"questionId":"q7","format":"xml","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":9143,"outputTokens":229,"latencyMs":2590.3361659999937},{"questionId":"q7","format":"yaml","model":"gemini-3-flash-preview","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5796,"outputTokens":166,"latencyMs":1763.4515410000022},{"questionId":"q8","format":"json-pretty","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":7955,"outputTokens":187,"latencyMs":1945.595417000004},{"questionId":"q8","format":"json-compact","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":4745,"outputTokens":151,"latencyMs":2032.5836249999993},{"questionId":"q8","format":"toon","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":3388,"outputTokens":405,"latencyMs":2813.043042000005},{"questionId":"q8","format":"csv","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":3240,"outputTokens":362
,"latencyMs":3427.3383750000066},{"questionId":"q8","format":"xml","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":9142,"outputTokens":235,"latencyMs":2655.8687500000087},{"questionId":"q8","format":"yaml","model":"gemini-3-flash-preview","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":5795,"outputTokens":209,"latencyMs":1998.403999999995},{"questionId":"q9","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":7956,"outputTokens":231,"latencyMs":2156.807958000005},{"questionId":"q9","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":4746,"outputTokens":153,"latencyMs":2193.381708000001},{"questionId":"q9","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3389,"outputTokens":285,"latencyMs":2569.0447079999867},{"questionId":"q9","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3241,"outputTokens":184,"latencyMs":2225.6912079999893},{"questionId":"q9","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":9143,"outputTokens":258,"latencyMs":3088.273749999993},{"questionId":"q9","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":5796,"outputTokens":245,"latencyMs":2433.3254999999917},{"questionId":"q10","format":"json-pretty","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7952,"outputTokens":192,"latencyMs":1999.0586670000048},{"questionId":"q10","format":"json-compact","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4742,"outputTokens":172,"latencyMs":2230.6631659999985},{"questionId":"q10
","format":"toon","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":3385,"outputTokens":472,"latencyMs":3616.934208999999},{"questionId":"q10","format":"csv","model":"gemini-3-flash-preview","expected":"yes","actual":"1","isCorrect":true,"inputTokens":3237,"outputTokens":5802,"latencyMs":32870.668957999995},{"questionId":"q10","format":"xml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":9139,"outputTokens":251,"latencyMs":2970.3544590000092},{"questionId":"q10","format":"yaml","model":"gemini-3-flash-preview","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5792,"outputTokens":223,"latencyMs":2275.9059169999964},{"questionId":"q11","format":"json-pretty","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":7952,"outputTokens":208,"latencyMs":2240.154291999992},{"questionId":"q11","format":"json-compact","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":4742,"outputTokens":220,"latencyMs":2019.8260420000006},{"questionId":"q11","format":"toon","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":3385,"outputTokens":279,"latencyMs":2217.485499999995},{"questionId":"q11","format":"csv","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":3237,"outputTokens":233,"latencyMs":2372.3970419999823},{"questionId":"q11","format":"xml","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":9139,"outputTokens":286,"latencyMs":3144.522540999984},{"questionId":"q11","format":"yaml","model":"gemini-3-flash-preview","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":5792,"outputTokens":182,"latencyMs":1776.9458330000052},{"questionId":"q12","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Operations","actu
al":"Operations","isCorrect":true,"inputTokens":7954,"outputTokens":199,"latencyMs":2068.243167000008},{"questionId":"q12","format":"json-compact","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4744,"outputTokens":249,"latencyMs":2556.309041000015},{"questionId":"q12","format":"toon","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":3387,"outputTokens":197,"latencyMs":2067.33312499999},{"questionId":"q12","format":"csv","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":3239,"outputTokens":330,"latencyMs":2840.658457999991},{"questionId":"q12","format":"xml","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":9141,"outputTokens":202,"latencyMs":2776.487707999986},{"questionId":"q12","format":"yaml","model":"gemini-3-flash-preview","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5794,"outputTokens":141,"latencyMs":2206.415458000003},{"questionId":"q13","format":"json-pretty","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":7951,"outputTokens":2213,"latencyMs":11963.009374999994},{"questionId":"q13","format":"json-compact","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":4741,"outputTokens":3743,"latencyMs":19159.714540999994},{"questionId":"q13","format":"toon","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3384,"outputTokens":2734,"latencyMs":14160.47195799998},{"questionId":"q13","format":"csv","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3236,"outputTokens":2330,"latencyMs":12950.263708000013},{"questionId":"q13","format":"xml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputToken
s":9138,"outputTokens":3771,"latencyMs":24574.40391699999},{"questionId":"q13","format":"yaml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":5791,"outputTokens":1533,"latencyMs":7880.103917},{"questionId":"q14","format":"json-pretty","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":7951,"outputTokens":1852,"latencyMs":9905.873375000025},{"questionId":"q14","format":"json-compact","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":4741,"outputTokens":1573,"latencyMs":9463.203749999986},{"questionId":"q14","format":"toon","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3384,"outputTokens":3383,"latencyMs":25641.172249999974},{"questionId":"q14","format":"csv","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3236,"outputTokens":6869,"latencyMs":34513.120375},{"questionId":"q14","format":"xml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":9138,"outputTokens":4196,"latencyMs":26234.407166999998},{"questionId":"q14","format":"yaml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":5791,"outputTokens":2150,"latencyMs":12435.973457999993},{"questionId":"q15","format":"json-pretty","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":7951,"outputTokens":2007,"latencyMs":10856.205459000019},{"questionId":"q15","format":"json-compact","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":4741,"outputTokens":830,"latencyMs":5377.5375420000055},{"questionId":"q15","format":"toon","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3384,"outputTokens":2670,"latencyMs":20326.423833000008},{"questionId":"q15","format":"csv","model":"gemini-3-flash-
preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":3236,"outputTokens":1251,"latencyMs":7192.888832999975},{"questionId":"q15","format":"xml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":9138,"outputTokens":1183,"latencyMs":8130.2351250000065},{"questionId":"q15","format":"yaml","model":"gemini-3-flash-preview","expected":"17","actual":"17","isCorrect":true,"inputTokens":5791,"outputTokens":1565,"latencyMs":8948.246167000005},{"questionId":"q16","format":"json-pretty","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":7959,"outputTokens":11526,"latencyMs":54927.654708999995},{"questionId":"q16","format":"json-compact","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":4749,"outputTokens":14129,"latencyMs":69444.23004200001},{"questionId":"q16","format":"toon","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":3392,"outputTokens":17813,"latencyMs":85544.84624999997},{"questionId":"q16","format":"csv","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":3244,"outputTokens":10237,"latencyMs":48699.181625},{"questionId":"q16","format":"xml","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":9146,"outputTokens":15144,"latencyMs":88944.13062499999},{"questionId":"q16","format":"yaml","model":"gemini-3-flash-preview","expected":"86","actual":"86","isCorrect":true,"inputTokens":5799,"outputTokens":11901,"latencyMs":58236.14600000004},{"questionId":"q17","format":"json-pretty","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":7959,"outputTokens":9828,"latencyMs":46873.048624999996},{"questionId":"q17","format":"json-compact","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":4749,"outputTokens":5737,"latencyMs":28
472.17749999999},{"questionId":"q17","format":"toon","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":3392,"outputTokens":13246,"latencyMs":64685.196417},{"questionId":"q17","format":"csv","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":3244,"outputTokens":8302,"latencyMs":39618.234958999994},{"questionId":"q17","format":"xml","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":9146,"outputTokens":5772,"latencyMs":34000.402166000014},{"questionId":"q17","format":"yaml","model":"gemini-3-flash-preview","expected":"65","actual":"65","isCorrect":true,"inputTokens":5799,"outputTokens":11352,"latencyMs":54563.97954099998},{"questionId":"q18","format":"json-pretty","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":7960,"outputTokens":7910,"latencyMs":38451.564625},{"questionId":"q18","format":"json-compact","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":4750,"outputTokens":4783,"latencyMs":23734.33158399997},{"questionId":"q18","format":"toon","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":3393,"outputTokens":7694,"latencyMs":38817.53254200003},{"questionId":"q18","format":"csv","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":3245,"outputTokens":8402,"latencyMs":40587.71158299997},{"questionId":"q18","format":"xml","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":9147,"outputTokens":9972,"latencyMs":61279.913833},{"questionId":"q18","format":"yaml","model":"gemini-3-flash-preview","expected":"47","actual":"47","isCorrect":true,"inputTokens":5800,"outputTokens":10034,"latencyMs":48386.947958000004},{"questionId":"q19","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":t
rue,"inputTokens":7952,"outputTokens":233,"latencyMs":2216.1940419999883},{"questionId":"q19","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":4742,"outputTokens":187,"latencyMs":2281.4948330000043},{"questionId":"q19","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3385,"outputTokens":421,"latencyMs":3185.9499589999905},{"questionId":"q19","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3237,"outputTokens":299,"latencyMs":2735.214833999984},{"questionId":"q19","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":9139,"outputTokens":249,"latencyMs":2865.5949580000015},{"questionId":"q19","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":5792,"outputTokens":575,"latencyMs":4232.22866600001},{"questionId":"q20","format":"json-pretty","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":7953,"outputTokens":16645,"latencyMs":78783.37699999998},{"questionId":"q20","format":"json-compact","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":4743,"outputTokens":20007,"latencyMs":96595.14683399996},{"questionId":"q20","format":"toon","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":3386,"outputTokens":16125,"latencyMs":76973.349583},{"questionId":"q20","format":"csv","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":3238,"outputTokens":12697,"latencyMs":61811.191666},{"questionId":"q20","format":"xml","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":9140,"outputTokens":14755,"latencyMs":86098.75858299999},
{"questionId":"q20","format":"yaml","model":"gemini-3-flash-preview","expected":"96825","actual":"96825.46","isCorrect":true,"inputTokens":5793,"outputTokens":25603,"latencyMs":120761.245833},{"questionId":"q21","format":"json-pretty","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":7950,"outputTokens":6329,"latencyMs":30479.19987499999},{"questionId":"q21","format":"json-compact","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":4740,"outputTokens":4927,"latencyMs":24148.572749999992},{"questionId":"q21","format":"toon","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":3383,"outputTokens":5128,"latencyMs":24522.518415999948},{"questionId":"q21","format":"csv","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":3235,"outputTokens":11060,"latencyMs":54258.82816600002},{"questionId":"q21","format":"xml","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":9137,"outputTokens":7237,"latencyMs":44614.00195800001},{"questionId":"q21","format":"yaml","model":"gemini-3-flash-preview","expected":"79","actual":"79","isCorrect":true,"inputTokens":5790,"outputTokens":5987,"latencyMs":30460.19395799999},{"questionId":"q22","format":"json-pretty","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":7950,"outputTokens":1978,"latencyMs":10332.646833000006},{"questionId":"q22","format":"json-compact","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":4740,"outputTokens":1626,"latencyMs":8706.571708999982},{"questionId":"q22","format":"toon","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":3383,"outputTokens":8494,"latencyMs":40063.24837499997},{"questionId":"q22","format":"csv","model":"gemini-3-flash-preview","expected":"21","actual":"21","isC
orrect":true,"inputTokens":3235,"outputTokens":11763,"latencyMs":56610.87650000001},{"questionId":"q22","format":"xml","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":9137,"outputTokens":2207,"latencyMs":14080.183291999972},{"questionId":"q22","format":"yaml","model":"gemini-3-flash-preview","expected":"21","actual":"21","isCorrect":true,"inputTokens":5790,"outputTokens":8848,"latencyMs":43156.040875000006},{"questionId":"q23","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7961,"outputTokens":1568,"latencyMs":8185.19537500001},{"questionId":"q23","format":"json-compact","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4751,"outputTokens":5473,"latencyMs":25986.331999999995},{"questionId":"q23","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3394,"outputTokens":6014,"latencyMs":29227.068333000003},{"questionId":"q23","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3246,"outputTokens":4512,"latencyMs":22080.70795900002},{"questionId":"q23","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9148,"outputTokens":1228,"latencyMs":8106.303375000018},{"questionId":"q23","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5801,"outputTokens":4065,"latencyMs":21301.958958000003},{"questionId":"q24","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":7961,"outputTokens":2448,"latencyMs":12367.767333000025},{"questionId":"q24","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":4751,"outputTokens":3160,"latencyMs":15658.321832999995},{"questionId":"q24","format
":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":3394,"outputTokens":7522,"latencyMs":35942.96525000001},{"questionId":"q24","format":"csv","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":3246,"outputTokens":1981,"latencyMs":10421.97308299999},{"questionId":"q24","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":9148,"outputTokens":9091,"latencyMs":57416.60583299998},{"questionId":"q24","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":5801,"outputTokens":4041,"latencyMs":20619.26762499998},{"questionId":"q25","format":"json-pretty","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":7961,"outputTokens":1164,"latencyMs":6491.6565829999745},{"questionId":"q25","format":"json-compact","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":4751,"outputTokens":1045,"latencyMs":5745.743167000008},{"questionId":"q25","format":"toon","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":3394,"outputTokens":1595,"latencyMs":8288.561583999952},{"questionId":"q25","format":"csv","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":3246,"outputTokens":2395,"latencyMs":12089.373750000028},{"questionId":"q25","format":"xml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":9148,"outputTokens":1616,"latencyMs":10385.165582999995},{"questionId":"q25","format":"yaml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":5801,"outputTokens":5484,"latencyMs":26458.59583299997},{"questionId":"q26","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7961,"outputTokens":2459
,"latencyMs":12580.541666999983},{"questionId":"q26","format":"json-compact","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4751,"outputTokens":1588,"latencyMs":8502.766125000024},{"questionId":"q26","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3394,"outputTokens":6893,"latencyMs":33344.51425000001},{"questionId":"q26","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3246,"outputTokens":2091,"latencyMs":11260.58600000001},{"questionId":"q26","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9148,"outputTokens":2587,"latencyMs":16338.016667000018},{"questionId":"q26","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5801,"outputTokens":2567,"latencyMs":13451.73641699995},{"questionId":"q27","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":7961,"outputTokens":2407,"latencyMs":12215.841707999993},{"questionId":"q27","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":4751,"outputTokens":2037,"latencyMs":10651.695333999989},{"questionId":"q27","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":3394,"outputTokens":5106,"latencyMs":34497.624583999976},{"questionId":"q27","format":"csv","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":3246,"outputTokens":4975,"latencyMs":24697.602375000017},{"questionId":"q27","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9148,"outputTokens":4362,"latencyMs":27752.959124999994},{"questionId":"q27","format":"yaml","model":"gemini-3-flash-preview","expected":"1
0","actual":"10","isCorrect":true,"inputTokens":5801,"outputTokens":3987,"latencyMs":19767.733332999982},{"questionId":"q28","format":"json-pretty","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":7957,"outputTokens":16748,"latencyMs":79600.15991699998},{"questionId":"q28","format":"json-compact","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":4747,"outputTokens":14052,"latencyMs":66176.12862500001},{"questionId":"q28","format":"toon","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3390,"outputTokens":13157,"latencyMs":61450.89712499996},{"questionId":"q28","format":"csv","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3242,"outputTokens":15232,"latencyMs":71720.73720800004},{"questionId":"q28","format":"xml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":9144,"outputTokens":15125,"latencyMs":89182.87950000004},{"questionId":"q28","format":"yaml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":5797,"outputTokens":8460,"latencyMs":40369.170458999986},{"questionId":"q29","format":"json-pretty","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":7958,"outputTokens":16386,"latencyMs":77503.73624999996},{"questionId":"q29","format":"json-compact","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":4748,"outputTokens":11887,"latencyMs":56601.65512500005},{"questionId":"q29","format":"toon","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":3391,"outputTokens":16959,"latencyMs":78704.543458},{"questionId":"q29","format":"csv","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":3243,"outputTokens":23609,"latencyMs":112825.481584},{"quest
ionId":"q29","format":"xml","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":9145,"outputTokens":20310,"latencyMs":120850.20487499994},{"questionId":"q29","format":"yaml","model":"gemini-3-flash-preview","expected":"48","actual":"48","isCorrect":true,"inputTokens":5798,"outputTokens":14498,"latencyMs":68155.17616600002},{"questionId":"q30","format":"json-pretty","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":7958,"outputTokens":10569,"latencyMs":49480.454290999915},{"questionId":"q30","format":"json-compact","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":4748,"outputTokens":15329,"latencyMs":71282.15554099996},{"questionId":"q30","format":"toon","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":3391,"outputTokens":15211,"latencyMs":71150.65308300004},{"questionId":"q30","format":"csv","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":3243,"outputTokens":19407,"latencyMs":92346.22900000005},{"questionId":"q30","format":"xml","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":9145,"outputTokens":13208,"latencyMs":80364.869083},{"questionId":"q30","format":"yaml","model":"gemini-3-flash-preview","expected":"36","actual":"36","isCorrect":true,"inputTokens":5798,"outputTokens":16422,"latencyMs":77469.96666700009},{"questionId":"q31","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7959,"outputTokens":1455,"latencyMs":7871.520207999973},{"questionId":"q31","format":"json-compact","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4749,"outputTokens":3337,"latencyMs":16940.903041999904},{"questionId":"q31","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":tru
e,"inputTokens":3392,"outputTokens":7698,"latencyMs":37917.3464579999},{"questionId":"q31","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3244,"outputTokens":2777,"latencyMs":14377.056125000003},{"questionId":"q31","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9146,"outputTokens":4019,"latencyMs":26194.21516699996},{"questionId":"q31","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5799,"outputTokens":4811,"latencyMs":24804.60845900001},{"questionId":"q32","format":"json-pretty","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":7959,"outputTokens":1267,"latencyMs":6935.964540999965},{"questionId":"q32","format":"json-compact","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":4749,"outputTokens":2935,"latencyMs":14397.04920800007},{"questionId":"q32","format":"toon","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3392,"outputTokens":1792,"latencyMs":9424.540042000008},{"questionId":"q32","format":"csv","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3244,"outputTokens":7315,"latencyMs":36771.32283399999},{"questionId":"q32","format":"xml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":9146,"outputTokens":4903,"latencyMs":30213.814125000034},{"questionId":"q32","format":"yaml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":5799,"outputTokens":2639,"latencyMs":14096.540874999948},{"questionId":"q33","format":"json-pretty","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":7959,"outputTokens":3301,"latencyMs":16739.186916000093},{"questionId":"q33","format":"json-compact","mo
del":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":4749,"outputTokens":1826,"latencyMs":9530.344500000007},{"questionId":"q33","format":"toon","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3392,"outputTokens":8386,"latencyMs":40940.272916999995},{"questionId":"q33","format":"csv","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":3244,"outputTokens":5507,"latencyMs":27193.604374999995},{"questionId":"q33","format":"xml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":9146,"outputTokens":1722,"latencyMs":10933.165042000008},{"questionId":"q33","format":"yaml","model":"gemini-3-flash-preview","expected":"12","actual":"12","isCorrect":true,"inputTokens":5799,"outputTokens":4603,"latencyMs":23554.350582999992},{"questionId":"q34","format":"json-pretty","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":7952,"outputTokens":3164,"latencyMs":16076.625875000027},{"questionId":"q34","format":"json-compact","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":4742,"outputTokens":2649,"latencyMs":13420.916791999945},{"questionId":"q34","format":"toon","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3385,"outputTokens":13444,"latencyMs":65856.00358400005},{"questionId":"q34","format":"csv","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3237,"outputTokens":10681,"latencyMs":52920.23595900007},{"questionId":"q34","format":"xml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":9139,"outputTokens":4409,"latencyMs":27524.992582999985},{"questionId":"q34","format":"yaml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":5792,"outputTokens":8537,"lat
encyMs":42930.85120799998},{"questionId":"q35","format":"json-pretty","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":7952,"outputTokens":2509,"latencyMs":12793.079041999998},{"questionId":"q35","format":"json-compact","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":4742,"outputTokens":9923,"latencyMs":46836.51587500004},{"questionId":"q35","format":"toon","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3385,"outputTokens":11447,"latencyMs":55717.83683299995},{"questionId":"q35","format":"csv","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":3237,"outputTokens":6299,"latencyMs":31261.50887499994},{"questionId":"q35","format":"xml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":9139,"outputTokens":8239,"latencyMs":48178.13370800007},{"questionId":"q35","format":"yaml","model":"gemini-3-flash-preview","expected":"14","actual":"14","isCorrect":true,"inputTokens":5792,"outputTokens":10789,"latencyMs":48658.011833},{"questionId":"q36","format":"json-pretty","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":14520,"outputTokens":239,"latencyMs":4084.1892080000835},{"questionId":"q36","format":"json-compact","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":8893,"outputTokens":213,"latencyMs":2221.47795900004},{"questionId":"q36","format":"toon","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":9426,"outputTokens":330,"latencyMs":2917.066125000012},{"questionId":"q36","format":"xml","model":"gemini-3-flash-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":16023,"outputTokens":405,"latencyMs":3279.3449170000385},{"questionId":"q36","format":"yaml","model":"gemini-3-fla
sh-preview","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":10588,"outputTokens":186,"latencyMs":2375.2371660000645},{"questionId":"q37","format":"json-pretty","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":14520,"outputTokens":269,"latencyMs":4300.670500000007},{"questionId":"q37","format":"json-compact","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8893,"outputTokens":120,"latencyMs":2138.3337090000277},{"questionId":"q37","format":"toon","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":9426,"outputTokens":379,"latencyMs":4250.551541999914},{"questionId":"q37","format":"xml","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":16023,"outputTokens":348,"latencyMs":3051.399666000041},{"questionId":"q37","format":"yaml","model":"gemini-3-flash-preview","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":10588,"outputTokens":365,"latencyMs":2959.8823329999577},{"questionId":"q38","format":"json-pretty","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":14520,"outputTokens":401,"latencyMs":3398.0299589999486},{"questionId":"q38","format":"json-compact","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":8893,"outputTokens":262,"latencyMs":2614.450458000065},{"questionId":"q38","format":"toon","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":9426,"outputTokens":1033,"latencyMs":7634.493875000044},{"questionId":"q38","format":"xml","model":"gemini-3-flash-preview","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":16023,"outputTokens":257,"latencyMs":2810.2027499999385},{"questionId":"q38","format":"yaml","model":"gemini-3-flash-preview","expecte
d":"970.81","actual":"970.81","isCorrect":true,"inputTokens":10588,"outputTokens":271,"latencyMs":3648.3497920000227},{"questionId":"q39","format":"json-pretty","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":14520,"outputTokens":332,"latencyMs":4216.776208000025},{"questionId":"q39","format":"json-compact","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8893,"outputTokens":323,"latencyMs":2974.3484999999637},{"questionId":"q39","format":"toon","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":9426,"outputTokens":347,"latencyMs":4259.04241600004},{"questionId":"q39","format":"xml","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":16023,"outputTokens":310,"latencyMs":2637.323708000011},{"questionId":"q39","format":"yaml","model":"gemini-3-flash-preview","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":10588,"outputTokens":385,"latencyMs":3122.3812909999397},{"questionId":"q40","format":"json-pretty","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":14520,"outputTokens":114,"latencyMs":1757.4957500000019},{"questionId":"q40","format":"json-compact","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":8893,"outputTokens":376,"latencyMs":3298.5533750000177},{"questionId":"q40","format":"toon","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":9426,"outputTokens":220,"latencyMs":2433.519541000016},{"questionId":"q40","format":"xml","model":"gemini-3-flash-preview","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":16023,"outputTokens":519,"latencyMs":4017.63049999997},{"questionId":"q40","format":"yaml","model":"gemini-3-flash-preview","
expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":10588,"outputTokens":356,"latencyMs":4236.816583999898},{"questionId":"q41","format":"json-pretty","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":14520,"outputTokens":337,"latencyMs":3656.2743330000667},{"questionId":"q41","format":"json-compact","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8893,"outputTokens":299,"latencyMs":3071.092665999895},{"questionId":"q41","format":"toon","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":9426,"outputTokens":312,"latencyMs":3423.856124999933},{"questionId":"q41","format":"xml","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":16023,"outputTokens":384,"latencyMs":3164.3665000000037},{"questionId":"q41","format":"yaml","model":"gemini-3-flash-preview","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":10588,"outputTokens":258,"latencyMs":2742.6081249999115},{"questionId":"q42","format":"json-pretty","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":14520,"outputTokens":370,"latencyMs":6332.384084000019},{"questionId":"q42","format":"json-compact","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":8893,"outputTokens":247,"latencyMs":2596.109124999959},{"questionId":"q42","format":"toon","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":9426,"outputTokens":287,"latencyMs":3813.7137079999084},{"questionId":"q42","format":"xml","model":"gemini-3-flash-preview","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":16023,"outputTokens":517,"latencyMs":4750.313916999963},{"questionId":"q42","format":"yaml","model":"gemini-3-flash-preview","expected":"257.3","actual":"25
7.3","isCorrect":true,"inputTokens":10588,"outputTokens":368,"latencyMs":3190.7336670000805},{"questionId":"q43","format":"json-pretty","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14520,"outputTokens":208,"latencyMs":3772.5327089999337},{"questionId":"q43","format":"json-compact","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8893,"outputTokens":383,"latencyMs":3755.0355419999687},{"questionId":"q43","format":"toon","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9426,"outputTokens":356,"latencyMs":3920.7263749999693},{"questionId":"q43","format":"xml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":16023,"outputTokens":524,"latencyMs":4781.3879169999855},{"questionId":"q43","format":"yaml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10588,"outputTokens":213,"latencyMs":2596.978000000003},{"questionId":"q44","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":14521,"outputTokens":219,"latencyMs":3901.433541999897},{"questionId":"q44","format":"json-compact","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":8894,"outputTokens":153,"latencyMs":2440.811333000078},{"questionId":"q44","format":"toon","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":9427,"outputTokens":225,"latencyMs":3439.9409160000505},{"questionId":"q44","format":"xml","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. 
Lafayette Schumm","isCorrect":true,"inputTokens":16024,"outputTokens":206,"latencyMs":2855.7029999999795},{"questionId":"q44","format":"yaml","model":"gemini-3-flash-preview","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":10589,"outputTokens":213,"latencyMs":2617.3762919999426},{"questionId":"q45","format":"json-pretty","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":14521,"outputTokens":131,"latencyMs":1981.949041999993},{"questionId":"q45","format":"json-compact","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":8894,"outputTokens":223,"latencyMs":2614.310708999983},{"questionId":"q45","format":"toon","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":9427,"outputTokens":481,"latencyMs":5567.526500000036},{"questionId":"q45","format":"xml","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":16024,"outputTokens":255,"latencyMs":2444.7313330000034},{"questionId":"q45","format":"yaml","model":"gemini-3-flash-preview","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":10589,"outputTokens":201,"latencyMs":2859.697374999989},{"questionId":"q46","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":14521,"outputTokens":438,"latencyMs":6194.675749999937},{"questionId":"q46","format":"json-compact","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":8894,"outputTokens":419,"latencyMs":3519.895957999979},{"questionId":"q46","format":"toon","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02
-25","isCorrect":true,"inputTokens":9427,"outputTokens":370,"latencyMs":4239.617625000072},{"questionId":"q46","format":"xml","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":16024,"outputTokens":441,"latencyMs":3473.413541999995},{"questionId":"q46","format":"yaml","model":"gemini-3-flash-preview","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":10589,"outputTokens":218,"latencyMs":2793.329832999967},{"questionId":"q47","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":14520,"outputTokens":517,"latencyMs":6171.0984580001095},{"questionId":"q47","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":8893,"outputTokens":493,"latencyMs":4513.148999999976},{"questionId":"q47","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":9426,"outputTokens":357,"latencyMs":3426.631167000043},{"questionId":"q47","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":16023,"outputTokens":715,"latencyMs":5319.806500000064},{"questionId":"q47","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":10588,"outputTokens":753,"latencyMs":5722.641249999986},{"questionId":"q48","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":14521,"outputTokens":214,"latencyMs":2704.503792000003},{"questionId":"q48","format":"json-compact","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":8894,"outputTokens":226,"latencyMs":3192.3600840000436},{"questionId":"q48","format":"toon","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. 
Bertha Hagenes","isCorrect":true,"inputTokens":9427,"outputTokens":337,"latencyMs":3822.5321669999976},{"questionId":"q48","format":"xml","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":16024,"outputTokens":253,"latencyMs":4071.4312500000233},{"questionId":"q48","format":"yaml","model":"gemini-3-flash-preview","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":10589,"outputTokens":202,"latencyMs":2787.2034579999745},{"questionId":"q49","format":"json-pretty","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":14521,"outputTokens":172,"latencyMs":2478.8660420000087},{"questionId":"q49","format":"json-compact","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":8894,"outputTokens":266,"latencyMs":3291.44908400008},{"questionId":"q49","format":"toon","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":9427,"outputTokens":303,"latencyMs":3379.1073340000585},{"questionId":"q49","format":"xml","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":16024,"outputTokens":215,"latencyMs":3803.1052909999853},{"questionId":"q49","format":"yaml","model":"gemini-3-flash-preview","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":10589,"outputTokens":242,"latencyMs":3215.7367089999607},{"questionId":"q50","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":14521,"outputTokens":534,"latencyMs":5678.362208000035},{"questionId":"q50","format":"json-compact","model":"gemini-3-flas
h-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":8894,"outputTokens":351,"latencyMs":3124.625166999991},{"questionId":"q50","format":"toon","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":9427,"outputTokens":383,"latencyMs":3374.228000000003},{"questionId":"q50","format":"xml","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":16024,"outputTokens":513,"latencyMs":4168.5925419999985},{"questionId":"q50","format":"yaml","model":"gemini-3-flash-preview","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":10589,"outputTokens":221,"latencyMs":2439.0822499999776},{"questionId":"q51","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":14520,"outputTokens":1894,"latencyMs":14970.280374999973},{"questionId":"q51","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":8893,"outputTokens":282,"latencyMs":2762.680416999967},{"questionId":"q51","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":9426,"outputTokens":510,"latencyMs":5080.336041999981},{"questionId":"q51","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":16023,"outputTokens":737,"latencyMs":5118.358458999894},{"questionId":"q51","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":10588,"outputTokens":547,"latencyMs":4121.631540999981},{"questionId":"q52","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14515,"outputTokens":1586,"latencyMs":10084.839665999985},{"questionId":"q52","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":tru
e,"inputTokens":8888,"outputTokens":939,"latencyMs":6117.736290999921},{"questionId":"q52","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9421,"outputTokens":3970,"latencyMs":22298.110333000077},{"questionId":"q52","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16018,"outputTokens":1824,"latencyMs":10803.940333000035},{"questionId":"q52","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10583,"outputTokens":1312,"latencyMs":8204.72112500004},{"questionId":"q53","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14515,"outputTokens":1293,"latencyMs":8477.8419590001},{"questionId":"q53","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":8888,"outputTokens":897,"latencyMs":6555.321166999987},{"questionId":"q53","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9421,"outputTokens":1737,"latencyMs":11308.854500000016},{"questionId":"q53","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16018,"outputTokens":1534,"latencyMs":9507.72520799993},{"questionId":"q53","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10583,"outputTokens":1579,"latencyMs":9510.461666999967},{"questionId":"q54","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14516,"outputTokens":1855,"latencyMs":11689.714666999993},{"questionId":"q54","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":8889,"outputTokens":1595,"latencyMs":10057.804125000024},{"questionId":"q54","format":"toon"
,"model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9422,"outputTokens":919,"latencyMs":6539.259709000005},{"questionId":"q54","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16019,"outputTokens":1549,"latencyMs":9686.657083999948},{"questionId":"q54","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10584,"outputTokens":1009,"latencyMs":6737.831374999951},{"questionId":"q55","format":"json-pretty","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":14516,"outputTokens":22046,"latencyMs":119814.9747919999},{"questionId":"q55","format":"json-compact","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":8889,"outputTokens":17747,"latencyMs":94282.35662500001},{"questionId":"q55","format":"toon","model":"gemini-3-flash-preview","expected":"38069.93","actual":"Answer: 
38069.93","isCorrect":true,"inputTokens":9422,"outputTokens":26876,"latencyMs":144182.03554200009},{"questionId":"q55","format":"xml","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":16019,"outputTokens":15952,"latencyMs":83522.74987499998},{"questionId":"q55","format":"yaml","model":"gemini-3-flash-preview","expected":"38069.93","actual":"38069.93","isCorrect":true,"inputTokens":10584,"outputTokens":15993,"latencyMs":83854.517291},{"questionId":"q56","format":"json-pretty","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":14514,"outputTokens":17232,"latencyMs":93816.99558300001},{"questionId":"q56","format":"json-compact","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":8887,"outputTokens":10439,"latencyMs":55391.57041699998},{"questionId":"q56","format":"toon","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":9420,"outputTokens":26949,"latencyMs":147609.17754199996},{"questionId":"q56","format":"xml","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":16017,"outputTokens":14824,"latencyMs":78400.49437500001},{"questionId":"q56","format":"yaml","model":"gemini-3-flash-preview","expected":"761.40","actual":"761.3986","isCorrect":true,"inputTokens":10582,"outputTokens":21639,"latencyMs":115665.42524999997},{"questionId":"q57","format":"json-pretty","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":14515,"outputTokens":279,"latencyMs":2675.1986250000773},{"questionId":"q57","format":"json-compact","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":8888,"outputTokens":716,"latencyMs":5042.823250000016},{"questionId":"q57","format":"toon","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,
"inputTokens":9421,"outputTokens":424,"latencyMs":3740.129125000094},{"questionId":"q57","format":"xml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":16018,"outputTokens":776,"latencyMs":5229.144583000103},{"questionId":"q57","format":"yaml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":10583,"outputTokens":620,"latencyMs":4741.052417000057},{"questionId":"q58","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":14514,"outputTokens":8780,"latencyMs":50699.300667},{"questionId":"q58","format":"json-compact","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":8887,"outputTokens":1805,"latencyMs":10487.411750000087},{"questionId":"q58","format":"toon","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":9420,"outputTokens":5850,"latencyMs":33373.52466699993},{"questionId":"q58","format":"xml","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":16017,"outputTokens":8972,"latencyMs":49852.997083999915},{"questionId":"q58","format":"yaml","model":"gemini-3-flash-preview","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":10582,"outputTokens":8969,"latencyMs":48664.47791699995},{"questionId":"q59","format":"json-pretty","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":14520,"outputTokens":4700,"latencyMs":27796.357416999992},{"questionId":"q59","format":"json-compact","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":8893,"outputTokens":15868,"latencyMs":83459.89466699993},{"questionId":"q59","format":"toon","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":9426,"outputTokens":1723,"latencyMs":10277.792
625000002},{"questionId":"q59","format":"xml","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":16023,"outputTokens":4585,"latencyMs":24014.985666999943},{"questionId":"q59","format":"yaml","model":"gemini-3-flash-preview","expected":"46","actual":"46","isCorrect":true,"inputTokens":10588,"outputTokens":6603,"latencyMs":34684.088292},{"questionId":"q60","format":"json-pretty","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":14520,"outputTokens":9913,"latencyMs":55103.00254100002},{"questionId":"q60","format":"json-compact","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":8893,"outputTokens":8949,"latencyMs":46694.73845800001},{"questionId":"q60","format":"toon","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":9426,"outputTokens":12668,"latencyMs":67138.90345800004},{"questionId":"q60","format":"xml","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":16023,"outputTokens":13794,"latencyMs":71987.96762500005},{"questionId":"q60","format":"yaml","model":"gemini-3-flash-preview","expected":"38","actual":"38","isCorrect":true,"inputTokens":10588,"outputTokens":4639,"latencyMs":24705.85929199995},{"questionId":"q61","format":"json-pretty","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":14520,"outputTokens":6175,"latencyMs":33881.228292000014},{"questionId":"q61","format":"json-compact","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":8893,"outputTokens":2873,"latencyMs":15629.773790999898},{"questionId":"q61","format":"toon","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":9426,"outputTokens":2590,"latencyMs":15352.111291000037},{"questionId":"q61","format":"xml","model":"gemini-3-flash-preview","expected":"29","actual":"
29","isCorrect":true,"inputTokens":16023,"outputTokens":3560,"latencyMs":19004.292000000016},{"questionId":"q61","format":"yaml","model":"gemini-3-flash-preview","expected":"29","actual":"29","isCorrect":true,"inputTokens":10588,"outputTokens":3757,"latencyMs":20216.709333000006},{"questionId":"q62","format":"json-pretty","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":14524,"outputTokens":2701,"latencyMs":16061.990833000047},{"questionId":"q62","format":"json-compact","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":8897,"outputTokens":3692,"latencyMs":20302.256417000084},{"questionId":"q62","format":"toon","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":9430,"outputTokens":3430,"latencyMs":19275.306500000064},{"questionId":"q62","format":"xml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":16027,"outputTokens":3723,"latencyMs":20844.68087499996},{"questionId":"q62","format":"yaml","model":"gemini-3-flash-preview","expected":"10","actual":"10","isCorrect":true,"inputTokens":10592,"outputTokens":2730,"latencyMs":15592.86608399998},{"questionId":"q63","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":14524,"outputTokens":1698,"latencyMs":10112.519083000021},{"questionId":"q63","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":8897,"outputTokens":2376,"latencyMs":13232.432625000016},{"questionId":"q63","format":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":9430,"outputTokens":5256,"latencyMs":29218.475041999947},{"questionId":"q63","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":16027,"outputTokens":4564,"latencyMs":24987.84912499995},{"questionId":
"q63","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":10592,"outputTokens":2961,"latencyMs":16846.877332999953},{"questionId":"q64","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":14525,"outputTokens":2356,"latencyMs":13198.135666999966},{"questionId":"q64","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":8898,"outputTokens":4747,"latencyMs":26265.633375000092},{"questionId":"q64","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":9431,"outputTokens":2256,"latencyMs":13048.221834000084},{"questionId":"q64","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":16028,"outputTokens":1678,"latencyMs":9641.471332999994},{"questionId":"q64","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":10593,"outputTokens":1215,"latencyMs":7445.436791999964},{"questionId":"q65","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":14524,"outputTokens":1490,"latencyMs":8881.846499999985},{"questionId":"q65","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":8897,"outputTokens":2284,"latencyMs":13075.758625000017},{"questionId":"q65","format":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":9430,"outputTokens":2322,"latencyMs":13604.233208999969},{"questionId":"q65","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":16027,"outputTokens":2710,"latencyMs":14792.38137499988},{"questionId":"q65","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":10592,
"outputTokens":4272,"latencyMs":23129.41341700009},{"questionId":"q66","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":14522,"outputTokens":2084,"latencyMs":11989.847166999942},{"questionId":"q66","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8895,"outputTokens":1664,"latencyMs":10901.901291999966},{"questionId":"q66","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9428,"outputTokens":1219,"latencyMs":7919.164124999894},{"questionId":"q66","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":16025,"outputTokens":6934,"latencyMs":38142.17258400004},{"questionId":"q66","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":10590,"outputTokens":1495,"latencyMs":10440.66483299993},{"questionId":"q67","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":14522,"outputTokens":1507,"latencyMs":9200.867458999855},{"questionId":"q67","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":8895,"outputTokens":2148,"latencyMs":12279.845625000075},{"questionId":"q67","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":9428,"outputTokens":1610,"latencyMs":9950.602542000124},{"questionId":"q67","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":16025,"outputTokens":6010,"latencyMs":32675.77766599995},{"questionId":"q67","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":10590,"outputTokens":1839,"latencyMs":10577.224958000006},{"questionId":"q68","format":"json-pretty","model":"gemini-3-flash-prev
iew","expected":"4","actual":"4","isCorrect":true,"inputTokens":14523,"outputTokens":1878,"latencyMs":11117.630457999883},{"questionId":"q68","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8896,"outputTokens":10461,"latencyMs":56053.61800000002},{"questionId":"q68","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9429,"outputTokens":4414,"latencyMs":25176.159292000113},{"questionId":"q68","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":16026,"outputTokens":1915,"latencyMs":11221.173333999934},{"questionId":"q68","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":10591,"outputTokens":1413,"latencyMs":8690.182666999986},{"questionId":"q69","format":"json-pretty","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":14526,"outputTokens":3831,"latencyMs":21001.882874999894},{"questionId":"q69","format":"json-compact","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":8899,"outputTokens":4165,"latencyMs":22770.965709000127},{"questionId":"q69","format":"toon","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":9432,"outputTokens":18550,"latencyMs":100414.03024999984},{"questionId":"q69","format":"xml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":16029,"outputTokens":17310,"latencyMs":91830.33833399997},{"questionId":"q69","format":"yaml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":10594,"outputTokens":13016,"latencyMs":68778.63179200003},{"questionId":"q70","format":"json-pretty","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":14526,"outputTokens":5235,"latencyMs":28
447.320374999894},{"questionId":"q70","format":"json-compact","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":8899,"outputTokens":15466,"latencyMs":82281.10062499996},{"questionId":"q70","format":"toon","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":9432,"outputTokens":18011,"latencyMs":98886.45154100005},{"questionId":"q70","format":"xml","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":16029,"outputTokens":13188,"latencyMs":69947.36058399989},{"questionId":"q70","format":"yaml","model":"gemini-3-flash-preview","expected":"22","actual":"22","isCorrect":true,"inputTokens":10594,"outputTokens":11280,"latencyMs":59114.20470799995},{"questionId":"q71","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":4850,"outputTokens":119,"latencyMs":1829.5722499999683},{"questionId":"q71","format":"json-compact","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3098,"outputTokens":169,"latencyMs":2557.803041000152},{"questionId":"q71","format":"toon","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":2361,"outputTokens":245,"latencyMs":2140.7205409999005},{"questionId":"q71","format":"csv","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":2275,"outputTokens":214,"latencyMs":1957.9020410000812},{"questionId":"q71","format":"xml","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":5493,"outputTokens":233,"latencyMs":2409.1820829999633},{"questionId":"q71","format":"yaml","model":"gemini-3-flash-preview","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3879,"outputTokens":280,"latencyMs":2552.6536250000354},{"questionId":"q72","format":"json-pretty","model":"gemini-3-flash-previe
w","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":4850,"outputTokens":223,"latencyMs":2256.7499589999206},{"questionId":"q72","format":"json-compact","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3098,"outputTokens":299,"latencyMs":2469.9167500000913},{"questionId":"q72","format":"toon","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":2361,"outputTokens":487,"latencyMs":3384.175290999934},{"questionId":"q72","format":"csv","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":2275,"outputTokens":306,"latencyMs":2464.7807089998387},{"questionId":"q72","format":"xml","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":5493,"outputTokens":248,"latencyMs":2633.3309579999186},{"questionId":"q72","format":"yaml","model":"gemini-3-flash-preview","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3879,"outputTokens":301,"latencyMs":2412.069792000111},{"questionId":"q73","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":4851,"outputTokens":184,"latencyMs":2531.2841250000056},{"questionId":"q73","format":"json-compact","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3099,"outputTokens":338,"latencyMs":2906.40533400001},{"questionId":"q73","format":"toon","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":2362,"outputTokens":336,"latencyMs":3120.241541000083},{"questionId":"q73","format":"csv","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":2276,"outputTokens":221,"latencyMs":2187.43429200002},{"questionId":"q73","format":"xml","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":tru
e,"inputTokens":5494,"outputTokens":269,"latencyMs":2550.5684999998193},{"questionId":"q73","format":"yaml","model":"gemini-3-flash-preview","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3880,"outputTokens":564,"latencyMs":3829.89783300017},{"questionId":"q74","format":"json-pretty","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":4851,"outputTokens":212,"latencyMs":2191.561499999836},{"questionId":"q74","format":"json-compact","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":3099,"outputTokens":161,"latencyMs":2047.781084000133},{"questionId":"q74","format":"toon","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":2362,"outputTokens":416,"latencyMs":3263.763874999946},{"questionId":"q74","format":"csv","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":2276,"outputTokens":259,"latencyMs":2059.760958999861},{"questionId":"q74","format":"xml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":5494,"outputTokens":249,"latencyMs":2466.5045409998856},{"questionId":"q74","format":"yaml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":3880,"outputTokens":189,"latencyMs":1927.4447920001112},{"questionId":"q75","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":4850,"outputTokens":168,"latencyMs":1590.1414999999106},{"questionId":"q75","format":"json-compact","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3098,"outputTokens":233,"latencyMs":2454.150749999797},{"questionId":"q75","format":"toon","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":2361,"outputTokens":294,"latencyMs":2953.3149170000106},{"questionId":"q75","format":"c
sv","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":2275,"outputTokens":190,"latencyMs":1742.4772909998428},{"questionId":"q75","format":"xml","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":5493,"outputTokens":230,"latencyMs":2560.6022919998504},{"questionId":"q75","format":"yaml","model":"gemini-3-flash-preview","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3879,"outputTokens":225,"latencyMs":2538.289208999835},{"questionId":"q76","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":4850,"outputTokens":276,"latencyMs":2100.7069999999367},{"questionId":"q76","format":"json-compact","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":3098,"outputTokens":171,"latencyMs":2313.077791000018},{"questionId":"q76","format":"toon","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":2361,"outputTokens":1265,"latencyMs":7686.007291999878},{"questionId":"q76","format":"csv","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":2275,"outputTokens":316,"latencyMs":2579.2038340000436},{"questionId":"q76","format":"xml","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":5493,"outputTokens":205,"latencyMs":2630.3139589999337},{"questionId":"q76","format":"yaml","model":"gemini-3-flash-preview","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":3879,"outputTokens":334,"latencyMs":3260.627499999944},{"questionId":"q77","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":4851,"outputTokens":210,"latencyMs":2122.3630420002155},{"questionId":"q77","format":"json-compact","model":"gemini-3-flash-preview","ex
pected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3099,"outputTokens":362,"latencyMs":2923.6872910000384},{"questionId":"q77","format":"toon","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":2362,"outputTokens":231,"latencyMs":2338.994457999943},{"questionId":"q77","format":"csv","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":2276,"outputTokens":195,"latencyMs":2056.686915999977},{"questionId":"q77","format":"xml","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":5494,"outputTokens":185,"latencyMs":2833.9936659999657},{"questionId":"q77","format":"yaml","model":"gemini-3-flash-preview","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3880,"outputTokens":279,"latencyMs":2919.6189160000067},{"questionId":"q78","format":"json-pretty","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":4851,"outputTokens":176,"latencyMs":1762.5695419998374},{"questionId":"q78","format":"json-compact","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":3099,"outputTokens":344,"latencyMs":2663.3037089998834},{"questionId":"q78","format":"toon","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":2362,"outputTokens":492,"latencyMs":3573.057499999879},{"questionId":"q78","format":"csv","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":2276,"outputTokens":353,"latencyMs":2462.9716250000056},{"questionId":"q78","format":"xml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":5494,"outputTokens":209,"latencyMs":2461.564041999867},{"questionId":"q78","format":"yaml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":3880,"outputTokens":173,"latencyMs":2031.1732079999056}
,{"questionId":"q79","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":4850,"outputTokens":179,"latencyMs":1812.0257910001092},{"questionId":"q79","format":"json-compact","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3098,"outputTokens":165,"latencyMs":1910.7827920001},{"questionId":"q79","format":"toon","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":2361,"outputTokens":478,"latencyMs":4064.633916999912},{"questionId":"q79","format":"csv","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":2275,"outputTokens":363,"latencyMs":2539.4810419999994},{"questionId":"q79","format":"xml","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":5493,"outputTokens":235,"latencyMs":2455.422833000077},{"questionId":"q79","format":"yaml","model":"gemini-3-flash-preview","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3879,"outputTokens":150,"latencyMs":2093.886832999997},{"questionId":"q80","format":"json-pretty","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":4843,"outputTokens":840,"latencyMs":4959.74383299984},{"questionId":"q80","format":"json-compact","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3091,"outputTokens":1262,"latencyMs":10007.206290999893},{"questionId":"q80","format":"toon","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2354,"outputTokens":1385,"latencyMs":7940.294125000015},{"questionId":"q80","format":"csv","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2268,"outputTokens":1884,"latencyMs":9444.756959000137},{"questionId":"q80","format":"xml","model":"gemini-3-flash-preview","expected":"60","actual":"6
0","isCorrect":true,"inputTokens":5486,"outputTokens":1489,"latencyMs":9292.004332999932},{"questionId":"q80","format":"yaml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3872,"outputTokens":1235,"latencyMs":7389.324040999869},{"questionId":"q81","format":"json-pretty","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":4844,"outputTokens":9907,"latencyMs":44819.90987500013},{"questionId":"q81","format":"json-compact","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":3092,"outputTokens":6790,"latencyMs":33174.798957999796},{"questionId":"q81","format":"toon","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":2355,"outputTokens":16999,"latencyMs":83272.93574999995},{"questionId":"q81","format":"csv","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":2269,"outputTokens":18298,"latencyMs":83436.81708399998},{"questionId":"q81","format":"xml","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":5487,"outputTokens":16309,"latencyMs":79165.26470799977},{"questionId":"q81","format":"yaml","model":"gemini-3-flash-preview","expected":"338580","actual":"338580","isCorrect":true,"inputTokens":3873,"outputTokens":5834,"latencyMs":29618.839625000022},{"questionId":"q82","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":4844,"outputTokens":10427,"latencyMs":47558.13629200007},{"questionId":"q82","format":"json-compact","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":3092,"outputTokens":10931,"latencyMs":52990.450666000135},{"questionId":"q82","format":"toon","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":2355,"outp
utTokens":5785,"latencyMs":28527.226582999807},{"questionId":"q82","format":"csv","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":2269,"outputTokens":12442,"latencyMs":56605.5457919999},{"questionId":"q82","format":"xml","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":5487,"outputTokens":8100,"latencyMs":40539.29049999989},{"questionId":"q82","format":"yaml","model":"gemini-3-flash-preview","expected":"1666","actual":"1666","isCorrect":true,"inputTokens":3873,"outputTokens":8493,"latencyMs":42170.64012500015},{"questionId":"q83","format":"json-pretty","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":4842,"outputTokens":26424,"latencyMs":120804.7117920001},{"questionId":"q83","format":"json-compact","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":3090,"outputTokens":26383,"latencyMs":126429.28150000004},{"questionId":"q83","format":"toon","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":2353,"outputTokens":17386,"latencyMs":84437.76829200005},{"questionId":"q83","format":"csv","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":2267,"outputTokens":18943,"latencyMs":85890.99399999995},{"questionId":"q83","format":"xml","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":5485,"outputTokens":17626,"latencyMs":85216.78054199996},{"questionId":"q83","format":"yaml","model":"gemini-3-flash-preview","expected":"278050.98","actual":"278050.98","isCorrect":true,"inputTokens":3871,"outputTokens":17143,"latencyMs":83208.28020799998},{"questionId":"q84","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":4840,"outp
utTokens":23834,"latencyMs":111591.6606660001},{"questionId":"q84","format":"json-compact","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":3088,"outputTokens":21038,"latencyMs":103479.88100000005},{"questionId":"q84","format":"toon","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":2351,"outputTokens":35851,"latencyMs":174881.03937500017},{"questionId":"q84","format":"csv","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":2265,"outputTokens":16542,"latencyMs":74305.61962499982},{"questionId":"q84","format":"xml","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":5483,"outputTokens":29239,"latencyMs":143886.16129099997},{"questionId":"q84","format":"yaml","model":"gemini-3-flash-preview","expected":"0.49","actual":"0.4858333333333333","isCorrect":true,"inputTokens":3869,"outputTokens":22128,"latencyMs":108770.82458300004},{"questionId":"q85","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":4846,"outputTokens":5096,"latencyMs":23482.037666999968},{"questionId":"q85","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3094,"outputTokens":2796,"latencyMs":14461.010040999856},{"questionId":"q85","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2357,"outputTokens":5121,"latencyMs":26287.597333999816},{"questionId":"q85","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2271,"outputTokens":2990,"latencyMs":14215.18995800009},{"questionId":"q85","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":5489,"outputTokens":2621,"la
tencyMs":13538.900291000027},{"questionId":"q85","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3875,"outputTokens":9585,"latencyMs":47650.92799999984},{"questionId":"q86","format":"json-pretty","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":4844,"outputTokens":8114,"latencyMs":37666.09054100001},{"questionId":"q86","format":"json-compact","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":3092,"outputTokens":4856,"latencyMs":23527.82224999997},{"questionId":"q86","format":"toon","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":2355,"outputTokens":12702,"latencyMs":60716.7359999998},{"questionId":"q86","format":"csv","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":2269,"outputTokens":3749,"latencyMs":17356.627792000072},{"questionId":"q86","format":"xml","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":5487,"outputTokens":9591,"latencyMs":46486.24741699989},{"questionId":"q86","format":"yaml","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":3873,"outputTokens":3137,"latencyMs":16645.47083300003},{"questionId":"q87","format":"json-pretty","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":4853,"outputTokens":4768,"latencyMs":22222.0095840001},{"questionId":"q87","format":"json-compact","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3101,"outputTokens":10763,"latencyMs":52359.5263749999},{"questionId":"q87","format":"toon","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":2364,"outputTokens":8655,"latencyMs":43057.93550000014},{"questionId":"q87","format":"csv","model":"gemini-3-flash-preview","expected":"23"
,"actual":"23","isCorrect":true,"inputTokens":2278,"outputTokens":8486,"latencyMs":40806.75987499999},{"questionId":"q87","format":"xml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":5496,"outputTokens":6805,"latencyMs":32607.476540999953},{"questionId":"q87","format":"yaml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3882,"outputTokens":14007,"latencyMs":69418.0892080001},{"questionId":"q88","format":"json-pretty","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":4853,"outputTokens":3753,"latencyMs":17887.077249999857},{"questionId":"q88","format":"json-compact","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3101,"outputTokens":4513,"latencyMs":21889.44045799994},{"questionId":"q88","format":"toon","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":2364,"outputTokens":12240,"latencyMs":58832.42224999983},{"questionId":"q88","format":"csv","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":2278,"outputTokens":5194,"latencyMs":26775.547833000077},{"questionId":"q88","format":"xml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":5496,"outputTokens":4922,"latencyMs":25243.463334000204},{"questionId":"q88","format":"yaml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":3882,"outputTokens":5731,"latencyMs":29080.502792000072},{"questionId":"q89","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":4854,"outputTokens":5849,"latencyMs":26516.438708999893},{"questionId":"q89","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3102,"outputTokens":16173,"latencyMs":77878.715875},{"questi
onId":"q89","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2365,"outputTokens":12760,"latencyMs":61007.31995799998},{"questionId":"q89","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":2279,"outputTokens":3727,"latencyMs":18021.589042000007},{"questionId":"q89","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":5497,"outputTokens":13472,"latencyMs":64319.46916700015},{"questionId":"q89","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":3883,"outputTokens":10570,"latencyMs":50606.915917000035},{"questionId":"q90","format":"json-pretty","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":4854,"outputTokens":9662,"latencyMs":43878.04604199994},{"questionId":"q90","format":"json-compact","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":3102,"outputTokens":11573,"latencyMs":56116.81316699996},{"questionId":"q90","format":"toon","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":2365,"outputTokens":8494,"latencyMs":41614.19895899994},{"questionId":"q90","format":"csv","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":2279,"outputTokens":13149,"latencyMs":61570.56495800009},{"questionId":"q90","format":"xml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":5497,"outputTokens":12169,"latencyMs":57587.416958000045},{"questionId":"q90","format":"yaml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":3883,"outputTokens":8472,"latencyMs":41254.44212500006},{"questionId":"q91","format":"json-pretty","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inpu
tTokens":4854,"outputTokens":8273,"latencyMs":37649.36825000006},{"questionId":"q91","format":"json-compact","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3102,"outputTokens":13135,"latencyMs":62559.60337499995},{"questionId":"q91","format":"toon","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":2365,"outputTokens":12544,"latencyMs":60352.34270899999},{"questionId":"q91","format":"csv","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":2279,"outputTokens":8737,"latencyMs":40803.06487499992},{"questionId":"q91","format":"xml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":5497,"outputTokens":10767,"latencyMs":52170.04133299994},{"questionId":"q91","format":"yaml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":3883,"outputTokens":16281,"latencyMs":76947.36479100003},{"questionId":"q92","format":"json-pretty","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":4852,"outputTokens":6738,"latencyMs":30578.224417000078},{"questionId":"q92","format":"json-compact","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":3100,"outputTokens":8966,"latencyMs":42782.286208000034},{"questionId":"q92","format":"toon","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":2363,"outputTokens":11119,"latencyMs":53578.49137499998},{"questionId":"q92","format":"csv","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":2277,"outputTokens":8610,"latencyMs":41204.4491669999},{"questionId":"q92","format":"xml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":5495,"outputTokens":12674,"latencyMs":59980.40558399982},{"questionId":"q92","format":"yaml","model":"gemi
ni-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":3881,"outputTokens":5423,"latencyMs":27335.205374999903},{"questionId":"q93","format":"json-pretty","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":4852,"outputTokens":3484,"latencyMs":16504.15283300006},{"questionId":"q93","format":"json-compact","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":3100,"outputTokens":3330,"latencyMs":16408.33737500012},{"questionId":"q93","format":"toon","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":2363,"outputTokens":6409,"latencyMs":31941.1357499999},{"questionId":"q93","format":"csv","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":2277,"outputTokens":2998,"latencyMs":15110.609875000082},{"questionId":"q93","format":"xml","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":5495,"outputTokens":4221,"latencyMs":21624.425083999988},{"questionId":"q93","format":"yaml","model":"gemini-3-flash-preview","expected":"9","actual":"9","isCorrect":true,"inputTokens":3881,"outputTokens":5008,"latencyMs":25177.73974999995},{"questionId":"q94","format":"json-pretty","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":4854,"outputTokens":12810,"latencyMs":58066.875459},{"questionId":"q94","format":"json-compact","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":3102,"outputTokens":17452,"latencyMs":83096.03012500005},{"questionId":"q94","format":"toon","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":2365,"outputTokens":11515,"latencyMs":55915.19383300003},{"questionId":"q94","format":"csv","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":2279,"outputTokens":9584,"latencyMs":45677.5
1520899986},{"questionId":"q94","format":"xml","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":5497,"outputTokens":12458,"latencyMs":59882.34912499995},{"questionId":"q94","format":"yaml","model":"gemini-3-flash-preview","expected":"31","actual":"31","isCorrect":true,"inputTokens":3883,"outputTokens":10155,"latencyMs":49421.32391600008},{"questionId":"q95","format":"json-pretty","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":4854,"outputTokens":19415,"latencyMs":88079.928541},{"questionId":"q95","format":"json-compact","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":3102,"outputTokens":12142,"latencyMs":59118.61595800007},{"questionId":"q95","format":"toon","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":2365,"outputTokens":18314,"latencyMs":87911.21258299984},{"questionId":"q95","format":"csv","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":2279,"outputTokens":5962,"latencyMs":29477.809167},{"questionId":"q95","format":"xml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":5497,"outputTokens":10896,"latencyMs":53725.84250000003},{"questionId":"q95","format":"yaml","model":"gemini-3-flash-preview","expected":"28","actual":"28","isCorrect":true,"inputTokens":3883,"outputTokens":9581,"latencyMs":47041.31004200014},{"questionId":"q96","format":"json-pretty","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":20061,"outputTokens":368,"latencyMs":3275.9754170000087},{"questionId":"q96","format":"json-compact","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":15099,"outputTokens":339,"latencyMs":2886.0949589998927},{"questionId":"q96","format":"toon","model":"gemini-3-flash-preview","expected":"430886
","actual":"430886","isCorrect":true,"inputTokens":12431,"outputTokens":587,"latencyMs":4363.1266250000335},{"questionId":"q96","format":"csv","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":12278,"outputTokens":287,"latencyMs":3419.5997910001315},{"questionId":"q96","format":"xml","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":21947,"outputTokens":364,"latencyMs":3803.6397500000894},{"questionId":"q96","format":"yaml","model":"gemini-3-flash-preview","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":17145,"outputTokens":128,"latencyMs":2026.3678749999963},{"questionId":"q97","format":"json-pretty","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":20064,"outputTokens":210,"latencyMs":2619.5515419999138},{"questionId":"q97","format":"json-compact","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":15102,"outputTokens":331,"latencyMs":2843.4305420001037},{"questionId":"q97","format":"toon","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":12434,"outputTokens":833,"latencyMs":7584.43200000003},{"questionId":"q97","format":"csv","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":12281,"outputTokens":590,"latencyMs":7030.619375000009},{"questionId":"q97","format":"xml","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":21950,"outputTokens":377,"latencyMs":3602.929834000068},{"questionId":"q97","format":"yaml","model":"gemini-3-flash-preview","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17148,"outputTokens":297,"latencyMs":3195.7054999999236},{"questionId":"q98","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":2
0058,"outputTokens":331,"latencyMs":3507.5251670000143},{"questionId":"q98","format":"json-compact","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":15096,"outputTokens":279,"latencyMs":2730.177417000057},{"questionId":"q98","format":"toon","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":12428,"outputTokens":487,"latencyMs":3778.1009579999372},{"questionId":"q98","format":"csv","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":12275,"outputTokens":559,"latencyMs":7012.587832999881},{"questionId":"q98","format":"xml","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":21944,"outputTokens":362,"latencyMs":3486.3610419998877},{"questionId":"q98","format":"yaml","model":"gemini-3-flash-preview","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17142,"outputTokens":308,"latencyMs":3335.2515829999465},{"questionId":"q99","format":"json-pretty","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":20065,"outputTokens":279,"latencyMs":3803.778750000056},{"questionId":"q99","format":"json-compact","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":15103,"outputTokens":269,"latencyMs":2556.105082999915},{"questionId":"q99","format":"toon","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":12435,"outputTokens":530,"latencyMs":4303.937750000041},{"questionId":"q99","format":"csv","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":12282,"outputTokens":489,"latencyMs":5632.182875000173},{"questionId":"q99","format":"xml","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":21951,"outputTokens":437,"latencyMs":4391.142166000092},{"
questionId":"q99","format":"yaml","model":"gemini-3-flash-preview","expected":"master","actual":"master","isCorrect":true,"inputTokens":17149,"outputTokens":305,"latencyMs":2996.1447920000646},{"questionId":"q100","format":"json-pretty","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":20057,"outputTokens":295,"latencyMs":3074.485832999926},{"questionId":"q100","format":"json-compact","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":15095,"outputTokens":251,"latencyMs":2522.9678330000024},{"questionId":"q100","format":"toon","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":12427,"outputTokens":458,"latencyMs":4044.0953329999465},{"questionId":"q100","format":"csv","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":12274,"outputTokens":449,"latencyMs":4740.3539999998175},{"questionId":"q100","format":"xml","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":21943,"outputTokens":347,"latencyMs":5268.900499999989},{"questionId":"q100","format":"yaml","model":"gemini-3-flash-preview","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17141,"outputTokens":302,"latencyMs":3375.2723749999423},{"questionId":"q101","format":"json-pretty","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":20063,"outputTokens":382,"latencyMs":3648.6807500000577},{"questionId":"q101","format":"json-compact","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":15101,"outputTokens":372,"latencyMs":3123.4450840000063},{"questionId":"q101","format":"toon","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":12433,"outputTokens":549,"latencyMs":3993.329832999967},{"questionId":"q101","fo
rmat":"csv","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":12280,"outputTokens":553,"latencyMs":6161.550374999875},{"questionId":"q101","format":"xml","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":21949,"outputTokens":378,"latencyMs":3448.810958000133},{"questionId":"q101","format":"yaml","model":"gemini-3-flash-preview","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17147,"outputTokens":312,"latencyMs":3196.9216660000384},{"questionId":"q102","format":"json-pretty","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":20063,"outputTokens":304,"latencyMs":2753.285584000172},{"questionId":"q102","format":"json-compact","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":15101,"outputTokens":278,"latencyMs":3056.6362079998944},{"questionId":"q102","format":"toon","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":12433,"outputTokens":836,"latencyMs":5912.967791999923},{"questionId":"q102","format":"csv","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":12280,"outputTokens":742,"latencyMs":6940.559041000204},{"questionId":"q102","format":"xml","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":21949,"outputTokens":357,"latencyMs":3420.518582999939},{"questionId":"q102","format":"yaml","model":"gemini-3-flash-preview","expected":"678","actual":"678","isCorrect":true,"inputTokens":17147,"outputTokens":286,"latencyMs":3228.524791999953},{"questionId":"q103","format":"json-pretty","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":20059,"outputTokens":151,"latencyMs":2331.234625000041},{"questionId":"q103","format":"json-compact","model":"gemini-3-flash-preview","expected":"main","act
ual":"main","isCorrect":true,"inputTokens":15097,"outputTokens":310,"latencyMs":2850.6913749999367},{"questionId":"q103","format":"toon","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":12429,"outputTokens":399,"latencyMs":3272.5516659999266},{"questionId":"q103","format":"csv","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":12276,"outputTokens":477,"latencyMs":5130.917875000043},{"questionId":"q103","format":"xml","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":21945,"outputTokens":339,"latencyMs":3651.871207999997},{"questionId":"q103","format":"yaml","model":"gemini-3-flash-preview","expected":"main","actual":"main","isCorrect":true,"inputTokens":17143,"outputTokens":113,"latencyMs":4187.869750000071},{"questionId":"q104","format":"json-pretty","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":20066,"outputTokens":414,"latencyMs":3960.3870000001043},{"questionId":"q104","format":"json-compact","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":15104,"outputTokens":327,"latencyMs":3003.0286250000354},{"questionId":"q104","format":"toon","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":12436,"outputTokens":1157,"latencyMs":8071.643667000113},{"questionId":"q104","format":"csv","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":12283,"outputTokens":497,"latencyMs":5497.376291999826},{"questionId":"q104","format":"xml","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":21952,"outputTokens":389,"latencyMs":4018.265083000064},{"questionId":"q104","format":"yaml","model":"gemini-3-flash-preview","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17150,"
outputTokens":304,"latencyMs":3488.251042000018},{"questionId":"q105","format":"json-pretty","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":20061,"outputTokens":307,"latencyMs":3335.819958999986},{"questionId":"q105","format":"json-compact","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":15099,"outputTokens":279,"latencyMs":2880.8569579999894},{"questionId":"q105","format":"toon","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":12431,"outputTokens":663,"latencyMs":5555.758832999971},{"questionId":"q105","format":"csv","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":12278,"outputTokens":526,"latencyMs":5690.138708000071},{"questionId":"q105","format":"xml","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":21947,"outputTokens":166,"latencyMs":2661.0854580001906},{"questionId":"q105","format":"yaml","model":"gemini-3-flash-preview","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17145,"outputTokens":280,"latencyMs":3149.7653339998797},{"questionId":"q106","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":20064,"outputTokens":355,"latencyMs":3686.7114580001216},{"questionId":"q106","format":"json-compact","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":15102,"outputTokens":375,"latencyMs":3059.123250000179},{"questionId":"q106","format":"toon","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":12434,"outputTokens":1062,"latencyMs":6691.106749999803},{"questionId":"q106","format":"csv","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":12281,"outputTokens":459,"latencyMs":5283.27887499984
35},{"questionId":"q106","format":"xml","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":21950,"outputTokens":344,"latencyMs":3704.6703340001404},{"questionId":"q106","format":"yaml","model":"gemini-3-flash-preview","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17148,"outputTokens":363,"latencyMs":3662.9946250000503},{"questionId":"q107","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":20056,"outputTokens":1089,"latencyMs":6864.773415999953},{"questionId":"q107","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":15094,"outputTokens":1526,"latencyMs":9207.031332999934},{"questionId":"q107","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12426,"outputTokens":1359,"latencyMs":9062.027125000022},{"questionId":"q107","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12273,"outputTokens":1806,"latencyMs":14685.348665999947},{"questionId":"q107","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":21942,"outputTokens":1976,"latencyMs":13114.445250000106},{"questionId":"q107","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":17140,"outputTokens":1208,"latencyMs":8724.333208000055},{"questionId":"q108","format":"json-pretty","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":20059,"outputTokens":5726,"latencyMs":31383.031709000003},{"questionId":"q108","format":"json-compact","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":15097,"outputTokens":15927,"latencyMs":83352.50908400002},{"questionId":"q108","format":"toon","mod
el":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":12429,"outputTokens":14642,"latencyMs":76793.0145419999},{"questionId":"q108","format":"csv","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":12276,"outputTokens":15582,"latencyMs":119955.48891700013},{"questionId":"q108","format":"xml","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":21945,"outputTokens":13882,"latencyMs":82885.28808300011},{"questionId":"q108","format":"yaml","model":"gemini-3-flash-preview","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":17143,"outputTokens":9705,"latencyMs":58469.82349999994},{"questionId":"q109","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":20059,"outputTokens":13535,"latencyMs":71802.53425000003},{"questionId":"q109","format":"json-compact","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":15097,"outputTokens":15321,"latencyMs":79592.80083299987},{"questionId":"q109","format":"toon","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":12429,"outputTokens":26445,"latencyMs":138743.61295900005},{"questionId":"q109","format":"csv","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":12276,"outputTokens":13615,"latencyMs":100573.19325000001},{"questionId":"q109","format":"xml","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":21945,"outputTokens":15448,"latencyMs":92402.44233300001},{"questionId":"q109","format":"yaml","model":"gemini-3-flash-preview","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":17143,"outputTokens":14660,"latencyMs":91427.13770799991},{"questionId":"q110","format":"
json-pretty","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":20058,"outputTokens":19797,"latencyMs":106845.49187499983},{"questionId":"q110","format":"json-compact","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":15096,"outputTokens":7409,"latencyMs":39714.11279199994},{"questionId":"q110","format":"toon","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":12428,"outputTokens":14679,"latencyMs":76982.10566700017},{"questionId":"q110","format":"csv","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":12275,"outputTokens":23414,"latencyMs":174875.65070800018},{"questionId":"q110","format":"xml","model":"gemini-3-flash-preview","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":21944,"outputTokens":25851,"latencyMs":159788.21325000003},{"questionId":"q110","format":"yaml","model":"gemini-3-flash-preview","expected":"154136","actual":"154185.63","isCorrect":false,"inputTokens":17142,"outputTokens":10394,"latencyMs":62433.57787499996},{"questionId":"q111","format":"json-pretty","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":20060,"outputTokens":3877,"latencyMs":21225.32150000008},{"questionId":"q111","format":"json-compact","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":15098,"outputTokens":8925,"latencyMs":48984.84008300002},{"questionId":"q111","format":"toon","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":12430,"outputTokens":8459,"latencyMs":45477.47308299993},{"questionId":"q111","format":"csv","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":12277,"outputTokens":8758,"latencyMs":63605.94079099991},{"questionId":"q111","format":"xml","model":
"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":21946,"outputTokens":10094,"latencyMs":62024.501332999906},{"questionId":"q111","format":"yaml","model":"gemini-3-flash-preview","expected":"41","actual":"41","isCorrect":true,"inputTokens":17144,"outputTokens":5849,"latencyMs":35764.14533299999},{"questionId":"q112","format":"json-pretty","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":20060,"outputTokens":4375,"latencyMs":25708.430333000142},{"questionId":"q112","format":"json-compact","model":"gemini-3-flash-preview","expected":"53","actual":"55","isCorrect":false,"inputTokens":15098,"outputTokens":7221,"latencyMs":38571.22424999997},{"questionId":"q112","format":"toon","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":12430,"outputTokens":16826,"latencyMs":87263.43662499986},{"questionId":"q112","format":"csv","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":12277,"outputTokens":10953,"latencyMs":81493.85495900013},{"questionId":"q112","format":"xml","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":21946,"outputTokens":7502,"latencyMs":46489.75708299992},{"questionId":"q112","format":"yaml","model":"gemini-3-flash-preview","expected":"53","actual":"53","isCorrect":true,"inputTokens":17144,"outputTokens":22521,"latencyMs":138532.84033400007},{"questionId":"q113","format":"json-pretty","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":20063,"outputTokens":3380,"latencyMs":19474.907166999998},{"questionId":"q113","format":"json-compact","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":15101,"outputTokens":9797,"latencyMs":51337.08504200005},{"questionId":"q113","format":"toon","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputToken
s":12433,"outputTokens":5676,"latencyMs":30308.65554100019},{"questionId":"q113","format":"csv","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":12280,"outputTokens":8687,"latencyMs":64769.25004099985},{"questionId":"q113","format":"xml","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":21949,"outputTokens":10329,"latencyMs":64240.5879579999},{"questionId":"q113","format":"yaml","model":"gemini-3-flash-preview","expected":"77","actual":"77","isCorrect":true,"inputTokens":17147,"outputTokens":6667,"latencyMs":41567.80608300003},{"questionId":"q114","format":"json-pretty","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":20063,"outputTokens":1183,"latencyMs":8509.001875000307},{"questionId":"q114","format":"json-compact","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":15101,"outputTokens":1906,"latencyMs":11160.206208999734},{"questionId":"q114","format":"toon","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":12433,"outputTokens":3053,"latencyMs":16863.59800000023},{"questionId":"q114","format":"csv","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":12280,"outputTokens":4704,"latencyMs":37698.24920800002},{"questionId":"q114","format":"xml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":21949,"outputTokens":2804,"latencyMs":17786.60029199999},{"questionId":"q114","format":"yaml","model":"gemini-3-flash-preview","expected":"37","actual":"37","isCorrect":true,"inputTokens":17147,"outputTokens":1307,"latencyMs":9145.139749999624},{"questionId":"q115","format":"json-pretty","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":20063,"outputTokens":1112,"latencyMs":7486.016334000044},{"questionId":"q115","format":"json-compa
ct","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":15101,"outputTokens":1227,"latencyMs":8173.635000000242},{"questionId":"q115","format":"toon","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":12433,"outputTokens":1472,"latencyMs":11321.796833000146},{"questionId":"q115","format":"csv","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":12280,"outputTokens":1027,"latencyMs":8629.404999999795},{"questionId":"q115","format":"xml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":21949,"outputTokens":1227,"latencyMs":8906.687082999852},{"questionId":"q115","format":"yaml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":17147,"outputTokens":2133,"latencyMs":14624.23195900023},{"questionId":"q116","format":"json-pretty","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":20062,"outputTokens":18791,"latencyMs":96396.51483300002},{"questionId":"q116","format":"json-compact","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":15100,"outputTokens":22416,"latencyMs":115972.16949999984},{"questionId":"q116","format":"toon","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":12432,"outputTokens":16355,"latencyMs":84317.18258300005},{"questionId":"q116","format":"csv","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":12279,"outputTokens":16434,"latencyMs":121070.73441699985},{"questionId":"q116","format":"xml","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":21948,"outputTokens":10321,"latencyMs":62978.07237499999},{"questionId":"q116","format":"yaml","model":"gemini-3-flash-preview","expected":"49","actual":"49","isCorrect":true,"inputTokens":171
46,"outputTokens":12161,"latencyMs":74617.54583299998},{"questionId":"q117","format":"json-pretty","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":20062,"outputTokens":4951,"latencyMs":26968.749374999665},{"questionId":"q117","format":"json-compact","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":15100,"outputTokens":6042,"latencyMs":31304.52966699982},{"questionId":"q117","format":"toon","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":12432,"outputTokens":15115,"latencyMs":81497.56237499975},{"questionId":"q117","format":"csv","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":12279,"outputTokens":14830,"latencyMs":114598.21562500019},{"questionId":"q117","format":"xml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":21948,"outputTokens":5084,"latencyMs":31102.013125000056},{"questionId":"q117","format":"yaml","model":"gemini-3-flash-preview","expected":"23","actual":"23","isCorrect":true,"inputTokens":17146,"outputTokens":5562,"latencyMs":35271.17933299998},{"questionId":"q118","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":20061,"outputTokens":2330,"latencyMs":15030.119249999989},{"questionId":"q118","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":15099,"outputTokens":2889,"latencyMs":16500.675624999683},{"questionId":"q118","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":12431,"outputTokens":3077,"latencyMs":16814.760708000045},{"questionId":"q118","format":"csv","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":12278,"outputTokens":2834,"latencyMs":23582.076708000153},{"questionId":"q118","format":"xml","
model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":21947,"outputTokens":3361,"latencyMs":21489.94058299996},{"questionId":"q118","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":17145,"outputTokens":2525,"latencyMs":16160.734375},{"questionId":"q119","format":"json-pretty","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":20072,"outputTokens":10374,"latencyMs":54647.002125000115},{"questionId":"q119","format":"json-compact","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":15110,"outputTokens":14983,"latencyMs":78034.23604199989},{"questionId":"q119","format":"toon","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":12442,"outputTokens":16898,"latencyMs":88867.03208300006},{"questionId":"q119","format":"csv","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":12289,"outputTokens":19038,"latencyMs":146344.30866600014},{"questionId":"q119","format":"xml","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":21958,"outputTokens":12176,"latencyMs":72528.92975000013},{"questionId":"q119","format":"yaml","model":"gemini-3-flash-preview","expected":"57","actual":"57","isCorrect":true,"inputTokens":17156,"outputTokens":15805,"latencyMs":98779.64145899983},{"questionId":"q120","format":"json-pretty","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":20073,"outputTokens":15742,"latencyMs":83371.43599999975},{"questionId":"q120","format":"json-compact","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":15111,"outputTokens":19185,"latencyMs":100630.21837500017},{"questionId":"q120","format":"toon","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputToken
s":12443,"outputTokens":19468,"latencyMs":101790.76791699976},{"questionId":"q120","format":"csv","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":12290,"outputTokens":20826,"latencyMs":158435.00495800003},{"questionId":"q120","format":"xml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":21959,"outputTokens":9706,"latencyMs":58976.20620799996},{"questionId":"q120","format":"yaml","model":"gemini-3-flash-preview","expected":"43","actual":"43","isCorrect":true,"inputTokens":17157,"outputTokens":13271,"latencyMs":79840.96987499995},{"questionId":"q121","format":"json-pretty","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":20073,"outputTokens":10340,"latencyMs":54298.25204200018},{"questionId":"q121","format":"json-compact","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":15111,"outputTokens":6370,"latencyMs":34149.41604199959},{"questionId":"q121","format":"toon","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":12443,"outputTokens":13374,"latencyMs":70759.14558300003},{"questionId":"q121","format":"csv","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":12290,"outputTokens":10680,"latencyMs":79050.34208299965},{"questionId":"q121","format":"xml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":21959,"outputTokens":8704,"latencyMs":54347.43133299984},{"questionId":"q121","format":"yaml","model":"gemini-3-flash-preview","expected":"25","actual":"25","isCorrect":true,"inputTokens":17157,"outputTokens":3836,"latencyMs":24404.290165999904},{"questionId":"q122","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":20072,"outputTokens":8597,"latencyMs":45838.55229200004},{"questionId":"q122","format":"json
-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":15110,"outputTokens":4847,"latencyMs":26016.524833000265},{"questionId":"q122","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":12442,"outputTokens":5150,"latencyMs":27610.3652499998},{"questionId":"q122","format":"csv","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":12289,"outputTokens":21062,"latencyMs":154955.62645799993},{"questionId":"q122","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":21958,"outputTokens":6566,"latencyMs":40338.911374999676},{"questionId":"q122","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":17156,"outputTokens":4538,"latencyMs":27895.8099580002},{"questionId":"q123","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":20072,"outputTokens":4105,"latencyMs":22319.978999999817},{"questionId":"q123","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":15110,"outputTokens":10182,"latencyMs":52993.30112500023},{"questionId":"q123","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":12442,"outputTokens":18289,"latencyMs":96740.66679200018},{"questionId":"q123","format":"csv","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":12289,"outputTokens":4838,"latencyMs":35836.71829200024},{"questionId":"q123","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":21958,"outputTokens":2869,"latencyMs":18574.773374999873},{"questionId":"q123","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":17156,"outputTokens":
1579,"latencyMs":10453.930792000145},{"questionId":"q124","format":"json-pretty","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":8608,"outputTokens":267,"latencyMs":2424.0212500002235},{"questionId":"q124","format":"json-compact","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":5935,"outputTokens":383,"latencyMs":2769.5240000002086},{"questionId":"q124","format":"toon","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":7062,"outputTokens":285,"latencyMs":3217.1704579996876},{"questionId":"q124","format":"xml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":9431,"outputTokens":328,"latencyMs":2869.171916999854},{"questionId":"q124","format":"yaml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":6993,"outputTokens":289,"latencyMs":2557.126457999926},{"questionId":"q125","format":"json-pretty","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":8608,"outputTokens":318,"latencyMs":3223.1263749999925},{"questionId":"q125","format":"json-compact","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":5935,"outputTokens":429,"latencyMs":3635.5987909999676},{"questionId":"q125","format":"toon","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":7062,"outputTokens":477,"latencyMs":4300.350459000096},{"questionId":"q125","format":"xml","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":9431,"outputTokens":339,"latencyMs":3100.7087920000777},{"questionId":"q125","format":"yaml","model":"gemini-3-flash-preview","expected":"/api/payments","actual":"/api/payments","isCorrect":tru
e,"inputTokens":6993,"outputTokens":383,"latencyMs":2880.1571249999106},{"questionId":"q126","format":"json-pretty","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":8609,"outputTokens":295,"latencyMs":2598.6074999999255},{"questionId":"q126","format":"json-compact","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":5936,"outputTokens":705,"latencyMs":4438.443750000093},{"questionId":"q126","format":"toon","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":7063,"outputTokens":242,"latencyMs":2333.717665999662},{"questionId":"q126","format":"xml","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":9432,"outputTokens":252,"latencyMs":2550.3124170000665},{"questionId":"q126","format":"yaml","model":"gemini-3-flash-preview","expected":"297","actual":"297","isCorrect":true,"inputTokens":6994,"outputTokens":281,"latencyMs":2307.0850829998963},{"questionId":"q127","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":8609,"outputTokens":231,"latencyMs":2298.9086250001565},{"questionId":"q127","format":"json-compact","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":5936,"outputTokens":690,"latencyMs":3840.421374999918},{"questionId":"q127","format":"toon","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":7063,"outputTokens":410,"latencyMs":3249.4291249997914},{"questionId":"q127","format":"xml","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":9432,"outputTokens":268,"latencyMs":2453.096334000118},{"questionId":"q127","format":"yaml","model":"gemini-3-flash-preview","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":6994,"outputTokens":242,"latencyMs":2304.7999580004252},{"qu
estionId":"q128","format":"json-pretty","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":8608,"outputTokens":349,"latencyMs":3285.871041000355},{"questionId":"q128","format":"json-compact","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":5935,"outputTokens":249,"latencyMs":2135.6634579999372},{"questionId":"q128","format":"toon","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":7062,"outputTokens":232,"latencyMs":2489.8922919998877},{"questionId":"q128","format":"xml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":9431,"outputTokens":263,"latencyMs":2909.1205829996616},{"questionId":"q128","format":"yaml","model":"gemini-3-flash-preview","expected":"error","actual":"error","isCorrect":true,"inputTokens":6993,"outputTokens":353,"latencyMs":2890.1726660002023},{"questionId":"q129","format":"json-pretty","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":8608,"outputTokens":238,"latencyMs":2798.9996670000255},{"questionId":"q129","format":"json-compact","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":5935,"outputTokens":338,"latencyMs":2851.1660830001347},{"questionId":"q129","format":"toon","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7062,"outputTokens":224,"latencyMs":2713.321124999784},{"questionId":"q129","format":"xml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":9431,"outputTokens":167,"latencyMs":2212.3508750000037},{"questionId":"q129","format":"yaml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6993,"outputTokens":307,"latencyMs":2859.359375},{"questionId":"
q130","format":"json-pretty","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":8609,"outputTokens":289,"latencyMs":3026.115749999881},{"questionId":"q130","format":"json-compact","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":5936,"outputTokens":183,"latencyMs":1973.9259999999776},{"questionId":"q130","format":"toon","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":7063,"outputTokens":394,"latencyMs":3659.440957999788},{"questionId":"q130","format":"xml","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":9432,"outputTokens":256,"latencyMs":3382.314666000195},{"questionId":"q130","format":"yaml","model":"gemini-3-flash-preview","expected":"298","actual":"298","isCorrect":true,"inputTokens":6994,"outputTokens":259,"latencyMs":2465.3808339997195},{"questionId":"q131","format":"json-pretty","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":8609,"outputTokens":286,"latencyMs":3041.4478339999914},{"questionId":"q131","format":"json-compact","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":5936,"outputTokens":325,"latencyMs":2796.949624999892},{"questionId":"q131","format":"toon","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":7063,"outputTokens":282,"latencyMs":2852.4671669998206},{"questionId":"q131","format":"xml","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":9432,"outputTokens":333,"latencyMs":3065.3007499999367},{"questionId":"q131","format":"yaml","model":"gemini-3-flash-preview","expected":"398","actual":"398","isCorrect":true,"inputTokens":6994,"outputTokens":297,"latencyMs":2544.2407920002006},{"questionId":"q132","format":"json-pretty","model":"gemini-3-flash-preview","expected":"info","actua
l":"info","isCorrect":true,"inputTokens":8608,"outputTokens":180,"latencyMs":2260.6082500000484},{"questionId":"q132","format":"json-compact","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":5935,"outputTokens":168,"latencyMs":1874.333333000075},{"questionId":"q132","format":"toon","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":7062,"outputTokens":188,"latencyMs":2354.53370800009},{"questionId":"q132","format":"xml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":9431,"outputTokens":247,"latencyMs":2749.997084000148},{"questionId":"q132","format":"yaml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":6993,"outputTokens":255,"latencyMs":2349.754457999952},{"questionId":"q133","format":"json-pretty","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":8608,"outputTokens":356,"latencyMs":3148.9514159997925},{"questionId":"q133","format":"json-compact","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":5935,"outputTokens":281,"latencyMs":2589.4035420003347},{"questionId":"q133","format":"toon","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7062,"outputTokens":281,"latencyMs":2554.130750000011},{"questionId":"q133","format":"xml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":9431,"outputTokens":258,"latencyMs":2656.17191700032},{"questionId":"q133","format":"yaml","model":"gemini-3-flash-preview","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6993,"outputTokens":283,"latencyMs":2506.751500000246},{"questionId":"q134","format":"json-pretty","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":tru
e,"inputTokens":8583,"outputTokens":5157,"latencyMs":28712.846333000343},{"questionId":"q134","format":"json-compact","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":5910,"outputTokens":7487,"latencyMs":36580.23633399978},{"questionId":"q134","format":"toon","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":7037,"outputTokens":2525,"latencyMs":13479.526708999649},{"questionId":"q134","format":"xml","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":9406,"outputTokens":6391,"latencyMs":33424.45512499986},{"questionId":"q134","format":"yaml","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":6968,"outputTokens":10872,"latencyMs":48255.78658300033},{"questionId":"q135","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":8584,"outputTokens":11831,"latencyMs":64820.602624999825},{"questionId":"q135","format":"json-compact","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":5911,"outputTokens":10114,"latencyMs":49019.72883299971},{"questionId":"q135","format":"toon","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":7038,"outputTokens":43656,"latencyMs":199324.32404199988},{"questionId":"q135","format":"xml","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":9407,"outputTokens":10414,"latencyMs":54203.37504100008},{"questionId":"q135","format":"yaml","model":"gemini-3-flash-preview","expected":"2665.00","actual":"2665","isCorrect":true,"inputTokens":6969,"outputTokens":12477,"latencyMs":55254.101749999914},{"questionId":"q136","format":"json-pretty","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":8583,"outputTokens":18508,"latencyMs":10
0329.93887499999},{"questionId":"q136","format":"json-compact","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":5910,"outputTokens":14007,"latencyMs":67178.57879200019},{"questionId":"q136","format":"toon","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":7037,"outputTokens":14524,"latencyMs":69145.21383399982},{"questionId":"q136","format":"xml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":9406,"outputTokens":15462,"latencyMs":82450.48729200009},{"questionId":"q136","format":"yaml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":6968,"outputTokens":10848,"latencyMs":48664.41645799996},{"questionId":"q137","format":"json-pretty","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":8583,"outputTokens":5620,"latencyMs":30888.40512500005},{"questionId":"q137","format":"json-compact","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":5910,"outputTokens":7693,"latencyMs":36660.243540999945},{"questionId":"q137","format":"toon","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":7037,"outputTokens":2920,"latencyMs":15143.715332999825},{"questionId":"q137","format":"xml","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":9406,"outputTokens":8984,"latencyMs":46835.20883300016},{"questionId":"q137","format":"yaml","model":"gemini-3-flash-preview","expected":"30","actual":"30","isCorrect":true,"inputTokens":6968,"outputTokens":6174,"latencyMs":28497.19212500006},{"questionId":"q138","format":"json-pretty","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":8583,"outputTokens":9725,"latencyMs":54943.2827920001},{"questionId":"q138","format":"json-compact","model":"gemini-3-flash-preview","
expected":"19","actual":"19","isCorrect":true,"inputTokens":5910,"outputTokens":8986,"latencyMs":43876.19408400031},{"questionId":"q138","format":"toon","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":7037,"outputTokens":6542,"latencyMs":31148.786208000034},{"questionId":"q138","format":"xml","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":9406,"outputTokens":16071,"latencyMs":82436.98754200013},{"questionId":"q138","format":"yaml","model":"gemini-3-flash-preview","expected":"19","actual":"19","isCorrect":true,"inputTokens":6968,"outputTokens":7150,"latencyMs":32330.752042000182},{"questionId":"q139","format":"json-pretty","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":8586,"outputTokens":5601,"latencyMs":32229.26049999986},{"questionId":"q139","format":"json-compact","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":5913,"outputTokens":11803,"latencyMs":57398.489041999914},{"questionId":"q139","format":"toon","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":7040,"outputTokens":4700,"latencyMs":23756.146958000027},{"questionId":"q139","format":"xml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":9409,"outputTokens":6010,"latencyMs":31691.516667000018},{"questionId":"q139","format":"yaml","model":"gemini-3-flash-preview","expected":"16","actual":"16","isCorrect":true,"inputTokens":6971,"outputTokens":4894,"latencyMs":22449.997041999828},{"questionId":"q140","format":"json-pretty","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":8586,"outputTokens":3607,"latencyMs":21293.889042000286},{"questionId":"q140","format":"json-compact","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":5913,"outputTokens":7229,"latency
Ms":35180.18087500008},{"questionId":"q140","format":"toon","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":7040,"outputTokens":4773,"latencyMs":24345.19016700005},{"questionId":"q140","format":"xml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":9409,"outputTokens":5434,"latencyMs":28506.110791000072},{"questionId":"q140","format":"yaml","model":"gemini-3-flash-preview","expected":"13","actual":"13","isCorrect":true,"inputTokens":6971,"outputTokens":8644,"latencyMs":39661.89812499983},{"questionId":"q141","format":"json-pretty","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":8592,"outputTokens":14362,"latencyMs":81133.86466600001},{"questionId":"q141","format":"json-compact","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":5919,"outputTokens":14987,"latencyMs":104340.38850000035},{"questionId":"q141","format":"toon","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":7046,"outputTokens":14072,"latencyMs":69169.39199999999},{"questionId":"q141","format":"xml","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":9415,"outputTokens":15517,"latencyMs":80654.56216700003},{"questionId":"q141","format":"yaml","model":"gemini-3-flash-preview","expected":"33","actual":"33","isCorrect":true,"inputTokens":6977,"outputTokens":8356,"latencyMs":38407.33191600023},{"questionId":"q142","format":"json-pretty","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":8592,"outputTokens":7512,"latencyMs":42338.74445799971},{"questionId":"q142","format":"json-compact","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":5919,"outputTokens":14699,"latencyMs":102866.6781250001},{"questionId":"q142","format":"toon","model":"gemini-3-flash-preview","e
xpected":"42","actual":"42","isCorrect":true,"inputTokens":7046,"outputTokens":10024,"latencyMs":48049.216624999885},{"questionId":"q142","format":"xml","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":9415,"outputTokens":4110,"latencyMs":22297.574332999997},{"questionId":"q142","format":"yaml","model":"gemini-3-flash-preview","expected":"42","actual":"42","isCorrect":true,"inputTokens":6977,"outputTokens":8176,"latencyMs":37879.22820900008},{"questionId":"q143","format":"json-pretty","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":8584,"outputTokens":8433,"latencyMs":48164.083540999796},{"questionId":"q143","format":"json-compact","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":5911,"outputTokens":9618,"latencyMs":67983.92641700013},{"questionId":"q143","format":"toon","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":7038,"outputTokens":16426,"latencyMs":76317.16970800003},{"questionId":"q143","format":"xml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":9407,"outputTokens":8567,"latencyMs":44877.11583300028},{"questionId":"q143","format":"yaml","model":"gemini-3-flash-preview","expected":"24","actual":"24","isCorrect":true,"inputTokens":6969,"outputTokens":11861,"latencyMs":53283.976916999556},{"questionId":"q144","format":"json-pretty","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":8592,"outputTokens":12164,"latencyMs":68852.63249999983},{"questionId":"q144","format":"json-compact","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":5919,"outputTokens":11643,"latencyMs":56538.2484579999},{"questionId":"q144","format":"toon","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":7046,"outputTokens":5223,"latencyMs
":27300.982749999966},{"questionId":"q144","format":"xml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":9415,"outputTokens":10864,"latencyMs":56605.348084000405},{"questionId":"q144","format":"yaml","model":"gemini-3-flash-preview","expected":"26","actual":"26","isCorrect":true,"inputTokens":6977,"outputTokens":12708,"latencyMs":57055.8494160003},{"questionId":"q145","format":"json-pretty","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":8592,"outputTokens":5474,"latencyMs":31819.555292000063},{"questionId":"q145","format":"json-compact","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":5919,"outputTokens":6151,"latencyMs":30285.51412500022},{"questionId":"q145","format":"toon","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":7046,"outputTokens":5350,"latencyMs":25504.35120799998},{"questionId":"q145","format":"xml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":9415,"outputTokens":6781,"latencyMs":34964.004333000164},{"questionId":"q145","format":"yaml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":6977,"outputTokens":5966,"latencyMs":27963.87108300021},{"questionId":"q146","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":8595,"outputTokens":6499,"latencyMs":39847.565542000346},{"questionId":"q146","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":5922,"outputTokens":8538,"latencyMs":41626.66154199978},{"questionId":"q146","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":7049,"outputTokens":8894,"latencyMs":41383.13070899993},{"questionId":"q146","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual
":"1","isCorrect":true,"inputTokens":9418,"outputTokens":15584,"latencyMs":83139.89533400023},{"questionId":"q146","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":6980,"outputTokens":11573,"latencyMs":53524.50162500003},{"questionId":"q147","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":8595,"outputTokens":5956,"latencyMs":34166.95324999979},{"questionId":"q147","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":5922,"outputTokens":2397,"latencyMs":12619.9861659999},{"questionId":"q147","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":7049,"outputTokens":9155,"latencyMs":43449.04179200018},{"questionId":"q147","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":9418,"outputTokens":5531,"latencyMs":29418.45379099995},{"questionId":"q147","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":6980,"outputTokens":6409,"latencyMs":29086.21683300007},{"questionId":"q148","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8595,"outputTokens":7705,"latencyMs":43872.01258300012},{"questionId":"q148","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":5922,"outputTokens":7170,"latencyMs":35578.25637499988},{"questionId":"q148","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":7049,"outputTokens":9908,"latencyMs":46831.90787500003},{"questionId":"q148","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9418,"outputTokens":19837,"latencyMs":103368.64858300006},{"questionId":"q148","form
at":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":6980,"outputTokens":15097,"latencyMs":68301.71233400004},{"questionId":"q149","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":8591,"outputTokens":9884,"latencyMs":56434.583709000144},{"questionId":"q149","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":5918,"outputTokens":12809,"latencyMs":63979.966874999925},{"questionId":"q149","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"Answer: 2","isCorrect":true,"inputTokens":7045,"outputTokens":8870,"latencyMs":40109.1437090002},{"questionId":"q149","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":9414,"outputTokens":9366,"latencyMs":49063.199750000145},{"questionId":"q149","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":6976,"outputTokens":8560,"latencyMs":38944.46016700007},{"questionId":"q150","format":"json-pretty","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":8591,"outputTokens":10687,"latencyMs":60147.765959000215},{"questionId":"q150","format":"json-compact","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":5918,"outputTokens":21709,"latencyMs":106119.25466600014},{"questionId":"q150","format":"toon","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":7045,"outputTokens":12294,"latencyMs":59010.86845900025},{"questionId":"q150","format":"xml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":9414,"outputTokens":5382,"latencyMs":28824.669832999818},{"questionId":"q150","format":"yaml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":6
976,"outputTokens":17566,"latencyMs":79004.90779099986},{"questionId":"q151","format":"json-pretty","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":1237,"outputTokens":94,"latencyMs":1356.483208999969},{"questionId":"q151","format":"json-compact","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":722,"outputTokens":178,"latencyMs":1744.4732080004178},{"questionId":"q151","format":"toon","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":863,"outputTokens":69,"latencyMs":1523.4334580004215},{"questionId":"q151","format":"xml","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":1296,"outputTokens":115,"latencyMs":1761.98837500019},{"questionId":"q151","format":"yaml","model":"gemini-3-flash-preview","expected":"development","actual":"development","isCorrect":true,"inputTokens":886,"outputTokens":159,"latencyMs":1813.217042000033},{"questionId":"q152","format":"json-pretty","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1235,"outputTokens":165,"latencyMs":1766.4899579999037},{"questionId":"q152","format":"json-compact","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":720,"outputTokens":161,"latencyMs":2303.5573749998584},{"questionId":"q152","format":"toon","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":861,"outputTokens":199,"latencyMs":1999.6343749999069},{"questionId":"q152","format":"xml","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1294,"outputTokens":344,"l
atencyMs":2810.8215000000782},{"questionId":"q152","format":"yaml","model":"gemini-3-flash-preview","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":884,"outputTokens":177,"latencyMs":4135.895332999993},{"questionId":"q153","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1235,"outputTokens":104,"latencyMs":1963.6168749998324},{"questionId":"q153","format":"json-compact","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":720,"outputTokens":447,"latencyMs":2891.421833000146},{"questionId":"q153","format":"toon","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":861,"outputTokens":225,"latencyMs":3080.291000000201},{"questionId":"q153","format":"xml","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1294,"outputTokens":333,"latencyMs":3431.1725840000436},{"questionId":"q153","format":"yaml","model":"gemini-3-flash-preview","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":884,"outputTokens":299,"latencyMs":4925.9569580000825},{"questionId":"q154","format":"json-pretty","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":1237,"outputTokens":313,"latencyMs":2861.0692920000292},{"questionId":"q154","format":"json-compact","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":722,"outputTokens":265,"latencyMs":3006.8672500001267},{"questionId":"q154","format":"toon","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":863,"outputTokens":255,"latencyMs":3160.707832999993},{"questionId":"q154","format":"xml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":1296,"outputTokens":243,"latencyMs":2785.681791000068},{"questionId":"q1
54","format":"yaml","model":"gemini-3-flash-preview","expected":"18","actual":"18","isCorrect":true,"inputTokens":886,"outputTokens":145,"latencyMs":1815.165542000439},{"questionId":"q155","format":"json-pretty","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1235,"outputTokens":105,"latencyMs":3050.7756249997765},{"questionId":"q155","format":"json-compact","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":720,"outputTokens":232,"latencyMs":2096.2955419998616},{"questionId":"q155","format":"toon","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":861,"outputTokens":201,"latencyMs":2643.581917000003},{"questionId":"q155","format":"xml","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1294,"outputTokens":180,"latencyMs":1802.0862079998478},{"questionId":"q155","format":"yaml","model":"gemini-3-flash-preview","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":884,"outputTokens":190,"latencyMs":3095.989082999993},{"questionId":"q156","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1237,"outputTokens":252,"latencyMs":2563.272583000362},{"questionId":"q156","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":722,"outputTokens":362,"latencyMs":3544.412500000093},{"questionId":"q156","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":863,"outputTokens":191,"latencyMs":1886.8387090000324},{"questionId":"q156","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1296,"outputTokens":166,"latencyMs":2480.9494579997845},{"questionId":"q156","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect
":true,"inputTokens":886,"outputTokens":195,"latencyMs":3016.8389580002986},{"questionId":"q157","format":"json-pretty","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1237,"outputTokens":260,"latencyMs":3002.749042000156},{"questionId":"q157","format":"json-compact","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":722,"outputTokens":184,"latencyMs":2796.544875000138},{"questionId":"q157","format":"toon","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":863,"outputTokens":211,"latencyMs":2436.493083000183},{"questionId":"q157","format":"xml","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1296,"outputTokens":264,"latencyMs":2561.323708000127},{"questionId":"q157","format":"yaml","model":"gemini-3-flash-preview","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":886,"outputTokens":216,"latencyMs":1830.2423749999143},{"questionId":"q158","format":"json-pretty","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1235,"outputTokens":130,"latencyMs":2081.5682910000905},{"questionId":"q158","format":"json-compact","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":720,"outputTokens":296,"latencyMs":3103.874292000197},{"questionId":"q158","format":"toon","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":861,"outputTokens":293,"latencyMs":2729.4198330002837},{"questionId":"q158","format":"xml","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1294,"outputTokens":152,"latencyMs":2323.6319999997504},{"questionId":"q158","format":"yaml","model":"gemini-3-flash-preview","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":884,"outputT
okens":235,"latencyMs":2060.8165839998983},{"questionId":"q159","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1236,"outputTokens":227,"latencyMs":2162.90774999978},{"questionId":"q159","format":"json-compact","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":721,"outputTokens":179,"latencyMs":2358.5181660000235},{"questionId":"q159","format":"toon","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":862,"outputTokens":195,"latencyMs":2127.304125000257},{"questionId":"q159","format":"xml","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1295,"outputTokens":225,"latencyMs":2038.2347920001484},{"questionId":"q159","format":"yaml","model":"gemini-3-flash-preview","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":885,"outputTokens":219,"latencyMs":2214.910457999911},{"questionId":"q160","format":"json-pretty","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1237,"outputTokens":91,"latencyMs":1939.7651250003837},{"questionId":"q160","format":"json-compact","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":722,"outputTokens":167,"latencyMs":2750.511458000168},{"questionId":"q160","format":"toon","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":863,"outputTokens":167,"latencyMs":1936.0097090001218},{"questionId":"q160","format":"xml","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1296,"outputTokens":246,"latencyMs":2371.70804100018},{"questionId":"q160","format":"yaml","model":"gemini-3-flash-preview","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":886,"outputTokens":446,"latencyMs":3801.999749999959},{"questionId":
"q161","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1237,"outputTokens":285,"latencyMs":3205.5163340000436},{"questionId":"q161","format":"json-compact","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":722,"outputTokens":187,"latencyMs":3857.28429099964},{"questionId":"q161","format":"toon","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":863,"outputTokens":226,"latencyMs":2249.7077500000596},{"questionId":"q161","format":"xml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1296,"outputTokens":228,"latencyMs":2719.9549580002204},{"questionId":"q161","format":"yaml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":886,"outputTokens":220,"latencyMs":2244.919209000189},{"questionId":"q162","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1237,"outputTokens":143,"latencyMs":2071.9482090002857},{"questionId":"q162","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":722,"outputTokens":249,"latencyMs":1951.5478330003098},{"questionId":"q162","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":863,"outputTokens":218,"latencyMs":3030.7930410001427},{"questionId":"q162","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1296,"outputTokens":179,"latencyMs":1702.7638750001788},{"questionId":"q162","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":886,"outputTokens":112,"latencyMs":2511.13204200007},{"questionId":"q163","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1236,"outpu
tTokens":182,"latencyMs":2075.898541999981},{"questionId":"q163","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":721,"outputTokens":429,"latencyMs":3667.821416999679},{"questionId":"q163","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":862,"outputTokens":231,"latencyMs":2555.316041999962},{"questionId":"q163","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1295,"outputTokens":209,"latencyMs":2018.4738340000622},{"questionId":"q163","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":885,"outputTokens":186,"latencyMs":1863.105624999851},{"questionId":"q164","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1236,"outputTokens":105,"latencyMs":1381.3430409999564},{"questionId":"q164","format":"json-compact","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":721,"outputTokens":197,"latencyMs":2837.34924999997},{"questionId":"q164","format":"toon","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":862,"outputTokens":170,"latencyMs":1930.47120800009},{"questionId":"q164","format":"xml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":1295,"outputTokens":377,"latencyMs":3279.6485000001267},{"questionId":"q164","format":"yaml","model":"gemini-3-flash-preview","expected":"2","actual":"2","isCorrect":true,"inputTokens":885,"outputTokens":143,"latencyMs":1744.5412499997765},{"questionId":"q165","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1236,"outputTokens":216,"latencyMs":2265.6218750001863},{"questionId":"q165","format":"json-compact","model":"gemini-3-flash-preview","expected":"3
","actual":"3","isCorrect":true,"inputTokens":721,"outputTokens":287,"latencyMs":2761.6886659995653},{"questionId":"q165","format":"toon","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":862,"outputTokens":247,"latencyMs":3142.6592079997063},{"questionId":"q165","format":"xml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1295,"outputTokens":436,"latencyMs":3082.7662080000155},{"questionId":"q165","format":"yaml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":885,"outputTokens":233,"latencyMs":2283.4882499999367},{"questionId":"q166","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1240,"outputTokens":249,"latencyMs":2116.1106660002843},{"questionId":"q166","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":725,"outputTokens":377,"latencyMs":2812.1302919997834},{"questionId":"q166","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":866,"outputTokens":450,"latencyMs":3136.8454169998877},{"questionId":"q166","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1299,"outputTokens":441,"latencyMs":2868.9416249999776},{"questionId":"q166","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":889,"outputTokens":382,"latencyMs":3159.447166000027},{"questionId":"q167","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1236,"outputTokens":280,"latencyMs":2177.99850000022},{"questionId":"q167","format":"json-compact","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":721,"outputTokens":208,"latencyMs":3065.321750000119},{"questionId":"q167","format":"
toon","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":862,"outputTokens":377,"latencyMs":3650.124165999703},{"questionId":"q167","format":"xml","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1295,"outputTokens":223,"latencyMs":2429.2223749998957},{"questionId":"q167","format":"yaml","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":885,"outputTokens":229,"latencyMs":3948.2972499998286},{"questionId":"q168","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1238,"outputTokens":307,"latencyMs":2680.3013749998063},{"questionId":"q168","format":"json-compact","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":723,"outputTokens":304,"latencyMs":3679.1967079997994},{"questionId":"q168","format":"toon","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":864,"outputTokens":204,"latencyMs":3266.1833329997025},{"questionId":"q168","format":"xml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1297,"outputTokens":204,"latencyMs":3318.4111250001006},{"questionId":"q168","format":"yaml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":887,"outputTokens":238,"latencyMs":2364.597916000057},{"questionId":"q169","format":"json-pretty","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":1240,"outputTokens":372,"latencyMs":3096.718915999867},{"questionId":"q169","format":"json-compact","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":725,"outputTokens":281,"latencyMs":2706.015875000041},{"questionId":"q169","format":"toon","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":866,"outputTokens":2807,"latencyMs
":18313.180917000398},{"questionId":"q169","format":"xml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":1299,"outputTokens":1733,"latencyMs":10828.579791999888},{"questionId":"q169","format":"yaml","model":"gemini-3-flash-preview","expected":"8","actual":"8","isCorrect":true,"inputTokens":889,"outputTokens":707,"latencyMs":5018.436459000222},{"questionId":"q170","format":"json-pretty","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1239,"outputTokens":392,"latencyMs":3230.3105409997515},{"questionId":"q170","format":"json-compact","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":724,"outputTokens":385,"latencyMs":4350.139124999754},{"questionId":"q170","format":"toon","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":865,"outputTokens":1904,"latencyMs":11660.972834000364},{"questionId":"q170","format":"xml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":1298,"outputTokens":460,"latencyMs":3677.423874999862},{"questionId":"q170","format":"yaml","model":"gemini-3-flash-preview","expected":"5","actual":"5","isCorrect":true,"inputTokens":888,"outputTokens":289,"latencyMs":2875.7883750000037},{"questionId":"q171","format":"json-pretty","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":1241,"outputTokens":365,"latencyMs":3135.994249999989},{"questionId":"q171","format":"json-compact","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":726,"outputTokens":207,"latencyMs":2582.6173749999143},{"questionId":"q171","format":"toon","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":867,"outputTokens":300,"latencyMs":2855.8550419998355},{"questionId":"q171","format":"xml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect"
:true,"inputTokens":1300,"outputTokens":605,"latencyMs":5532.035957999993},{"questionId":"q171","format":"yaml","model":"gemini-3-flash-preview","expected":"3","actual":"3","isCorrect":true,"inputTokens":890,"outputTokens":167,"latencyMs":1623.3711669999175},{"questionId":"q172","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1241,"outputTokens":256,"latencyMs":3167.6805420001037},{"questionId":"q172","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":726,"outputTokens":317,"latencyMs":3493.365625000093},{"questionId":"q172","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":867,"outputTokens":347,"latencyMs":3740.0589590002783},{"questionId":"q172","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1300,"outputTokens":356,"latencyMs":2963.488791999873},{"questionId":"q172","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":890,"outputTokens":316,"latencyMs":3080.344166999683},{"questionId":"q173","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1243,"outputTokens":192,"latencyMs":1708.4010409996845},{"questionId":"q173","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":728,"outputTokens":210,"latencyMs":2411.453290999867},{"questionId":"q173","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":869,"outputTokens":307,"latencyMs":2766.541458000429},{"questionId":"q173","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1302,"outputTokens":228,"latencyMs":3558.0790419997647},{"questionId":"q173","format":"yaml","model":"gemini-3-fla
sh-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":892,"outputTokens":233,"latencyMs":4017.5527500002645},{"questionId":"q174","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1239,"outputTokens":273,"latencyMs":2889.037250000052},{"questionId":"q174","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":724,"outputTokens":358,"latencyMs":2522.6696670004167},{"questionId":"q174","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":865,"outputTokens":283,"latencyMs":2806.383666999638},{"questionId":"q174","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1298,"outputTokens":325,"latencyMs":3174.8865000000224},{"questionId":"q174","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":888,"outputTokens":330,"latencyMs":4017.2965839998797},{"questionId":"q175","format":"json-pretty","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1244,"outputTokens":370,"latencyMs":3330.693208000157},{"questionId":"q175","format":"json-compact","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":729,"outputTokens":475,"latencyMs":3981.611374999862},{"questionId":"q175","format":"toon","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":870,"outputTokens":12121,"latencyMs":64797.92758300016},{"questionId":"q175","format":"xml","model":"gemini-3-flash-preview","expected":"0","actual":"0","isCorrect":true,"inputTokens":1303,"outputTokens":7249,"latencyMs":40270.52766599972},{"questionId":"q175","format":"yaml","model":"gemini-3-flash-preview","expected":"0","actual":"Answer: 
0","isCorrect":true,"inputTokens":893,"outputTokens":12428,"latencyMs":71397.56554099964},{"questionId":"q176","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1237,"outputTokens":255,"latencyMs":3975.222832999658},{"questionId":"q176","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":722,"outputTokens":322,"latencyMs":3168.289000000339},{"questionId":"q176","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":863,"outputTokens":470,"latencyMs":3961.8239999995567},{"questionId":"q176","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1296,"outputTokens":363,"latencyMs":2776.368209000211},{"questionId":"q176","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":886,"outputTokens":183,"latencyMs":2042.706125000026},{"questionId":"q177","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1247,"outputTokens":398,"latencyMs":3183.3096249997616},{"questionId":"q177","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":732,"outputTokens":601,"latencyMs":4003.3801659997553},{"questionId":"q177","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":873,"outputTokens":460,"latencyMs":3824.3355840002187},{"questionId":"q177","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1306,"outputTokens":335,"latencyMs":2670.33212500019},{"questionId":"q177","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":896,"outputTokens":509,"latencyMs":3931.6993750003166},{"questionId":"q178","format":"json-pretty",
"model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1240,"outputTokens":202,"latencyMs":2379.3624999998137},{"questionId":"q178","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":725,"outputTokens":343,"latencyMs":2364.520125000272},{"questionId":"q178","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":866,"outputTokens":360,"latencyMs":3147.515875000041},{"questionId":"q178","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1299,"outputTokens":560,"latencyMs":3410.046916999854},{"questionId":"q178","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":889,"outputTokens":251,"latencyMs":2061.3066670000553},{"questionId":"q179","format":"json-pretty","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1239,"outputTokens":264,"latencyMs":2578.9504999998026},{"questionId":"q179","format":"json-compact","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":724,"outputTokens":411,"latencyMs":3026.957417000085},{"questionId":"q179","format":"toon","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":865,"outputTokens":384,"latencyMs":3062.3282920001075},{"questionId":"q179","format":"xml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":1298,"outputTokens":1253,"latencyMs":8271.817874999717},{"questionId":"q179","format":"yaml","model":"gemini-3-flash-preview","expected":"1","actual":"1","isCorrect":true,"inputTokens":888,"outputTokens":1088,"latencyMs":6691.79045900004},{"questionId":"q180","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":7952,"outputTokens":315,"latencyMs":3
943.8354580001906},{"questionId":"q180","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":4742,"outputTokens":322,"latencyMs":3352.6187920002267},{"questionId":"q180","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3385,"outputTokens":423,"latencyMs":3825.752249999903},{"questionId":"q180","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":3237,"outputTokens":348,"latencyMs":3906.314874999691},{"questionId":"q180","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":9139,"outputTokens":292,"latencyMs":3257.018541999627},{"questionId":"q180","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":5792,"outputTokens":201,"latencyMs":2769.807792000007},{"questionId":"q181","format":"json-pretty","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7958,"outputTokens":197,"latencyMs":2067.750457999762},{"questionId":"q181","format":"json-compact","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4748,"outputTokens":298,"latencyMs":3136.904000000097},{"questionId":"q181","format":"toon","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":3391,"outputTokens":162,"latencyMs":2419.802917000372},{"questionId":"q181","format":"csv","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id
,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":3243,"outputTokens":238,"latencyMs":2899.084875000175},{"questionId":"q181","format":"xml","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":9145,"outputTokens":338,"latencyMs":3334.7015829999},{"questionId":"q181","format":"yaml","model":"gemini-3-flash-preview","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5798,"outputTokens":204,"latencyMs":2331.2722080000676},{"questionId":"q182","format":"json-pretty","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":7955,"outputTokens":186,"latencyMs":1860.572040999774},{"questionId":"q182","format":"json-compact","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":4745,"outputTokens":188,"latencyMs":2353.6782500003465},{"questionId":"q182","format":"toon","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":3388,"outputTokens":210,"latencyMs":2166.1137500000186},{"questionId":"q182","format":"csv","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":3240,"outputTokens":156,"latencyMs":2828.5353749999776},{"questionId":"q182","format":"xml","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":9142,"outputTokens":309,"latencyMs":3038.5298750000075},{"questionId":"q182","format":"yaml","model":"gemini-3-flash-preview","expected":"email","actual":"email","isCorrect":true,"inputTokens":5795,"outputTokens":191,"latencyMs":2307.4157910002396},{"questionId":"q183","format":"json-pretty","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCo
rrect":true,"inputTokens":7956,"outputTokens":255,"latencyMs":2322.740791999735},{"questionId":"q183","format":"json-compact","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4746,"outputTokens":322,"latencyMs":3037.849916000385},{"questionId":"q183","format":"toon","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":3389,"outputTokens":447,"latencyMs":3962.5759999998845},{"questionId":"q183","format":"csv","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":3241,"outputTokens":242,"latencyMs":3107.0507089998573},{"questionId":"q183","format":"xml","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":9143,"outputTokens":345,"latencyMs":3499.3677920000628},{"questionId":"q183","format":"yaml","model":"gemini-3-flash-preview","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5796,"outputTokens":275,"latencyMs":2694.6957910000347},{"questionId":"q184","format":"json-pretty","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":7956,"outputTokens":225,"latencyMs":2593.3629159997217},{"questionId":"q184","format":"json-compact","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":4746,"outputTokens":249,"latencyMs":3521.3388749998994},{"questionId":"q184","format":"toon","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":3389,"outputTokens":290,"latencyMs":3582.416000000201},{"questionId":"q184","format":"csv","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":3241,"outputTokens":303,"latencyMs":3177.1879170001484},{"questionId":"q184","format":"xml","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares 
Skiles","isCorrect":true,"inputTokens":9143,"outputTokens":278,"latencyMs":3262.093291999772},{"questionId":"q184","format":"yaml","model":"gemini-3-flash-preview","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":5796,"outputTokens":201,"latencyMs":2818.5692920000292},{"questionId":"q185","format":"json-pretty","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":7953,"outputTokens":557,"latencyMs":3882.9571249997243},{"questionId":"q185","format":"json-compact","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":4743,"outputTokens":285,"latencyMs":3122.202416999731},{"questionId":"q185","format":"toon","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":3386,"outputTokens":244,"latencyMs":2954.8477909998037},{"questionId":"q185","format":"csv","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":3238,"outputTokens":405,"latencyMs":3678.0608749999665},{"questionId":"q185","format":"xml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":9140,"outputTokens":307,"latencyMs":3485.0552500002086},{"questionId":"q185","format":"yaml","model":"gemini-3-flash-preview","expected":"7","actual":"7","isCorrect":true,"inputTokens":5793,"outputTokens":251,"latencyMs":2684.3431250001304},{"questionId":"q186","format":"json-pretty","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":14515,"outputTokens":384,"latencyMs":3508.5958750001155},{"questionId":"q186","format":"json-compact","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":8888,"outputTokens":372,"latencyMs":3151.7762500001118},{"questionId":"q186","format":"toon","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":9421,"outputTokens":249,"latencyMs":2743.18208299996
33},{"questionId":"q186","format":"xml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":16018,"outputTokens":748,"latencyMs":4888.485166999977},{"questionId":"q186","format":"yaml","model":"gemini-3-flash-preview","expected":"50","actual":"50","isCorrect":true,"inputTokens":10583,"outputTokens":710,"latencyMs":4790.436832999811},{"questionId":"q187","format":"json-pretty","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":14524,"outputTokens":258,"latencyMs":2594.7981249997392},{"questionId":"q187","format":"json-compact","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8897,"outputTokens":310,"latencyMs":3308.189792000223},{"questionId":"q187","format":"toon","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":9430,"outputTokens":631,"latencyMs":4832.389708000235},{"questionId":"q187","format":"xml","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":16027,"outputTokens":4325,"latencyMs":25736.56224999996},{"questionId":"q187","format":"yaml","model":"gemini-3-flash-preview","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":10592,"outputTokens":1492,"latencyMs":9646.41858400032},{"questionId":"q188","format":"json-pretty","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrec
t":true,"inputTokens":14520,"outputTokens":398,"latencyMs":3531.046916999854},{"questionId":"q188","format":"json-compact","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":8893,"outputTokens":368,"latencyMs":3435.5410830001347},{"questionId":"q188","format":"toon","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":9426,"outputTokens":764,"latencyMs":5684.054332999978},{"questionId":"q188","format":"xml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":16023,"outputTokens":700,"latencyMs":5889.028833999764},{"questionId":"q188","format":"yaml","model":"gemini-3-flash-preview","expected":"4","actual":"4","isCorrect":true,"inputTokens":10588,"outputTokens":365,"latencyMs":3188.390374999959},{"questionId":"q189","format":"json-pretty","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":14524,"outputTokens":235,"latencyMs":2662.8358329995535},{"questionId":"q189","format":"json-compact","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8897,"outputTokens":467,"latencyMs":4048.8038749997504},{"questionId":"q189","format":"toon","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":9430,"outputTokens":332,"latencyMs":3812.723042000085},{"questionId":"q189","format":"xml","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":16027,"outputTokens":1380,"latencyMs":9444.909583},{"questionId":"q189","format":"yaml","model":"gemini-3-flash-preview","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":10592,"outputTokens":327,"latencyMs":3314.23037499981},{"questionId":"q190","f
ormat":"json-pretty","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":14519,"outputTokens":408,"latencyMs":3413.136832999997},{"questionId":"q190","format":"json-compact","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8892,"outputTokens":285,"latencyMs":2905.8485830002464},{"questionId":"q190","format":"toon","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":9425,"outputTokens":584,"latencyMs":5310.31254200032},{"questionId":"q190","format":"xml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":16022,"outputTokens":403,"latencyMs":3149.7518750000745},{"questionId":"q190","format":"yaml","model":"gemini-3-flash-preview","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10587,"outputTokens":400,"latencyMs":3436.1740419999696},{"questionId":"q191","format":"json-pretty","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":14525,"outputTokens":470,"latencyMs":4506.274041000288},{"questionId":"q191","format":"json-compact","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8898,"outputTokens":442,"latencyMs":4879.77358300006},{"questionId":"q191","format":"toon","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":9431,"outputTokens":238,"latencyMs":2592.435749999713},{"questionId":"q191","format":"xml","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":16028,"outputTokens":317,"latencyMs":2828.340749999974},{"questionId":"q191","format":"yaml","model":"gemini-3-flash-preview","expected":"id,name,email,phone","actua
l":"id,name,email,phone","isCorrect":true,"inputTokens":10593,"outputTokens":539,"latencyMs":4051.061791999731},{"questionId":"q192","format":"json-pretty","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":4842,"outputTokens":492,"latencyMs":3239.620291000232},{"questionId":"q192","format":"json-compact","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3090,"outputTokens":1226,"latencyMs":7195.048208000138},{"questionId":"q192","format":"toon","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2353,"outputTokens":1234,"latencyMs":7177.8317080000415},{"questionId":"q192","format":"csv","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":2267,"outputTokens":1411,"latencyMs":7650.357165999711},{"questionId":"q192","format":"xml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":5485,"outputTokens":1266,"latencyMs":7596.692000000272},{"questionId":"q192","format":"yaml","model":"gemini-3-flash-preview","expected":"60","actual":"60","isCorrect":true,"inputTokens":3871,"outputTokens":1206,"latencyMs":6596.45620799996},{"questionId":"q193","format":"json-pretty","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4847,"outputTokens":209,"latencyMs":1896.434499999974},{"questionId":"q193","format":"json-compact","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3095,"outputTokens":258,"latencyMs":2658.780958000105},{"questionId":"q193","format":"toon","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bou
nceRate","isCorrect":true,"inputTokens":2358,"outputTokens":197,"latencyMs":2764.855624999851},{"questionId":"q193","format":"csv","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2272,"outputTokens":454,"latencyMs":3435.9187500001863},{"questionId":"q193","format":"xml","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":5490,"outputTokens":209,"latencyMs":3226.861624999903},{"questionId":"q193","format":"yaml","model":"gemini-3-flash-preview","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3876,"outputTokens":190,"latencyMs":2388.0288749998435},{"questionId":"q194","format":"json-pretty","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":4845,"outputTokens":166,"latencyMs":1974.8230409999378},{"questionId":"q194","format":"json-compact","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3093,"outputTokens":270,"latencyMs":2943.581833000295},{"questionId":"q194","format":"toon","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2356,"outputTokens":193,"latencyMs":2672.7335839997977},{"questionId":"q194","format":"csv","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2270,"outputTokens":253,"latencyMs":2323.008790999651},{"questionId":"q194","format":"xml","model":"gemini-3-flash-preview","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":5488,"outputTokens":270,"latencyMs":3329.949667000212},{"questionId":"q194","format":"yaml","model":"gemini-3-flash-preview","expec
ted":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3874,"outputTokens":258,"latencyMs":2539.2681660000235},{"questionId":"q195","format":"json-pretty","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4846,"outputTokens":256,"latencyMs":1998.8394589996897},{"questionId":"q195","format":"json-compact","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3094,"outputTokens":364,"latencyMs":3745.7140000001527},{"questionId":"q195","format":"toon","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2357,"outputTokens":548,"latencyMs":3837.162624999881},{"questionId":"q195","format":"csv","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2271,"outputTokens":427,"latencyMs":3214.532333999872},{"questionId":"q195","format":"xml","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":5489,"outputTokens":210,"latencyMs":2239.8655829997733},{"questionId":"q195","format":"yaml","model":"gemini-3-flash-preview","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3875,"outputTokens":298,"latencyMs":2552.7652079998516},{"questionId":"q196","format":"json-pretty","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":4842,"outputTokens":191,"latencyMs":1745.297875000164},{"questionId":"q196","format":"json-compact","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":3090,"outputTokens":289,"latencyMs":3169.089292000048},{"questionId":"q196","format":"toon","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":2353,"outputTokens":245,"latencyMs":2550.201374999713},{"questionId":"q196","format":"csv","model":"gemini-3-flash-preview","expected":"6"
,"actual":"6","isCorrect":true,"inputTokens":2267,"outputTokens":324,"latencyMs":2538.415124999825},{"questionId":"q196","format":"xml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":5485,"outputTokens":241,"latencyMs":2524.748583999928},{"questionId":"q196","format":"yaml","model":"gemini-3-flash-preview","expected":"6","actual":"6","isCorrect":true,"inputTokens":3871,"outputTokens":439,"latencyMs":3012.655792000238},{"questionId":"q197","format":"json-pretty","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":20056,"outputTokens":1118,"latencyMs":8078.058124999981},{"questionId":"q197","format":"json-compact","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":15094,"outputTokens":2900,"latencyMs":16296.94633299997},{"questionId":"q197","format":"toon","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12426,"outputTokens":5207,"latencyMs":29083.650500000454},{"questionId":"q197","format":"csv","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":12273,"outputTokens":3685,"latencyMs":27621.482290999964},{"questionId":"q197","format":"xml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":21942,"outputTokens":1658,"latencyMs":12432.322666999884},{"questionId":"q197","format":"yaml","model":"gemini-3-flash-preview","expected":"100","actual":"100","isCorrect":true,"inputTokens":17140,"outputTokens":6480,"latencyMs":39716.9951249999},{"questionId":"q198","format":"json-pretty","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":20062,"outputTokens":493,"latencyMs":6124.747957999818},{"questionI
d":"q198","format":"json-compact","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":15100,"outputTokens":428,"latencyMs":3494.3070420003496},{"questionId":"q198","format":"toon","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":12432,"outputTokens":341,"latencyMs":3434.7930000000633},{"questionId":"q198","format":"csv","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":12279,"outputTokens":310,"latencyMs":3622.276000000071},{"questionId":"q198","format":"xml","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":21948,"outputTokens":518,"latencyMs":5547.129416999873},{"questionId":"q198","format":"yaml","model":"gemini-3-flash-preview","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17146,"outputTokens":362,"latencyMs":3922.9449999998324},{"questionId":"q199","format":"json-pretty","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":20060,"outputTokens":429,"latencyMs":5872.227500000037
},{"questionId":"q199","format":"json-compact","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":15098,"outputTokens":401,"latencyMs":3150.725666999817},{"questionId":"q199","format":"toon","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":12430,"outputTokens":274,"latencyMs":2704.2370000001974},{"questionId":"q199","format":"csv","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":12277,"outputTokens":232,"latencyMs":3072.834290999919},{"questionId":"q199","format":"xml","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":21946,"outputTokens":376,"latencyMs":3661.298082999885},{"questionId":"q199","format":"yaml","model":"gemini-3-flash-preview","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":17144,"outputTokens":398,"latencyMs":3777.305541999638},{"questionId":"q200","format":"json-pretty","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":20060,"outputTokens":341,"latencyMs":3306.3240839997306},{"questionId":"q200","format":"json-compact","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":15098,"outputTokens":245,"latencyMs":2386.4694590000436},{"questionId":"q200","format":"toon","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":12430,"outputTokens":538,"latencyMs":4471.466667000204},{"questionId":"q200","format":"csv","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":12277,"outputTokens":626,"latencyMs":6235.981041999999},{"questionId":"q200","format":"xml","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":21946,"outputTokens":3
79,"latencyMs":4212.068833000027},{"questionId":"q200","format":"yaml","model":"gemini-3-flash-preview","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17144,"outputTokens":408,"latencyMs":4292.555374999996},{"questionId":"q201","format":"json-pretty","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":20057,"outputTokens":526,"latencyMs":4570.304167000111},{"questionId":"q201","format":"json-compact","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":15095,"outputTokens":469,"latencyMs":4114.027958999854},{"questionId":"q201","format":"toon","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":12427,"outputTokens":483,"latencyMs":4718.540916999802},{"questionId":"q201","format":"csv","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":12274,"outputTokens":265,"latencyMs":3433.0660839998163},{"questionId":"q201","format":"xml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":21943,"outputTokens":395,"latencyMs":3935.7870419998653},{"questionId":"q201","format":"yaml","model":"gemini-3-flash-preview","expected":"11","actual":"11","isCorrect":true,"inputTokens":17141,"outputTokens":569,"latencyMs":5149.430582999717},{"questionId":"q202","format":"json-pretty","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":8583,"outputTokens":3184,"latencyMs":18665.33362499997},{"questionId":"q202","format":"json-compact","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":5910,"outputTokens":4136,"latencyMs":20826.376708999742},{"questionId":"q202","format":"toon","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":7037,"outputTokens":4510,"latencyMs":21621.044499999844},{"questionId":"q202","format":"xml","model":"g
emini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":9406,"outputTokens":6379,"latencyMs":34324.00137499999},{"questionId":"q202","format":"yaml","model":"gemini-3-flash-preview","expected":"75","actual":"75","isCorrect":true,"inputTokens":6968,"outputTokens":11381,"latencyMs":50759.27483400004},{"questionId":"q203","format":"json-pretty","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":8593,"outputTokens":15868,"latencyMs":102603.057792},{"questionId":"q203","format":"json-compact","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":5920,"outputTokens":10808,"latencyMs":60124.26979199983},{"questionId":"q203","format":"toon","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":7047,"outputTokens":15092,"latencyMs":80030.22683299985},{"questionId":"q203","format":"xml","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error,message,stack,retryable","isCorrect":false,"inputTokens":9416,"outputTokens":9526,"latencyMs":58497.83049999969},{"questionId":"q203","format":"yaml","model":"gemini-3-flash-preview","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6978,"outputTokens":11767,"latencyMs":61055.251166999806},{"questionId":"q204","format":"json-pretty","model":"gemi
ni-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":8587,"outputTokens":207,"latencyMs":2769.122083000373},{"questionId":"q204","format":"json-compact","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":5914,"outputTokens":291,"latencyMs":2656.535166000016},{"questionId":"q204","format":"toon","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":7041,"outputTokens":382,"latencyMs":2987.1058339998126},{"questionId":"q204","format":"xml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":9410,"outputTokens":529,"latencyMs":4111.459499999881},{"questionId":"q204","format":"yaml","model":"gemini-3-flash-preview","expected":"info","actual":"info","isCorrect":true,"inputTokens":6972,"outputTokens":356,"latencyMs":2771.627915999852},{"questionId":"q205","format":"json-pretty","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1679,"outputTokens":364,"latencyMs":3299.07416700013},{"questionId":"q205","format":"json-compact","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1027,"outputTokens":383,"latencyMs":4504.669333000202},{"questionId":"q205","format":"toon","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":791,"outputTokens":5093,"latencyMs":26681.41149999993},{"questionId":"q205","format":"csv","model":"gemini-3-flash-preview","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":740,"outputTokens":1047,"latencyMs":5415.330792000052},{"questionId":"q205","format":"xml","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1904,"outputTokens":17764,"latencyMs":100895.7315410003},{"questionId":"q205","format":"yaml","model":"gemini-3-flash-preview","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1234,
"outputTokens":200,"latencyMs":2292.195166000165},{"questionId":"q206","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1439,"outputTokens":351,"latencyMs":3390.177709000185},{"questionId":"q206","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":883,"outputTokens":1294,"latencyMs":7823.7045840001665},{"questionId":"q206","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":689,"outputTokens":1630,"latencyMs":8854.233624999877},{"questionId":"q206","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":641,"outputTokens":1091,"latencyMs":6341.708249999676},{"questionId":"q206","format":"xml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1628,"outputTokens":2333,"latencyMs":14825.134624999948},{"questionId":"q206","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1059,"outputTokens":502,"latencyMs":4344.449874999933},{"questionId":"q207","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1906,"outputTokens":10728,"latencyMs":60138.674999999814},{"questionId":"q207","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1158,"outputTokens":11149,"latencyMs":60800.22116599977},{"questionId":"q207","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":880,"outputTokens":14565,"latencyMs":73282.9954169998},{"questionId":"q207","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":826,"outputTokens":872,"latencyMs":5295.445292000193},{"questionId":"q207","format":"xml","model":
"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":2167,"outputTokens":2846,"latencyMs":18577.68866700027},{"questionId":"q207","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1396,"outputTokens":1407,"latencyMs":9823.643208999652},{"questionId":"q208","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1667,"outputTokens":747,"latencyMs":5737.518333999906},{"questionId":"q208","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1019,"outputTokens":2318,"latencyMs":12557.172875000164},{"questionId":"q208","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1252,"outputTokens":6575,"latencyMs":37929.264667000156},{"questionId":"q208","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":734,"outputTokens":1047,"latencyMs":5862.587791000027},{"questionId":"q208","format":"xml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1891,"outputTokens":398,"latencyMs":3451.928125000093},{"questionId":"q208","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1224,"outputTokens":586,"latencyMs":4121.9415830001235},{"questionId":"q209","format":"json-pretty","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1615,"outputTokens":370,"latencyMs":2777.636208000127},{"questionId":"q209","format":"json-compact","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":979,"outputTokens":1354,"latencyMs":7486.286458000075},{"questionId":"q209","format":"toon","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1210,"outpu
tTokens":20177,"latencyMs":113554.60433300026},{"questionId":"q209","format":"csv","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":536,"outputTokens":1066,"latencyMs":5578.249165999703},{"questionId":"q209","format":"xml","model":"gemini-3-flash-preview","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1836,"outputTokens":16221,"latencyMs":89791.01670799963},{"questionId":"q209","format":"yaml","model":"gemini-3-flash-preview","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1182,"outputTokens":191,"latencyMs":1986.013666999992}] ================================================ FILE: benchmarks/results/accuracy/models/gpt-5-nano ================================================ [{"questionId":"q1","format":"json-pretty","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":6453,"outputTokens":72,"latencyMs":2972.5673339999994},{"questionId":"q1","format":"json-compact","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":4046,"outputTokens":136,"latencyMs":3134.6921249999996},{"questionId":"q1","format":"toon","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2610,"outputTokens":136,"latencyMs":3895.96425},{"questionId":"q1","format":"csv","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":2444,"outputTokens":136,"latencyMs":3544.769292},{"questionId":"q1","format":"xml","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":7415,"outputTokens":264,"latencyMs":7031.435208000001},{"questionId":"q1","format":"yaml","model":"gpt-5-nano","expected":"56176","actual":"56176","isCorrect":true,"inputTokens":5073,"outputTokens":136,"latencyMs":3521.167166},{"questionId":"q2","format":"json-pretty","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":6453,"outputTokens":135,"latencyMs":3347
.5196659999997},{"questionId":"q2","format":"json-compact","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4046,"outputTokens":263,"latencyMs":5199.591958},{"questionId":"q2","format":"toon","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2610,"outputTokens":199,"latencyMs":3885.8805},{"questionId":"q2","format":"csv","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2444,"outputTokens":135,"latencyMs":3269.682833},{"questionId":"q2","format":"xml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7415,"outputTokens":199,"latencyMs":6971.545625000001},{"questionId":"q2","format":"yaml","model":"gpt-5-nano","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5073,"outputTokens":135,"latencyMs":5154.9494159999995},{"questionId":"q3","format":"json-pretty","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":6455,"outputTokens":204,"latencyMs":3733.00875},{"questionId":"q3","format":"json-compact","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":4048,"outputTokens":268,"latencyMs":3525.1048329999994},{"questionId":"q3","format":"toon","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2612,"outputTokens":268,"latencyMs":8655.205000000002},{"questionId":"q3","format":"csv","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":2446,"outputTokens":204,"latencyMs":6536.7831670000005},{"questionId":"q3","format":"xml","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":7417,"outputTokens":268,"latencyMs":5954.473
},{"questionId":"q3","format":"yaml","model":"gpt-5-nano","expected":"lorenza.kunze@yahoo.com","actual":"lorenza.kunze@yahoo.com","isCorrect":true,"inputTokens":5075,"outputTokens":268,"latencyMs":3160.4033339999996},{"questionId":"q4","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":6455,"outputTokens":199,"latencyMs":3313.826750000001},{"questionId":"q4","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4048,"outputTokens":199,"latencyMs":3131.2316250000003},{"questionId":"q4","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2612,"outputTokens":199,"latencyMs":5442.334000000001},{"questionId":"q4","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2446,"outputTokens":199,"latencyMs":3414.6267500000013},{"questionId":"q4","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":7417,"outputTokens":327,"latencyMs":8402.244708},{"questionId":"q4","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":5075,"outputTokens":199,"latencyMs":3475.437167},{"questionId":"q5","format":"json-pretty","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":6451,"outputTokens":263,"latencyMs":4907.695667},{"questionId":"q5","format":"json-compact","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":4044,"outputTokens":327,"latencyMs":4696.913291999999},{"questionId":"q5","format":"toon","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":2608,"outputTokens":391,"latencyMs":3964.1052500000005},{"questionId":"q5","format":"csv","model":"gpt-5-nano","expected":"no","actual":"0","isCorrect":true,"inputTokens":2442,"outputTokens":391,"latencyMs":4546.052125},{"questionId":"q5","format":"xml","model":"gpt-5-
nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":7413,"outputTokens":263,"latencyMs":12447.142749999999},{"questionId":"q5","format":"yaml","model":"gpt-5-nano","expected":"no","actual":"false","isCorrect":true,"inputTokens":5071,"outputTokens":327,"latencyMs":5445.827582999998},{"questionId":"q6","format":"json-pretty","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":6452,"outputTokens":200,"latencyMs":2964.3259579999994},{"questionId":"q6","format":"json-compact","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":4045,"outputTokens":136,"latencyMs":2476.4536669999998},{"questionId":"q6","format":"toon","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2609,"outputTokens":264,"latencyMs":10081.144249999998},{"questionId":"q6","format":"csv","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":2443,"outputTokens":200,"latencyMs":3566.9233750000003},{"questionId":"q6","format":"xml","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":7414,"outputTokens":200,"latencyMs":3328.505791999998},{"questionId":"q6","format":"yaml","model":"gpt-5-nano","expected":"133081","actual":"133081","isCorrect":true,"inputTokens":5072,"outputTokens":136,"latencyMs":2627.9952919999996},{"questionId":"q7","format":"json-pretty","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":6453,"outputTokens":199,"latencyMs":6494.151333},{"questionId":"q7","format":"json-compact","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4046,"outputTokens":327,"latencyMs":7962.695958},{"questionId":"q7","format":"toon","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2610,"outputTokens":135,"latencyMs":3294.491083000001},{"questionId":"q7","format":
"csv","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2444,"outputTokens":71,"latencyMs":2967.147291999998},{"questionId":"q7","format":"xml","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7415,"outputTokens":199,"latencyMs":5902.4375},{"questionId":"q7","format":"yaml","model":"gpt-5-nano","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5073,"outputTokens":135,"latencyMs":2749.074292000001},{"questionId":"q8","format":"json-pretty","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":6454,"outputTokens":397,"latencyMs":6029.30125},{"questionId":"q8","format":"json-compact","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":4047,"outputTokens":525,"latencyMs":9383.724667000002},{"questionId":"q8","format":"toon","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2611,"outputTokens":205,"latencyMs":6669.472833},{"questionId":"q8","format":"csv","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":2445,"outputTokens":269,"latencyMs":4525.544707999998},{"questionId":"q8","format":"xml","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":7416,"outputTokens":205,"latencyMs":3729.2334999999985},{"questionId":"q8","format":"yaml","model":"gpt-5-nano","expected":"delpha.russel@gmail.com","actual":"delpha.russel@gmail.com","isCorrect":true,"inputTokens":5074,"outputTokens":205,"latencyMs":5299.043415999997},{"questionId":"q9","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6456,"outputTokens":199,"latencyMs":8089.861666000001},{"quest
ionId":"q9","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":4049,"outputTokens":135,"latencyMs":3481.7531249999993},{"questionId":"q9","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":2613,"outputTokens":519,"latencyMs":6562.223791000004},{"questionId":"q9","format":"csv","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":2447,"outputTokens":199,"latencyMs":6497.464292000004},{"questionId":"q9","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7418,"outputTokens":263,"latencyMs":4308.770542000006},{"questionId":"q9","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5076,"outputTokens":455,"latencyMs":7165.029666999995},{"questionId":"q10","format":"json-pretty","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":6453,"outputTokens":199,"latencyMs":7847.269749999992},{"questionId":"q10","format":"json-compact","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4046,"outputTokens":263,"latencyMs":5162.878791000003},{"questionId":"q10","format":"toon","model":"gpt-5-nano","expected":"yes","actual":"false","isCorrect":false,"inputTokens":2610,"outputTokens":3015,"latencyMs":44119.86958299999},{"questionId":"q10","format":"csv","model":"gpt-5-nano","expected":"yes","actual":"1","isCorrect":true,"inputTokens":2444,"outputTokens":711,"latencyMs":10732.124499999998},{"questionId":"q10","format":"xml","model":"gpt-5-nano","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7415,"outputTokens":455,"latencyMs":7670.041000000005},{"questionId":"q10","format":"yaml","model":"gpt-5-nano","expected":"yes","actual":"false","isCorrect":false,"inputTokens":5073,"outputTokens":1671,"latencyMs":32276.611915999994},{"questionId":"q11","format":"json-pretty","model":"gpt-5-nano","expected":"
109064","actual":"109064","isCorrect":true,"inputTokens":6453,"outputTokens":200,"latencyMs":5021.2276249999995},{"questionId":"q11","format":"json-compact","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":4046,"outputTokens":136,"latencyMs":4285.859167000002},{"questionId":"q11","format":"toon","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2610,"outputTokens":136,"latencyMs":2937.344834000003},{"questionId":"q11","format":"csv","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":2444,"outputTokens":136,"latencyMs":2420.909792000006},{"questionId":"q11","format":"xml","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":7415,"outputTokens":136,"latencyMs":2657.114041000008},{"questionId":"q11","format":"yaml","model":"gpt-5-nano","expected":"109064","actual":"109064","isCorrect":true,"inputTokens":5073,"outputTokens":136,"latencyMs":4141.019750000007},{"questionId":"q12","format":"json-pretty","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":6451,"outputTokens":327,"latencyMs":8326.443249999997},{"questionId":"q12","format":"json-compact","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4044,"outputTokens":71,"latencyMs":1961.1611669999984},{"questionId":"q12","format":"toon","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2608,"outputTokens":455,"latencyMs":9416.313375000012},{"questionId":"q12","format":"csv","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2442,"outputTokens":199,"latencyMs":3679.7195409999986},{"questionId":"q12","format":"xml","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":7413,"outputTokens":391,"latencyMs":4654.398666000008},{"questionId":"q1
2","format":"yaml","model":"gpt-5-nano","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5071,"outputTokens":263,"latencyMs":5227.314999999988},{"questionId":"q13","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6450,"outputTokens":583,"latencyMs":11891.486790999988},{"questionId":"q13","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4043,"outputTokens":903,"latencyMs":11808.044291999991},{"questionId":"q13","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2607,"outputTokens":1031,"latencyMs":13869.627583000009},{"questionId":"q13","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2441,"outputTokens":1095,"latencyMs":11982.031124999994},{"questionId":"q13","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7412,"outputTokens":1159,"latencyMs":12268.117834000004},{"questionId":"q13","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5070,"outputTokens":903,"latencyMs":14245.349500000011},{"questionId":"q14","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6450,"outputTokens":583,"latencyMs":10854.242750000005},{"questionId":"q14","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4043,"outputTokens":1543,"latencyMs":14848.513416999995},{"questionId":"q14","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2607,"outputTokens":1351,"latencyMs":18436.125499999995},{"questionId":"q14","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2441,"outputTokens":583,"latencyMs":7873.872290999992},{"questionId":"q14","format":"xml","model":"gpt-5-nano","
expected":"17","actual":"17","isCorrect":true,"inputTokens":7412,"outputTokens":1159,"latencyMs":16891.326750000007},{"questionId":"q14","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5070,"outputTokens":775,"latencyMs":11653.730375},{"questionId":"q15","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6450,"outputTokens":711,"latencyMs":10938.143958},{"questionId":"q15","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4043,"outputTokens":839,"latencyMs":9845.249500000005},{"questionId":"q15","format":"toon","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2607,"outputTokens":1351,"latencyMs":17360.869999999995},{"questionId":"q15","format":"csv","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":2441,"outputTokens":583,"latencyMs":6245.138082999998},{"questionId":"q15","format":"xml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":7412,"outputTokens":583,"latencyMs":6926.366333000013},{"questionId":"q15","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":5070,"outputTokens":1159,"latencyMs":10682.860499999995},{"questionId":"q16","format":"json-pretty","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":6455,"outputTokens":2887,"latencyMs":26044.378417},{"questionId":"q16","format":"json-compact","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":4048,"outputTokens":2311,"latencyMs":20886.991708999994},{"questionId":"q16","format":"toon","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":2612,"outputTokens":3207,"latencyMs":38211.708791},{"questionId":"q16","format":"csv","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":2446,"outputTokens":
3591,"latencyMs":34722.982124999995},{"questionId":"q16","format":"xml","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":7417,"outputTokens":2759,"latencyMs":27677.160040999996},{"questionId":"q16","format":"yaml","model":"gpt-5-nano","expected":"91","actual":"91","isCorrect":true,"inputTokens":5075,"outputTokens":2183,"latencyMs":21999.34112499999},{"questionId":"q17","format":"json-pretty","model":"gpt-5-nano","expected":"67","actual":"68","isCorrect":false,"inputTokens":6455,"outputTokens":2375,"latencyMs":24885.276625},{"questionId":"q17","format":"json-compact","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":4048,"outputTokens":2567,"latencyMs":23865.78125},{"questionId":"q17","format":"toon","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":2612,"outputTokens":3271,"latencyMs":33953.05562499999},{"questionId":"q17","format":"csv","model":"gpt-5-nano","expected":"67","actual":"66","isCorrect":false,"inputTokens":2446,"outputTokens":2695,"latencyMs":28272.904916999993},{"questionId":"q17","format":"xml","model":"gpt-5-nano","expected":"67","actual":"66","isCorrect":false,"inputTokens":7417,"outputTokens":2183,"latencyMs":20128.4455},{"questionId":"q17","format":"yaml","model":"gpt-5-nano","expected":"67","actual":"67","isCorrect":true,"inputTokens":5075,"outputTokens":2887,"latencyMs":27213.535542000012},{"questionId":"q18","format":"json-pretty","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":6455,"outputTokens":2119,"latencyMs":23143.38708300001},{"questionId":"q18","format":"json-compact","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":4048,"outputTokens":1735,"latencyMs":19879.81758399999},{"questionId":"q18","format":"toon","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":2612,"outputTokens":1159,"latencyMs":11657.12725000002},{"questionId":"q18","format":"csv"
,"model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":2446,"outputTokens":1607,"latencyMs":17132.650707999986},{"questionId":"q18","format":"xml","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":7417,"outputTokens":2119,"latencyMs":27134.673207999993},{"questionId":"q18","format":"yaml","model":"gpt-5-nano","expected":"41","actual":"42","isCorrect":false,"inputTokens":5075,"outputTokens":1671,"latencyMs":17106.441208000004},{"questionId":"q19","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":6451,"outputTokens":135,"latencyMs":2293.6627090000256},{"questionId":"q19","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":4044,"outputTokens":135,"latencyMs":2780.268083999981},{"questionId":"q19","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2608,"outputTokens":199,"latencyMs":4030.7550000000047},{"questionId":"q19","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2442,"outputTokens":327,"latencyMs":4502.81479199999},{"questionId":"q19","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":7413,"outputTokens":199,"latencyMs":5037.844874999981},{"questionId":"q19","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":5071,"outputTokens":199,"latencyMs":2760.7317500000063},{"questionId":"q20","format":"json-pretty","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":true,"inputTokens":6452,"outputTokens":4554,"latencyMs":44195.818792000005},{"questionId":"q20","format":"json-compact","model":"gpt-5-nano","expected":"96503","actual":"96493.32","isCorrect":false,"inputTokens":4045,"outputTokens":4362,"latencyMs":49358.45320799999},{"questionId":"q20","format":"toon","model":"gpt-5-nano","expe
cted":"96503","actual":"96503.32","isCorrect":true,"inputTokens":2609,"outputTokens":7882,"latencyMs":80184.430708},{"questionId":"q20","format":"csv","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":true,"inputTokens":2443,"outputTokens":8778,"latencyMs":95858.013875},{"questionId":"q20","format":"xml","model":"gpt-5-nano","expected":"96503","actual":"97054.06","isCorrect":false,"inputTokens":7414,"outputTokens":4426,"latencyMs":53834.34679099999},{"questionId":"q20","format":"yaml","model":"gpt-5-nano","expected":"96503","actual":"96503.32","isCorrect":true,"inputTokens":5072,"outputTokens":6666,"latencyMs":84141.234291},{"questionId":"q21","format":"json-pretty","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":6449,"outputTokens":1415,"latencyMs":16387.246041000006},{"questionId":"q21","format":"json-compact","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":4042,"outputTokens":1671,"latencyMs":25604.649542},{"questionId":"q21","format":"toon","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":2606,"outputTokens":1735,"latencyMs":26253.37858400002},{"questionId":"q21","format":"csv","model":"gpt-5-nano","expected":"78","actual":"80","isCorrect":false,"inputTokens":2440,"outputTokens":2439,"latencyMs":42881.88175},{"questionId":"q21","format":"xml","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":7411,"outputTokens":1543,"latencyMs":18824.236875000002},{"questionId":"q21","format":"yaml","model":"gpt-5-nano","expected":"78","actual":"78","isCorrect":true,"inputTokens":5069,"outputTokens":1351,"latencyMs":19731.303249999997},{"questionId":"q22","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":6449,"outputTokens":1031,"latencyMs":13784.215957999986},{"questionId":"q22","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"i
nputTokens":4042,"outputTokens":1223,"latencyMs":13702.792790999985},{"questionId":"q22","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2606,"outputTokens":1223,"latencyMs":14749.500458000024},{"questionId":"q22","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2440,"outputTokens":1927,"latencyMs":21055.402042},{"questionId":"q22","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":7411,"outputTokens":1031,"latencyMs":16298.56808300002},{"questionId":"q22","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":5069,"outputTokens":1031,"latencyMs":15148.553374999989},{"questionId":"q23","format":"json-pretty","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":6457,"outputTokens":1223,"latencyMs":23748.731792000006},{"questionId":"q23","format":"json-compact","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":4050,"outputTokens":1095,"latencyMs":13191.228082999995},{"questionId":"q23","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2614,"outputTokens":1799,"latencyMs":19708.061292},{"questionId":"q23","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2448,"outputTokens":1863,"latencyMs":17711.145375000022},{"questionId":"q23","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7419,"outputTokens":1287,"latencyMs":14610.536499999987},{"questionId":"q23","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5077,"outputTokens":1927,"latencyMs":21769.996958999982},{"questionId":"q24","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":1159,"latencyMs":14361.063832999993},{
"questionId":"q24","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4050,"outputTokens":903,"latencyMs":10174.464332999982},{"questionId":"q24","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1415,"latencyMs":14791.934709000023},{"questionId":"q24","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":903,"latencyMs":10505.627374999982},{"questionId":"q24","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1415,"latencyMs":15141.258583000017},{"questionId":"q24","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outputTokens":2503,"latencyMs":25832.709875},{"questionId":"q25","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":1415,"latencyMs":17542.110833000013},{"questionId":"q25","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4050,"outputTokens":1415,"latencyMs":15454.643125000002},{"questionId":"q25","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1671,"latencyMs":18372.684709000023},{"questionId":"q25","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":1607,"latencyMs":18959.7825},{"questionId":"q25","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1351,"latencyMs":17531.771584000002},{"questionId":"q25","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outputTokens":1863,"latencyMs":20752.042625000002},{"questionId":"q26","format":"json-pretty","model":"gpt-5-nano","expected
":"12","actual":"11","isCorrect":false,"inputTokens":6457,"outputTokens":903,"latencyMs":11434.620958999993},{"questionId":"q26","format":"json-compact","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":4050,"outputTokens":1799,"latencyMs":26667.313249999977},{"questionId":"q26","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2614,"outputTokens":1543,"latencyMs":19438.584040999995},{"questionId":"q26","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2448,"outputTokens":1863,"latencyMs":24016.536166999984},{"questionId":"q26","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7419,"outputTokens":1031,"latencyMs":12378.093457999988},{"questionId":"q26","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5077,"outputTokens":1735,"latencyMs":18425.30349999998},{"questionId":"q27","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":1031,"latencyMs":13011.211625000025},{"questionId":"q27","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":4050,"outputTokens":1031,"latencyMs":12447.250166999991},{"questionId":"q27","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1799,"latencyMs":21169.804915999994},{"questionId":"q27","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":1095,"latencyMs":13325.901125000004},{"questionId":"q27","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1799,"latencyMs":20041.018916},{"questionId":"q27","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outp
utTokens":839,"latencyMs":9830.15854200002},{"questionId":"q28","format":"json-pretty","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":6456,"outputTokens":2695,"latencyMs":34929.70891699998},{"questionId":"q28","format":"json-compact","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":4049,"outputTokens":2695,"latencyMs":32068.38629199998},{"questionId":"q28","format":"toon","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":2613,"outputTokens":3207,"latencyMs":35007.753916999965},{"questionId":"q28","format":"csv","model":"gpt-5-nano","expected":"63","actual":"64","isCorrect":false,"inputTokens":2447,"outputTokens":4295,"latencyMs":44454.070875000034},{"questionId":"q28","format":"xml","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":7418,"outputTokens":4359,"latencyMs":53078.10720800003},{"questionId":"q28","format":"yaml","model":"gpt-5-nano","expected":"63","actual":"63","isCorrect":true,"inputTokens":5076,"outputTokens":5383,"latencyMs":62424.056374999986},{"questionId":"q29","format":"json-pretty","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":6456,"outputTokens":3527,"latencyMs":38803.08712500002},{"questionId":"q29","format":"json-compact","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":4049,"outputTokens":2631,"latencyMs":36088.47983299999},{"questionId":"q29","format":"toon","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":2613,"outputTokens":3463,"latencyMs":37541.076541999995},{"questionId":"q29","format":"csv","model":"gpt-5-nano","expected":"53","actual":"54","isCorrect":false,"inputTokens":2447,"outputTokens":4423,"latencyMs":47284.71529199998},{"questionId":"q29","format":"xml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":7418,"outputTokens":2375,"latencyMs":26526.219709000026},{"questio
nId":"q29","format":"yaml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":5076,"outputTokens":2183,"latencyMs":22403.027584000025},{"questionId":"q30","format":"json-pretty","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":6456,"outputTokens":1607,"latencyMs":17674.940834000008},{"questionId":"q30","format":"json-compact","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":4049,"outputTokens":3591,"latencyMs":36116.21895800001},{"questionId":"q30","format":"toon","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":2613,"outputTokens":3271,"latencyMs":28577.131834},{"questionId":"q30","format":"csv","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":2447,"outputTokens":3527,"latencyMs":35360.875459},{"questionId":"q30","format":"xml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":7418,"outputTokens":2247,"latencyMs":22521.478082999995},{"questionId":"q30","format":"yaml","model":"gpt-5-nano","expected":"39","actual":"39","isCorrect":true,"inputTokens":5076,"outputTokens":2503,"latencyMs":25049.46987499995},{"questionId":"q31","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6457,"outputTokens":2375,"latencyMs":23942.841625},{"questionId":"q31","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4050,"outputTokens":1223,"latencyMs":32165.900916000013},{"questionId":"q31","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2614,"outputTokens":1351,"latencyMs":14162.266958999971},{"questionId":"q31","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2448,"outputTokens":1479,"latencyMs":17867.209082999965},{"questionId":"q31","format":"xml","model":"gpt-5-nano","expected":"11","actu
al":"11","isCorrect":true,"inputTokens":7419,"outputTokens":1671,"latencyMs":16419.887457999983},{"questionId":"q31","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5077,"outputTokens":1735,"latencyMs":18486.571375},{"questionId":"q32","format":"json-pretty","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":6457,"outputTokens":1351,"latencyMs":13326.963042000018},{"questionId":"q32","format":"json-compact","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":4050,"outputTokens":1287,"latencyMs":12924.029000000039},{"questionId":"q32","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":2614,"outputTokens":1671,"latencyMs":68753.054917},{"questionId":"q32","format":"csv","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":2448,"outputTokens":1735,"latencyMs":20531.763833000034},{"questionId":"q32","format":"xml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":7419,"outputTokens":1479,"latencyMs":25654.030582999985},{"questionId":"q32","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":5077,"outputTokens":1159,"latencyMs":14334.90933299996},{"questionId":"q33","format":"json-pretty","model":"gpt-5-nano","expected":"15","actual":"16","isCorrect":false,"inputTokens":6457,"outputTokens":1991,"latencyMs":22980.818790999998},{"questionId":"q33","format":"json-compact","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":4050,"outputTokens":1223,"latencyMs":14265.010167},{"questionId":"q33","format":"toon","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":2614,"outputTokens":1223,"latencyMs":16021.799958000018},{"questionId":"q33","format":"csv","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":2448,"outputTokens":1095,"latencyMs":1
2047.490749999997},{"questionId":"q33","format":"xml","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":7419,"outputTokens":1735,"latencyMs":20477.510917000007},{"questionId":"q33","format":"yaml","model":"gpt-5-nano","expected":"15","actual":"15","isCorrect":true,"inputTokens":5077,"outputTokens":1159,"latencyMs":12986.920958000002},{"questionId":"q34","format":"json-pretty","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":6451,"outputTokens":647,"latencyMs":7786.604583000008},{"questionId":"q34","format":"json-compact","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":4044,"outputTokens":1159,"latencyMs":12812.286625000008},{"questionId":"q34","format":"toon","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2608,"outputTokens":1287,"latencyMs":13650.962291000003},{"questionId":"q34","format":"csv","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":2442,"outputTokens":1543,"latencyMs":22916.750333000033},{"questionId":"q34","format":"xml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":7413,"outputTokens":1735,"latencyMs":27190.99129199999},{"questionId":"q34","format":"yaml","model":"gpt-5-nano","expected":"12","actual":"12","isCorrect":true,"inputTokens":5071,"outputTokens":1223,"latencyMs":18113.423624999996},{"questionId":"q35","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6451,"outputTokens":839,"latencyMs":14319.936749999993},{"questionId":"q35","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":4044,"outputTokens":1159,"latencyMs":19572.69550000003},{"questionId":"q35","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2608,"outputTokens":2183,"latencyMs":27670.17745899997},{"questionId":"q35","format":"csv",
"model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":2442,"outputTokens":2055,"latencyMs":30155.347083},{"questionId":"q35","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":7413,"outputTokens":1735,"latencyMs":17707.77020899998},{"questionId":"q35","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5071,"outputTokens":1671,"latencyMs":17215.395207999973},{"questionId":"q36","format":"json-pretty","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":10807,"outputTokens":329,"latencyMs":7442.371583},{"questionId":"q36","format":"json-compact","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":6888,"outputTokens":329,"latencyMs":6396.415417000011},{"questionId":"q36","format":"toon","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":7325,"outputTokens":393,"latencyMs":4602.9237079999875},{"questionId":"q36","format":"xml","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":12115,"outputTokens":265,"latencyMs":4785.739166999992},{"questionId":"q36","format":"yaml","model":"gpt-5-nano","expected":"103.86","actual":"103.86","isCorrect":true,"inputTokens":8439,"outputTokens":137,"latencyMs":7061.959375000035},{"questionId":"q37","format":"json-pretty","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":10807,"outputTokens":328,"latencyMs":6893.168333999987},{"questionId":"q37","format":"json-compact","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":6888,"outputTokens":392,"latencyMs":5599.888457999972},{"questionId":"q37","format":"toon","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":7325,"outputTokens":136,"latencyMs":3737.4124590000138},{"questionId":"q37","format":"xml","m
odel":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":12115,"outputTokens":200,"latencyMs":6961.4233330000425},{"questionId":"q37","format":"yaml","model":"gpt-5-nano","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8439,"outputTokens":264,"latencyMs":5140.496583},{"questionId":"q38","format":"json-pretty","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":10807,"outputTokens":585,"latencyMs":7864.396957999968},{"questionId":"q38","format":"json-compact","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":6888,"outputTokens":521,"latencyMs":6421.59816600004},{"questionId":"q38","format":"toon","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":7325,"outputTokens":265,"latencyMs":3733.0714159999625},{"questionId":"q38","format":"xml","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":12115,"outputTokens":393,"latencyMs":9006.905709000013},{"questionId":"q38","format":"yaml","model":"gpt-5-nano","expected":"422.5","actual":"422.5","isCorrect":true,"inputTokens":8439,"outputTokens":329,"latencyMs":6850.975207999989},{"questionId":"q39","format":"json-pretty","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":10807,"outputTokens":391,"latencyMs":5922.136790999968},{"questionId":"q39","format":"json-compact","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":6888,"outputTokens":391,"latencyMs":5514.317334000021},{"questionId":"q39","format":"toon","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":7325,"outputTokens":199,"latencyMs":5819.365749999997},{"questionId":"q39","format":"xml","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":12115,"outputTokens":327,"latencyMs":4577.5100000
00009},{"questionId":"q39","format":"yaml","model":"gpt-5-nano","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8439,"outputTokens":199,"latencyMs":3300.337999999989},{"questionId":"q40","format":"json-pretty","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":10807,"outputTokens":522,"latencyMs":10160.32945900003},{"questionId":"q40","format":"json-compact","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":6888,"outputTokens":586,"latencyMs":8726.273291999998},{"questionId":"q40","format":"toon","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":7325,"outputTokens":458,"latencyMs":7168.197749999992},{"questionId":"q40","format":"xml","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":12115,"outputTokens":330,"latencyMs":9622.921792000008},{"questionId":"q40","format":"yaml","model":"gpt-5-nano","expected":"1822.85","actual":"1822.85","isCorrect":true,"inputTokens":8439,"outputTokens":458,"latencyMs":12277.717749999953},{"questionId":"q41","format":"json-pretty","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":10807,"outputTokens":647,"latencyMs":10452.618916000007},{"questionId":"q41","format":"json-compact","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":6888,"outputTokens":327,"latencyMs":7993.501333000022},{"questionId":"q41","format":"toon","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":7325,"outputTokens":391,"latencyMs":6246.408166999987},{"questionId":"q41","format":"xml","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":12115,"outputTokens":327,"latencyMs":6159.920333000016},{"questionId":"q41","format":"yaml","model":"gpt-5-nano","expected":"pending","actual":"pending","isCorrect":true,"inputTok
ens":8439,"outputTokens":135,"latencyMs":4936.892832999991},{"questionId":"q42","format":"json-pretty","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":10807,"outputTokens":330,"latencyMs":6513.42883400002},{"questionId":"q42","format":"json-compact","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":6888,"outputTokens":266,"latencyMs":5169.701290999947},{"questionId":"q42","format":"toon","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":7325,"outputTokens":394,"latencyMs":6359.4133749999455},{"questionId":"q42","format":"xml","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":12115,"outputTokens":458,"latencyMs":7373.879582999973},{"questionId":"q42","format":"yaml","model":"gpt-5-nano","expected":"1311.35","actual":"1311.35","isCorrect":true,"inputTokens":8439,"outputTokens":586,"latencyMs":9219.977708999999},{"questionId":"q43","format":"json-pretty","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10807,"outputTokens":456,"latencyMs":6681.0621670000255},{"questionId":"q43","format":"json-compact","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":6888,"outputTokens":456,"latencyMs":6576.267416999966},{"questionId":"q43","format":"toon","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7325,"outputTokens":328,"latencyMs":5022.24679200002},{"questionId":"q43","format":"xml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12115,"outputTokens":200,"latencyMs":3512.9928749999963},{"questionId":"q43","format":"yaml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8439,"outputTokens":264,"latencyMs":5002.190166999993},{"questionId":"q44","format":"json-pretty","model":"gpt-5
-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":10808,"outputTokens":332,"latencyMs":4617.067666999996},{"questionId":"q44","format":"json-compact","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":6889,"outputTokens":780,"latencyMs":10020.49199999997},{"questionId":"q44","format":"toon","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":7326,"outputTokens":268,"latencyMs":7557.820707999985},{"questionId":"q44","format":"xml","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":12116,"outputTokens":588,"latencyMs":8139.475207999989},{"questionId":"q44","format":"yaml","model":"gpt-5-nano","expected":"Debbie O'Kon I","actual":"Debbie O'Kon I","isCorrect":true,"inputTokens":8440,"outputTokens":268,"latencyMs":4885.101999999955},{"questionId":"q45","format":"json-pretty","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":10808,"outputTokens":399,"latencyMs":5571.73229200003},{"questionId":"q45","format":"json-compact","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":6889,"outputTokens":655,"latencyMs":10517.11179200001},{"questionId":"q45","format":"toon","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":7326,"outputTokens":271,"latencyMs":4059.1732920000213},{"questionId":"q45","format":"xml","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":12116,"outputTokens":143,"latencyMs":3110.5762920000125},{"questionId":"q45","format":"yaml","model":"gpt-5-nano","expected":"demetris.hoeger-pollich@yahoo.
com","actual":"demetris.hoeger-pollich@yahoo.com","isCorrect":true,"inputTokens":8440,"outputTokens":207,"latencyMs":3715.018374999985},{"questionId":"q46","format":"json-pretty","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":10808,"outputTokens":844,"latencyMs":10064.53433299996},{"questionId":"q46","format":"json-compact","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":6889,"outputTokens":716,"latencyMs":11513.566083000042},{"questionId":"q46","format":"toon","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":7326,"outputTokens":332,"latencyMs":4870.685041000019},{"questionId":"q46","format":"xml","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":12116,"outputTokens":396,"latencyMs":5502.409417000017},{"questionId":"q46","format":"yaml","model":"gpt-5-nano","expected":"2025-09-16","actual":"2025-09-16","isCorrect":true,"inputTokens":8440,"outputTokens":204,"latencyMs":3546.790500000003},{"questionId":"q47","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":10807,"outputTokens":711,"latencyMs":8666.248917000019},{"questionId":"q47","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6888,"outputTokens":391,"latencyMs":5154.15979200002},{"questionId":"q47","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":7325,"outputTokens":455,"latencyMs":6469.648125000007},{"questionId":"q47","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":12115,"outputTokens":967,"latencyMs":12026.984790999966},{"questionId":"q47","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":8439,"outputTokens":327,"latencyMs":4457.898167000036},{"questionId":"q48","format":"j
son-pretty","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":10808,"outputTokens":266,"latencyMs":4677.685125000018},{"questionId":"q48","format":"json-compact","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":6889,"outputTokens":458,"latencyMs":7593.830499999982},{"questionId":"q48","format":"toon","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":7326,"outputTokens":330,"latencyMs":7164.58362499997},{"questionId":"q48","format":"xml","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":12116,"outputTokens":202,"latencyMs":4569.4382500000065},{"questionId":"q48","format":"yaml","model":"gpt-5-nano","expected":"Patty Senger","actual":"Patty Senger","isCorrect":true,"inputTokens":8440,"outputTokens":266,"latencyMs":8655.245208000008},{"questionId":"q49","format":"json-pretty","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":10808,"outputTokens":333,"latencyMs":9397.804249999986},{"questionId":"q49","format":"json-compact","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":6889,"outputTokens":525,"latencyMs":7369.637999999977},{"questionId":"q49","format":"toon","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":7326,"outputTokens":269,"latencyMs":7476.084625000018},{"questionId":"q49","format":"xml","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":12116,"outputTokens":205,"latencyMs":4457.102749999962},{"questionId":"q49","format":"yaml","model":"gpt-5-nano","expected":"viva.paucek@gmail.com","actual":"viva.paucek@gmail.com","isCorrect":true,"inputTokens":8440,"outputTokens":205,"latencyMs
":3285.5180420000106},{"questionId":"q50","format":"json-pretty","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":10808,"outputTokens":332,"latencyMs":5102.7447909999755},{"questionId":"q50","format":"json-compact","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":6889,"outputTokens":652,"latencyMs":8489.679457999999},{"questionId":"q50","format":"toon","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":7326,"outputTokens":204,"latencyMs":7314.751374999993},{"questionId":"q50","format":"xml","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":12116,"outputTokens":332,"latencyMs":5297.356582999986},{"questionId":"q50","format":"yaml","model":"gpt-5-nano","expected":"2025-09-21","actual":"2025-09-21","isCorrect":true,"inputTokens":8440,"outputTokens":460,"latencyMs":5892.525124999986},{"questionId":"q51","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":10807,"outputTokens":519,"latencyMs":7676.625582999957},{"questionId":"q51","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":6888,"outputTokens":711,"latencyMs":10736.315040999965},{"questionId":"q51","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":7325,"outputTokens":327,"latencyMs":7610.965416999999},{"questionId":"q51","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":12115,"outputTokens":647,"latencyMs":9436.054707999981},{"questionId":"q51","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":8439,"outputTokens":583,"latencyMs":7257.893417000014},{"questionId":"q52","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10804,"outputTokens
":1031,"latencyMs":12227.468916999991},{"questionId":"q52","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6885,"outputTokens":903,"latencyMs":10091.694916000008},{"questionId":"q52","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7322,"outputTokens":839,"latencyMs":10802.154916999978},{"questionId":"q52","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12112,"outputTokens":1287,"latencyMs":15510.80912499997},{"questionId":"q52","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8436,"outputTokens":775,"latencyMs":11378.500208000012},{"questionId":"q53","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10804,"outputTokens":775,"latencyMs":8308.13866700005},{"questionId":"q53","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6885,"outputTokens":775,"latencyMs":9544.55587500002},{"questionId":"q53","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7322,"outputTokens":775,"latencyMs":17279.684707999986},{"questionId":"q53","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12112,"outputTokens":967,"latencyMs":9357.227749999962},{"questionId":"q53","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8436,"outputTokens":647,"latencyMs":7357.085124999983},{"questionId":"q54","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10805,"outputTokens":903,"latencyMs":9983.735792000021},{"questionId":"q54","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6886,"outputTokens":775,"latencyMs":8389.489208000014},{"questionId":"
q54","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7323,"outputTokens":711,"latencyMs":8864.96325000003},{"questionId":"q54","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12113,"outputTokens":903,"latencyMs":11110.859708000033},{"questionId":"q54","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8437,"outputTokens":647,"latencyMs":9254.883999999962},{"questionId":"q55","format":"json-pretty","model":"gpt-5-nano","expected":"34904.81","actual":"34682.16","isCorrect":false,"inputTokens":10805,"outputTokens":3210,"latencyMs":35287.34470799996},{"questionId":"q55","format":"json-compact","model":"gpt-5-nano","expected":"34904.81","actual":"35004.81","isCorrect":false,"inputTokens":6886,"outputTokens":3082,"latencyMs":53411.28241600003},{"questionId":"q55","format":"toon","model":"gpt-5-nano","expected":"34904.81","actual":"34904.81","isCorrect":true,"inputTokens":7323,"outputTokens":5386,"latencyMs":65610.21349999995},{"questionId":"q55","format":"xml","model":"gpt-5-nano","expected":"34904.81","actual":"35004.81","isCorrect":false,"inputTokens":12113,"outputTokens":3082,"latencyMs":30146.02779099997},{"questionId":"q55","format":"yaml","model":"gpt-5-nano","expected":"34904.81","actual":"34904.81","isCorrect":true,"inputTokens":8437,"outputTokens":5194,"latencyMs":47443.21795800002},{"questionId":"q56","format":"json-pretty","model":"gpt-5-nano","expected":"698.10","actual":"698.10","isCorrect":true,"inputTokens":10803,"outputTokens":7433,"latencyMs":99668.25054200005},{"questionId":"q56","format":"json-compact","model":"gpt-5-nano","expected":"698.10","actual":"698.10","isCorrect":true,"inputTokens":6884,"outputTokens":2825,"latencyMs":29726.150667000038},{"questionId":"q56","format":"toon","model":"gpt-5-nano","expected":"698.10","actual":"698.0962","isCorrect":true,"inputTokens":7321,"outputTokens":3402,"latencyMs":2
9333.87345799996},{"questionId":"q56","format":"xml","model":"gpt-5-nano","expected":"698.10","actual":"698.10","isCorrect":true,"inputTokens":12111,"outputTokens":8713,"latencyMs":88680.83875000005},{"questionId":"q56","format":"yaml","model":"gpt-5-nano","expected":"698.10","actual":"700.10","isCorrect":false,"inputTokens":8435,"outputTokens":2185,"latencyMs":22504.17745899997},{"questionId":"q57","format":"json-pretty","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":10804,"outputTokens":263,"latencyMs":3635.718082999985},{"questionId":"q57","format":"json-compact","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":6885,"outputTokens":455,"latencyMs":6344.6660000000265},{"questionId":"q57","format":"toon","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":7322,"outputTokens":135,"latencyMs":2214.3594589999993},{"questionId":"q57","format":"xml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":12112,"outputTokens":199,"latencyMs":3028.1589169999934},{"questionId":"q57","format":"yaml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":8436,"outputTokens":263,"latencyMs":3726.436791999964},{"questionId":"q58","format":"json-pretty","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":10803,"outputTokens":1034,"latencyMs":17297.540125000058},{"questionId":"q58","format":"json-compact","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":6884,"outputTokens":1226,"latencyMs":15772.636416999972},{"questionId":"q58","format":"toon","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":7321,"outputTokens":714,"latencyMs":7838.149208000046},{"questionId":"q58","format":"xml","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":12111,"outputTokens":1418,"latencyMs"
:18521.650375000027},{"questionId":"q58","format":"yaml","model":"gpt-5-nano","expected":"2152.82","actual":"2152.82","isCorrect":true,"inputTokens":8435,"outputTokens":1546,"latencyMs":16359.941374999937},{"questionId":"q59","format":"json-pretty","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":10807,"outputTokens":1543,"latencyMs":27746.01329100004},{"questionId":"q59","format":"json-compact","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":6888,"outputTokens":1799,"latencyMs":21588.628625000012},{"questionId":"q59","format":"toon","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":7325,"outputTokens":1607,"latencyMs":22641.205915999948},{"questionId":"q59","format":"xml","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":12115,"outputTokens":2119,"latencyMs":21735.99774999998},{"questionId":"q59","format":"yaml","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8439,"outputTokens":1415,"latencyMs":15331.604541999986},{"questionId":"q60","format":"json-pretty","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":10807,"outputTokens":1671,"latencyMs":23176.00108299998},{"questionId":"q60","format":"json-compact","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":6888,"outputTokens":1031,"latencyMs":11122.897291000001},{"questionId":"q60","format":"toon","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":7325,"outputTokens":2119,"latencyMs":44203.42183300003},{"questionId":"q60","format":"xml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":12115,"outputTokens":1415,"latencyMs":21619.647499999963},{"questionId":"q60","format":"yaml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8439,"outputTokens":2055,"latencyMs":20646.457915999927},{"questionId":"q
61","format":"json-pretty","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":10807,"outputTokens":1799,"latencyMs":22455.639375000028},{"questionId":"q61","format":"json-compact","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":6888,"outputTokens":1223,"latencyMs":12465.433750000084},{"questionId":"q61","format":"toon","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":7325,"outputTokens":1543,"latencyMs":17901.21987499995},{"questionId":"q61","format":"xml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":12115,"outputTokens":1351,"latencyMs":21725.661124999984},{"questionId":"q61","format":"yaml","model":"gpt-5-nano","expected":"28","actual":"28","isCorrect":true,"inputTokens":8439,"outputTokens":1479,"latencyMs":14143.484124999959},{"questionId":"q62","format":"json-pretty","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":10811,"outputTokens":1095,"latencyMs":11719.594000000041},{"questionId":"q62","format":"json-compact","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":6892,"outputTokens":1543,"latencyMs":16026.440790999914},{"questionId":"q62","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":7329,"outputTokens":839,"latencyMs":8525.078959000064},{"questionId":"q62","format":"xml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":12119,"outputTokens":1031,"latencyMs":11283.568582999986},{"questionId":"q62","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":8443,"outputTokens":1159,"latencyMs":13248.422166000004},{"questionId":"q63","format":"json-pretty","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":10811,"outputTokens":903,"latencyMs":10624.811125000007},{"questionId":"q63","format":"json-compact","model":"gpt-5-nano","e
xpected":"6","actual":"6","isCorrect":true,"inputTokens":6892,"outputTokens":1287,"latencyMs":15027.778207999887},{"questionId":"q63","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":7329,"outputTokens":967,"latencyMs":10102.057166000013},{"questionId":"q63","format":"xml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":12119,"outputTokens":1223,"latencyMs":14080.474375000107},{"questionId":"q63","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8443,"outputTokens":839,"latencyMs":10806.409125000006},{"questionId":"q64","format":"json-pretty","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":10812,"outputTokens":647,"latencyMs":8619.796208000043},{"questionId":"q64","format":"json-compact","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":6893,"outputTokens":1095,"latencyMs":11266.89533300011},{"questionId":"q64","format":"toon","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":7330,"outputTokens":903,"latencyMs":10153.941749999998},{"questionId":"q64","format":"xml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":12120,"outputTokens":903,"latencyMs":10022.941333999974},{"questionId":"q64","format":"yaml","model":"gpt-5-nano","expected":"10","actual":"10","isCorrect":true,"inputTokens":8444,"outputTokens":1031,"latencyMs":11607.239833},{"questionId":"q65","format":"json-pretty","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":10812,"outputTokens":839,"latencyMs":9680.83516700007},{"questionId":"q65","format":"json-compact","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":6893,"outputTokens":1351,"latencyMs":13172.463165999972},{"questionId":"q65","format":"toon","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":7330,"outputTo
kens":967,"latencyMs":13761.158374999999},{"questionId":"q65","format":"xml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":12120,"outputTokens":775,"latencyMs":8579.024916999973},{"questionId":"q65","format":"yaml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":8444,"outputTokens":967,"latencyMs":11227.277834000066},{"questionId":"q66","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":10811,"outputTokens":1095,"latencyMs":11719.79470800003},{"questionId":"q66","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6892,"outputTokens":1287,"latencyMs":12974.757499999949},{"questionId":"q66","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":7329,"outputTokens":1159,"latencyMs":12100.158374999999},{"questionId":"q66","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":12119,"outputTokens":1607,"latencyMs":18089.76549999998},{"questionId":"q66","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":8443,"outputTokens":1351,"latencyMs":14901.317249999964},{"questionId":"q67","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":10811,"outputTokens":967,"latencyMs":13135.226917000022},{"questionId":"q67","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6892,"outputTokens":1479,"latencyMs":17517.77812500007},{"questionId":"q67","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":7329,"outputTokens":1159,"latencyMs":14243.380082999938},{"questionId":"q67","format":"xml","model":"gpt-5-nano","expected":"3","actual":"8","isCorrect":false,"inputTokens":12119,"outputTokens":4551,"latencyMs":60141.679957999964},{"questionId":"q67","format":
"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":8443,"outputTokens":1223,"latencyMs":14046.266375000007},{"questionId":"q68","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":10812,"outputTokens":1095,"latencyMs":13080.296417000005},{"questionId":"q68","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6893,"outputTokens":1095,"latencyMs":12948.556707999902},{"questionId":"q68","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7330,"outputTokens":967,"latencyMs":11987.064000000013},{"questionId":"q68","format":"xml","model":"gpt-5-nano","expected":"5","actual":"4","isCorrect":false,"inputTokens":12120,"outputTokens":2375,"latencyMs":25660.169375000056},{"questionId":"q68","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":8444,"outputTokens":1095,"latencyMs":13157.490292000002},{"questionId":"q69","format":"json-pretty","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":10813,"outputTokens":1991,"latencyMs":22131.704499999993},{"questionId":"q69","format":"json-compact","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":6894,"outputTokens":2503,"latencyMs":35345.66633400007},{"questionId":"q69","format":"toon","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":7331,"outputTokens":1799,"latencyMs":20451.80954200006},{"questionId":"q69","format":"xml","model":"gpt-5-nano","expected":"20","actual":"23","isCorrect":false,"inputTokens":12121,"outputTokens":4423,"latencyMs":45258.41941600002},{"questionId":"q69","format":"yaml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":8445,"outputTokens":3335,"latencyMs":34735.526083000004},{"questionId":"q70","format":"json-pretty","model":"gpt-5-nano","expected":"19","actua
l":"19","isCorrect":true,"inputTokens":10813,"outputTokens":2887,"latencyMs":41997.15891700005},{"questionId":"q70","format":"json-compact","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":6894,"outputTokens":2503,"latencyMs":39163.075124999974},{"questionId":"q70","format":"toon","model":"gpt-5-nano","expected":"19","actual":"19","isCorrect":true,"inputTokens":7331,"outputTokens":2503,"latencyMs":27668.932207999984},{"questionId":"q70","format":"xml","model":"gpt-5-nano","expected":"19","actual":"30","isCorrect":false,"inputTokens":12121,"outputTokens":7303,"latencyMs":115098.00795900007},{"questionId":"q70","format":"yaml","model":"gpt-5-nano","expected":"19","actual":"18","isCorrect":false,"inputTokens":8445,"outputTokens":2247,"latencyMs":46982.19175},{"questionId":"q71","format":"json-pretty","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":3772,"outputTokens":72,"latencyMs":2913.5479589999886},{"questionId":"q71","format":"json-compact","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":2445,"outputTokens":136,"latencyMs":2803.939374999958},{"questionId":"q71","format":"toon","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1643,"outputTokens":200,"latencyMs":6143.997375000035},{"questionId":"q71","format":"csv","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":1501,"outputTokens":264,"latencyMs":3737.9859579999465},{"questionId":"q71","format":"xml","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":4478,"outputTokens":136,"latencyMs":6451.102792000049},{"questionId":"q71","format":"yaml","model":"gpt-5-nano","expected":"4322","actual":"4322","isCorrect":true,"inputTokens":3043,"outputTokens":136,"latencyMs":3775.4380420000525},{"questionId":"q72","format":"json-pretty","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"
inputTokens":3772,"outputTokens":202,"latencyMs":5033.039417000022},{"questionId":"q72","format":"json-compact","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":2445,"outputTokens":266,"latencyMs":4111.494624999934},{"questionId":"q72","format":"toon","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1643,"outputTokens":394,"latencyMs":8638.389167000074},{"questionId":"q72","format":"csv","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":1501,"outputTokens":458,"latencyMs":8524.106499999994},{"questionId":"q72","format":"xml","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":4478,"outputTokens":330,"latencyMs":3970.999124999973},{"questionId":"q72","format":"yaml","model":"gpt-5-nano","expected":"10432.04","actual":"10432.04","isCorrect":true,"inputTokens":3043,"outputTokens":394,"latencyMs":4908.646542000002},{"questionId":"q73","format":"json-pretty","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3773,"outputTokens":393,"latencyMs":5176.399416},{"questionId":"q73","format":"json-compact","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":2446,"outputTokens":393,"latencyMs":4801.460207999917},{"questionId":"q73","format":"toon","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1644,"outputTokens":393,"latencyMs":6057.693375000032},{"questionId":"q73","format":"csv","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":1502,"outputTokens":265,"latencyMs":3750.650166999898},{"questionId":"q73","format":"xml","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":4479,"outputTokens":265,"latencyMs":3854.7792090000585},{"questionId":"q73","format":"yaml","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,
"inputTokens":3044,"outputTokens":393,"latencyMs":6347.295958000002},{"questionId":"q74","format":"json-pretty","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3773,"outputTokens":199,"latencyMs":2818.6481669999193},{"questionId":"q74","format":"json-compact","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":2446,"outputTokens":583,"latencyMs":7016.024041999946},{"questionId":"q74","format":"toon","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1644,"outputTokens":327,"latencyMs":11679.409291999997},{"questionId":"q74","format":"csv","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1502,"outputTokens":263,"latencyMs":8099.556624999968},{"questionId":"q74","format":"xml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":4479,"outputTokens":263,"latencyMs":5838.912250000052},{"questionId":"q74","format":"yaml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3044,"outputTokens":199,"latencyMs":4037.007084000041},{"questionId":"q75","format":"json-pretty","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":3772,"outputTokens":200,"latencyMs":3196.2014169999165},{"questionId":"q75","format":"json-compact","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":2445,"outputTokens":456,"latencyMs":5860.588916999986},{"questionId":"q75","format":"toon","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1643,"outputTokens":136,"latencyMs":7298.248332999996},{"questionId":"q75","format":"csv","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":1501,"outputTokens":136,"latencyMs":2674.1493330000667},{"questionId":"q75","format":"xml","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":4478,"outputTokens":136,"latencyM
s":4064.031667000032},{"questionId":"q75","format":"yaml","model":"gpt-5-nano","expected":"4096","actual":"4096","isCorrect":true,"inputTokens":3043,"outputTokens":264,"latencyMs":5344.256874999963},{"questionId":"q76","format":"json-pretty","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":3772,"outputTokens":522,"latencyMs":7330.072791999904},{"questionId":"q76","format":"json-compact","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":2445,"outputTokens":650,"latencyMs":6851.327042000019},{"questionId":"q76","format":"toon","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1643,"outputTokens":522,"latencyMs":5857.433624999947},{"questionId":"q76","format":"csv","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":1501,"outputTokens":522,"latencyMs":6343.291249999893},{"questionId":"q76","format":"xml","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":4478,"outputTokens":266,"latencyMs":4330.397083999938},{"questionId":"q76","format":"yaml","model":"gpt-5-nano","expected":"4533.1","actual":"4533.1","isCorrect":true,"inputTokens":3043,"outputTokens":394,"latencyMs":5960.957958000014},{"questionId":"q77","format":"json-pretty","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":3773,"outputTokens":329,"latencyMs":5319.284916999983},{"questionId":"q77","format":"json-compact","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":2446,"outputTokens":329,"latencyMs":4361.255540999933},{"questionId":"q77","format":"toon","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1644,"outputTokens":585,"latencyMs":7900.699583000038},{"questionId":"q77","format":"csv","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":1502,"outputTokens":457,"latencyMs"
:5575.410874999943},{"questionId":"q77","format":"xml","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":4479,"outputTokens":521,"latencyMs":8356.306833000039},{"questionId":"q77","format":"yaml","model":"gpt-5-nano","expected":"0.63","actual":"0.63","isCorrect":true,"inputTokens":3044,"outputTokens":329,"latencyMs":7722.601000000024},{"questionId":"q78","format":"json-pretty","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":3773,"outputTokens":263,"latencyMs":5472.443542000023},{"questionId":"q78","format":"json-compact","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":2446,"outputTokens":135,"latencyMs":3127.7964580000844},{"questionId":"q78","format":"toon","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":1644,"outputTokens":263,"latencyMs":3561.4773750000168},{"questionId":"q78","format":"csv","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":1502,"outputTokens":199,"latencyMs":2641.831709000049},{"questionId":"q78","format":"xml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":4479,"outputTokens":263,"latencyMs":3984.2812919999706},{"questionId":"q78","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":3044,"outputTokens":327,"latencyMs":4146.360374999931},{"questionId":"q79","format":"json-pretty","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":3772,"outputTokens":200,"latencyMs":3001.3016669999342},{"questionId":"q79","format":"json-compact","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":2445,"outputTokens":200,"latencyMs":3991.4621250000782},{"questionId":"q79","format":"toon","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1643,"outputTokens":456,"latencyMs":7423.164000000106},{"questionId":"q79","form
at":"csv","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":1501,"outputTokens":328,"latencyMs":3855.023082999978},{"questionId":"q79","format":"xml","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":4478,"outputTokens":264,"latencyMs":4289.763542000088},{"questionId":"q79","format":"yaml","model":"gpt-5-nano","expected":"4076","actual":"4076","isCorrect":true,"inputTokens":3043,"outputTokens":200,"latencyMs":3733.6695410000393},{"questionId":"q80","format":"json-pretty","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3769,"outputTokens":903,"latencyMs":9863.215167000075},{"questionId":"q80","format":"json-compact","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":2442,"outputTokens":775,"latencyMs":9231.59695799998},{"questionId":"q80","format":"toon","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1640,"outputTokens":391,"latencyMs":5704.973082999932},{"questionId":"q80","format":"csv","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1498,"outputTokens":583,"latencyMs":8583.113332999987},{"questionId":"q80","format":"xml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":4475,"outputTokens":327,"latencyMs":5083.1711249999935},{"questionId":"q80","format":"yaml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3040,"outputTokens":455,"latencyMs":6700.828666999936},{"questionId":"q81","format":"json-pretty","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":3770,"outputTokens":1608,"latencyMs":18562.350708000013},{"questionId":"q81","format":"json-compact","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":2443,"outputTokens":2312,"latencyMs":21658.89724999992},{"questionId":"q81","format":"toon","model":"gpt-5-nano","expect
ed":"328320","actual":"328320","isCorrect":true,"inputTokens":1641,"outputTokens":2760,"latencyMs":26115.369374999893},{"questionId":"q81","format":"csv","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":1499,"outputTokens":2760,"latencyMs":27018.220417000004},{"questionId":"q81","format":"xml","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":4476,"outputTokens":3208,"latencyMs":35106.79574999993},{"questionId":"q81","format":"yaml","model":"gpt-5-nano","expected":"328320","actual":"328320","isCorrect":true,"inputTokens":3041,"outputTokens":3144,"latencyMs":28490.40104200004},{"questionId":"q82","format":"json-pretty","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":3770,"outputTokens":2056,"latencyMs":22616.934583000024},{"questionId":"q82","format":"json-compact","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":2443,"outputTokens":2376,"latencyMs":29998.364584000083},{"questionId":"q82","format":"toon","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":1641,"outputTokens":3080,"latencyMs":32440.0187919999},{"questionId":"q82","format":"csv","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":1499,"outputTokens":1864,"latencyMs":32674.871374999988},{"questionId":"q82","format":"xml","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":4476,"outputTokens":1992,"latencyMs":18328.068833000027},{"questionId":"q82","format":"yaml","model":"gpt-5-nano","expected":"1791","actual":"1791","isCorrect":true,"inputTokens":3041,"outputTokens":2440,"latencyMs":26599.709708000068},{"questionId":"q83","format":"json-pretty","model":"gpt-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":3768,"outputTokens":3594,"latencyMs":31722.614457999938},{"questionId":"q83","format":"json-compact","model":"gp
t-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":2441,"outputTokens":6346,"latencyMs":61432.27987500001},{"questionId":"q83","format":"toon","model":"gpt-5-nano","expected":"311695.88","actual":"310695.88","isCorrect":false,"inputTokens":1639,"outputTokens":6922,"latencyMs":67581.54674999998},{"questionId":"q83","format":"csv","model":"gpt-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":1497,"outputTokens":3658,"latencyMs":34084.16679099994},{"questionId":"q83","format":"xml","model":"gpt-5-nano","expected":"311695.88","actual":"310795.88","isCorrect":false,"inputTokens":4474,"outputTokens":7178,"latencyMs":72630.23875000002},{"questionId":"q83","format":"yaml","model":"gpt-5-nano","expected":"311695.88","actual":"311695.88","isCorrect":true,"inputTokens":3039,"outputTokens":5898,"latencyMs":57679.497999999905},{"questionId":"q84","format":"json-pretty","model":"gpt-5-nano","expected":"0.53","actual":"0.53","isCorrect":true,"inputTokens":3766,"outputTokens":7369,"latencyMs":67930.0475000001},{"questionId":"q84","format":"json-compact","model":"gpt-5-nano","expected":"0.53","actual":"0.5278333333333333","isCorrect":true,"inputTokens":2439,"outputTokens":4942,"latencyMs":46617.24916699994},{"questionId":"q84","format":"toon","model":"gpt-5-nano","expected":"0.53","actual":"0.527833","isCorrect":true,"inputTokens":1637,"outputTokens":4874,"latencyMs":46005.71258299996},{"questionId":"q84","format":"csv","model":"gpt-5-nano","expected":"0.53","actual":"0.5278333333","isCorrect":true,"inputTokens":1495,"outputTokens":3532,"latencyMs":33943.250624999986},{"questionId":"q84","format":"xml","model":"gpt-5-nano","expected":"0.53","actual":"0.5298333333","isCorrect":true,"inputTokens":4472,"outputTokens":4044,"latencyMs":37510.19779100001},{"questionId":"q84","format":"yaml","model":"gpt-5-nano","expected":"0.53","actual":"0.5278333333","isCorrect":true,"inputTokens":3037,"outputTokens":4556,"latencyMs":55
096.31458399992},{"questionId":"q85","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3770,"outputTokens":1351,"latencyMs":13053.103250000044},{"questionId":"q85","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2443,"outputTokens":1031,"latencyMs":11122.031167000066},{"questionId":"q85","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1641,"outputTokens":1863,"latencyMs":19323.529084000038},{"questionId":"q85","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1499,"outputTokens":1479,"latencyMs":18111.95750000002},{"questionId":"q85","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4476,"outputTokens":1671,"latencyMs":26929.74866599997},{"questionId":"q85","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3041,"outputTokens":1479,"latencyMs":14530.826667000074},{"questionId":"q86","format":"json-pretty","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":3769,"outputTokens":1543,"latencyMs":13622.04912500002},{"questionId":"q86","format":"json-compact","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":2442,"outputTokens":2055,"latencyMs":23578.416874999995},{"questionId":"q86","format":"toon","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":1640,"outputTokens":3143,"latencyMs":42258.583417000016},{"questionId":"q86","format":"csv","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":1498,"outputTokens":2631,"latencyMs":24667.923749999958},{"questionId":"q86","format":"xml","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":4475,"outputTokens":2247,"latencyMs":20338.54649999994},{"questionId":"q86","format":"yaml",
"model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":3040,"outputTokens":1863,"latencyMs":19668.138083999977},{"questionId":"q87","format":"json-pretty","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3776,"outputTokens":1735,"latencyMs":17175.41733299999},{"questionId":"q87","format":"json-compact","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":2449,"outputTokens":2119,"latencyMs":21621.97758299997},{"questionId":"q87","format":"toon","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1647,"outputTokens":2183,"latencyMs":20844.98749999993},{"questionId":"q87","format":"csv","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1505,"outputTokens":1671,"latencyMs":17007.475209000055},{"questionId":"q87","format":"xml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":4482,"outputTokens":2247,"latencyMs":20809.911709000007},{"questionId":"q87","format":"yaml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3047,"outputTokens":2439,"latencyMs":23743.970708000008},{"questionId":"q88","format":"json-pretty","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":3776,"outputTokens":1479,"latencyMs":13780.006082999986},{"questionId":"q88","format":"json-compact","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":2449,"outputTokens":2567,"latencyMs":28457.375708000036},{"questionId":"q88","format":"toon","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":1647,"outputTokens":1927,"latencyMs":19491.54999999993},{"questionId":"q88","format":"csv","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":1505,"outputTokens":1223,"latencyMs":18888.93787500006},{"questionId":"q88","format":"xml","model":"gpt-5-nano","expected":"14","actual":"14","is
Correct":true,"inputTokens":4482,"outputTokens":2055,"latencyMs":19879.96829199989},{"questionId":"q88","format":"yaml","model":"gpt-5-nano","expected":"14","actual":"14","isCorrect":true,"inputTokens":3047,"outputTokens":2439,"latencyMs":37028.00633300003},{"questionId":"q89","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3776,"outputTokens":1799,"latencyMs":20709.313749999972},{"questionId":"q89","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2449,"outputTokens":3143,"latencyMs":28282.831917000003},{"questionId":"q89","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1647,"outputTokens":2055,"latencyMs":19895.249166999944},{"questionId":"q89","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1505,"outputTokens":1927,"latencyMs":24627.73229199997},{"questionId":"q89","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4482,"outputTokens":2631,"latencyMs":26081.29570799996},{"questionId":"q89","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3047,"outputTokens":1991,"latencyMs":19726.860791999963},{"questionId":"q90","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3776,"outputTokens":2567,"latencyMs":24450.25691700005},{"questionId":"q90","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2449,"outputTokens":3079,"latencyMs":30192.66949999996},{"questionId":"q90","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1647,"outputTokens":2695,"latencyMs":30800.806582999998},{"questionId":"q90","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1505,"outputTokens":1735,"
latencyMs":17525.293582999962},{"questionId":"q90","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4482,"outputTokens":2503,"latencyMs":24294.877791999956},{"questionId":"q90","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3047,"outputTokens":2887,"latencyMs":27950.61812499992},{"questionId":"q91","format":"json-pretty","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3776,"outputTokens":2695,"latencyMs":41832.30825},{"questionId":"q91","format":"json-compact","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":2449,"outputTokens":2311,"latencyMs":25780.131374999997},{"questionId":"q91","format":"toon","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1647,"outputTokens":2247,"latencyMs":25229.332374999998},{"questionId":"q91","format":"csv","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":1505,"outputTokens":2311,"latencyMs":22338.296957999934},{"questionId":"q91","format":"xml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":4482,"outputTokens":2887,"latencyMs":38730.74291699997},{"questionId":"q91","format":"yaml","model":"gpt-5-nano","expected":"20","actual":"20","isCorrect":true,"inputTokens":3047,"outputTokens":3655,"latencyMs":35328.02654200001},{"questionId":"q92","format":"json-pretty","model":"gpt-5-nano","expected":"32","actual":"33","isCorrect":false,"inputTokens":3775,"outputTokens":2247,"latencyMs":25864.640083999955},{"questionId":"q92","format":"json-compact","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":2448,"outputTokens":2503,"latencyMs":25477.055166000035},{"questionId":"q92","format":"toon","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1646,"outputTokens":3399,"latencyMs":31212.052334000007},{"questionId":"q92","forma
t":"csv","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":1504,"outputTokens":1607,"latencyMs":16476.131416000077},{"questionId":"q92","format":"xml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":4481,"outputTokens":2695,"latencyMs":40359.28500000003},{"questionId":"q92","format":"yaml","model":"gpt-5-nano","expected":"32","actual":"32","isCorrect":true,"inputTokens":3046,"outputTokens":2439,"latencyMs":30665.69058299996},{"questionId":"q93","format":"json-pretty","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":3775,"outputTokens":1927,"latencyMs":21325.32583300001},{"questionId":"q93","format":"json-compact","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":2448,"outputTokens":2183,"latencyMs":22164.555750000058},{"questionId":"q93","format":"toon","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":1646,"outputTokens":2119,"latencyMs":21662.80249999999},{"questionId":"q93","format":"csv","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":1504,"outputTokens":1927,"latencyMs":20278.839750000043},{"questionId":"q93","format":"xml","model":"gpt-5-nano","expected":"9","actual":"9","isCorrect":true,"inputTokens":4481,"outputTokens":1479,"latencyMs":16767.767083000042},{"questionId":"q93","format":"yaml","model":"gpt-5-nano","expected":"9","actual":"2025-01-17,2025-01-20,2025-01-27,2025-01-28,2025-01-30,2025-02-06,2025-02-10,2025-02-11,2025-02-12","isCorrect":false,"inputTokens":3046,"outputTokens":2180,"latencyMs":27798.925208},{"questionId":"q94","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3778,"outputTokens":2439,"latencyMs":34332.84691700002},{"questionId":"q94","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2451,"outputTokens":2183,"latencyMs":28876.632458999986},
{"questionId":"q94","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1649,"outputTokens":3399,"latencyMs":29958.79008299997},{"questionId":"q94","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1507,"outputTokens":3463,"latencyMs":36254.60154099995},{"questionId":"q94","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4484,"outputTokens":3399,"latencyMs":33635.2489169999},{"questionId":"q94","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3049,"outputTokens":2951,"latencyMs":26639.627958000056},{"questionId":"q95","format":"json-pretty","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3778,"outputTokens":2375,"latencyMs":24043.562916999916},{"questionId":"q95","format":"json-compact","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":2451,"outputTokens":3399,"latencyMs":36727.34020800004},{"questionId":"q95","format":"toon","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1649,"outputTokens":2695,"latencyMs":28240.418083999888},{"questionId":"q95","format":"csv","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":1507,"outputTokens":3143,"latencyMs":32131.14024999994},{"questionId":"q95","format":"xml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":4484,"outputTokens":3079,"latencyMs":30882.657292000018},{"questionId":"q95","format":"yaml","model":"gpt-5-nano","expected":"22","actual":"22","isCorrect":true,"inputTokens":3049,"outputTokens":3143,"latencyMs":34199.13054200006},{"questionId":"q96","format":"json-pretty","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":15252,"outputTokens":136,"latencyMs":3174.137834000052},{"questionId":"q96","format":"json-compact","model":"gpt-
5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":11559,"outputTokens":264,"latencyMs":4427.513500000001},{"questionId":"q96","format":"toon","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8873,"outputTokens":328,"latencyMs":4197.75774999999},{"questionId":"q96","format":"csv","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8621,"outputTokens":264,"latencyMs":4365.791124999989},{"questionId":"q96","format":"xml","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":17198,"outputTokens":328,"latencyMs":8591.86379199999},{"questionId":"q96","format":"yaml","model":"gpt-5-nano","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":13234,"outputTokens":136,"latencyMs":3006.3902920000255},{"questionId":"q97","format":"json-pretty","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":15254,"outputTokens":328,"latencyMs":5031.756458000047},{"questionId":"q97","format":"json-compact","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":11561,"outputTokens":328,"latencyMs":4921.948124999995},{"questionId":"q97","format":"toon","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8875,"outputTokens":328,"latencyMs":13561.781500000041},{"questionId":"q97","format":"csv","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8623,"outputTokens":328,"latencyMs":11962.30929200002},{"questionId":"q97","format":"xml","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17200,"outputTokens":328,"latencyMs":8242.271916999947},{"questionId":"q97","format":"yaml","model":"gpt-5-nano","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":13236,"outputTokens":264,"latencyMs":7252.942959000007},{"questionId":"q98","format":"json-pretty","model":"
gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":15249,"outputTokens":264,"latencyMs":4860.508666999987},{"questionId":"q98","format":"json-compact","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":11556,"outputTokens":392,"latencyMs":5948.768499999889},{"questionId":"q98","format":"toon","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":8870,"outputTokens":264,"latencyMs":3931.105542000034},{"questionId":"q98","format":"csv","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":8618,"outputTokens":648,"latencyMs":11943.225541999913},{"questionId":"q98","format":"xml","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17195,"outputTokens":328,"latencyMs":5136.993999999948},{"questionId":"q98","format":"yaml","model":"gpt-5-nano","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":13231,"outputTokens":264,"latencyMs":3974.5073329999577},{"questionId":"q99","format":"json-pretty","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":15255,"outputTokens":583,"latencyMs":10160.71020900004},{"questionId":"q99","format":"json-compact","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":11562,"outputTokens":519,"latencyMs":10971.125875000027},{"questionId":"q99","format":"toon","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":8876,"outputTokens":327,"latencyMs":4399.097415999975},{"questionId":"q99","format":"csv","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":8624,"outputTokens":327,"latencyMs":5930.175540999975},{"questionId":"q99","format":"xml","model":"gpt-5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":17201,"outputTokens":391,"latencyMs":7765.284375000047},{"questionId":"q99","format":"yaml","model":"gpt-
5-nano","expected":"master","actual":"master","isCorrect":true,"inputTokens":13237,"outputTokens":391,"latencyMs":4973.2529159999685},{"questionId":"q100","format":"json-pretty","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":15249,"outputTokens":136,"latencyMs":3776.490165999974},{"questionId":"q100","format":"json-compact","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":11556,"outputTokens":328,"latencyMs":7673.985999999917},{"questionId":"q100","format":"toon","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8870,"outputTokens":392,"latencyMs":8448.222042000038},{"questionId":"q100","format":"csv","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8618,"outputTokens":328,"latencyMs":5834.679583000019},{"questionId":"q100","format":"xml","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17195,"outputTokens":328,"latencyMs":4700.3877090000315},{"questionId":"q100","format":"yaml","model":"gpt-5-nano","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":13231,"outputTokens":328,"latencyMs":4259.143250000081},{"questionId":"q101","format":"json-pretty","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":15254,"outputTokens":264,"latencyMs":4328.258417000063},{"questionId":"q101","format":"json-compact","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":11561,"outputTokens":264,"latencyMs":3439.5950410000514},{"questionId":"q101","format":"toon","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8875,"outputTokens":584,"latencyMs":6392.925374999992},{"questionId":"q101","format":"csv","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8623,"outputTokens":264,"latencyMs":4369.833875000011},{"questionId":"q101
","format":"xml","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17200,"outputTokens":264,"latencyMs":4026.3382080000592},{"questionId":"q101","format":"yaml","model":"gpt-5-nano","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":13236,"outputTokens":328,"latencyMs":3988.631625000038},{"questionId":"q102","format":"json-pretty","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":15253,"outputTokens":263,"latencyMs":6256.574583000038},{"questionId":"q102","format":"json-compact","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":11560,"outputTokens":263,"latencyMs":4265.1440410000505},{"questionId":"q102","format":"toon","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":8874,"outputTokens":455,"latencyMs":10965.912249999936},{"questionId":"q102","format":"csv","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":8622,"outputTokens":391,"latencyMs":7398.446083000046},{"questionId":"q102","format":"xml","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":17199,"outputTokens":391,"latencyMs":5909.86704199994},{"questionId":"q102","format":"yaml","model":"gpt-5-nano","expected":"678","actual":"678","isCorrect":true,"inputTokens":13235,"outputTokens":199,"latencyMs":3187.290791999898},{"questionId":"q103","format":"json-pretty","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":15252,"outputTokens":519,"latencyMs":6490.3748749999795},{"questionId":"q103","format":"json-compact","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":11559,"outputTokens":583,"latencyMs":7666.581874999916},{"questionId":"q103","format":"toon","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":8873,"outputTokens":327,"latencyMs":6757.961624999996},{"questionId":"q103","format":"
csv","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":8621,"outputTokens":263,"latencyMs":4993.553249999997},{"questionId":"q103","format":"xml","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":17198,"outputTokens":327,"latencyMs":8451.123458999908},{"questionId":"q103","format":"yaml","model":"gpt-5-nano","expected":"main","actual":"main","isCorrect":true,"inputTokens":13234,"outputTokens":199,"latencyMs":3843.894541000016},{"questionId":"q104","format":"json-pretty","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":15256,"outputTokens":392,"latencyMs":11129.69512499997},{"questionId":"q104","format":"json-compact","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":11563,"outputTokens":392,"latencyMs":8905.517333000083},{"questionId":"q104","format":"toon","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8877,"outputTokens":392,"latencyMs":6319.255083000055},{"questionId":"q104","format":"csv","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8625,"outputTokens":328,"latencyMs":7528.265875000041},{"questionId":"q104","format":"xml","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17202,"outputTokens":392,"latencyMs":13579.722625000053},{"questionId":"q104","format":"yaml","model":"gpt-5-nano","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":13238,"outputTokens":328,"latencyMs":10575.226957999985},{"questionId":"q105","format":"json-pretty","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":15252,"outputTokens":200,"latencyMs":5185.377082999912},{"questionId":"q105","format":"json-compact","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":11559,"outputTokens":264,"latencyMs":4393.0949580000015},{"questi
onId":"q105","format":"toon","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8873,"outputTokens":456,"latencyMs":6270.737916999962},{"questionId":"q105","format":"csv","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8621,"outputTokens":328,"latencyMs":4558.527000000002},{"questionId":"q105","format":"xml","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17198,"outputTokens":328,"latencyMs":5035.306250000023},{"questionId":"q105","format":"yaml","model":"gpt-5-nano","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":13234,"outputTokens":328,"latencyMs":6407.646999999997},{"questionId":"q106","format":"json-pretty","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":15257,"outputTokens":136,"latencyMs":3376.4645419999724},{"questionId":"q106","format":"json-compact","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":11564,"outputTokens":392,"latencyMs":4399.214457999915},{"questionId":"q106","format":"toon","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8878,"outputTokens":456,"latencyMs":5250.13595799997},{"questionId":"q106","format":"csv","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8626,"outputTokens":584,"latencyMs":10384.269833000028},{"questionId":"q106","format":"xml","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17203,"outputTokens":328,"latencyMs":4593.475250000018},{"questionId":"q106","format":"yaml","model":"gpt-5-nano","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":13239,"outputTokens":328,"latencyMs":9014.461250000051},{"questionId":"q107","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":15248,"outputTokens":2375,"latencyMs":24542.61075000011},{"questionId":"q1
07","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":11555,"outputTokens":711,"latencyMs":7960.394625000074},{"questionId":"q107","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8869,"outputTokens":199,"latencyMs":2844.534417000017},{"questionId":"q107","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8617,"outputTokens":4167,"latencyMs":85494.36879199988},{"questionId":"q107","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":17194,"outputTokens":2503,"latencyMs":24175.646917000064},{"questionId":"q107","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":13230,"outputTokens":3783,"latencyMs":37382.685666000005},{"questionId":"q108","format":"json-pretty","model":"gpt-5-nano","expected":"15413563","actual":"12284443","isCorrect":false,"inputTokens":15251,"outputTokens":4745,"latencyMs":52110.05108299991},{"questionId":"q108","format":"json-compact","model":"gpt-5-nano","expected":"15413563","actual":"15527138","isCorrect":false,"inputTokens":11558,"outputTokens":9417,"latencyMs":101241.22870800004},{"questionId":"q108","format":"toon","model":"gpt-5-nano","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":8872,"outputTokens":5257,"latencyMs":54524.812208000105},{"questionId":"q108","format":"csv","model":"gpt-5-nano","expected":"15413563","actual":"15413563","isCorrect":true,"inputTokens":8620,"outputTokens":5321,"latencyMs":62112.67933299986},{"questionId":"q108","format":"xml","model":"gpt-5-nano","expected":"15413563","actual":"18856320","isCorrect":false,"inputTokens":17197,"outputTokens":11785,"latencyMs":138915.09487500007},{"questionId":"q108","format":"yaml","model":"gpt-5-nano","expected":"15413563","actual":"18617253","isCorrect":false,"inputTokens":13233,"outputTokens":9097,"latenc
yMs":90010.27312499995},{"questionId":"q109","format":"json-pretty","model":"gpt-5-nano","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":15251,"outputTokens":7561,"latencyMs":100665.4815},{"questionId":"q109","format":"json-compact","model":"gpt-5-nano","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":11558,"outputTokens":5385,"latencyMs":63163.41504199989},{"questionId":"q109","format":"toon","model":"gpt-5-nano","expected":"2528243","actual":"2528243","isCorrect":true,"inputTokens":8872,"outputTokens":13577,"latencyMs":136019.44545800006},{"questionId":"q109","format":"csv","model":"gpt-5-nano","expected":"2528243","actual":"2831139","isCorrect":false,"inputTokens":8620,"outputTokens":8137,"latencyMs":83168.071},{"questionId":"q109","format":"xml","model":"gpt-5-nano","expected":"2528243","actual":"3500000","isCorrect":false,"inputTokens":17197,"outputTokens":12745,"latencyMs":298602.1220839999},{"questionId":"q109","format":"yaml","model":"gpt-5-nano","expected":"2528243","actual":"11131566","isCorrect":false,"inputTokens":13233,"outputTokens":21577,"latencyMs":197418.5528330002},{"questionId":"q110","format":"json-pretty","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":15250,"outputTokens":10762,"latencyMs":100897.18295899988},{"questionId":"q110","format":"json-compact","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":11557,"outputTokens":6602,"latencyMs":69045.86083299993},{"questionId":"q110","format":"toon","model":"gpt-5-nano","expected":"154136","actual":"154125.63","isCorrect":false,"inputTokens":8871,"outputTokens":5642,"latencyMs":53949.60254199989},{"questionId":"q110","format":"csv","model":"gpt-5-nano","expected":"154136","actual":"154135.63","isCorrect":false,"inputTokens":8619,"outputTokens":4874,"latencyMs":52207.93591700005},{"questionId":"q110","format":"xml","model":"gpt-5-nano","expected":"154136","actual":"15
7742.648351648","isCorrect":false,"inputTokens":17196,"outputTokens":4940,"latencyMs":71313.78808299988},{"questionId":"q110","format":"yaml","model":"gpt-5-nano","expected":"154136","actual":"154136","isCorrect":true,"inputTokens":13232,"outputTokens":5704,"latencyMs":55499.88716699998},{"questionId":"q111","format":"json-pretty","model":"gpt-5-nano","expected":"41","actual":"27","isCorrect":false,"inputTokens":15252,"outputTokens":2183,"latencyMs":21596.972249999875},{"questionId":"q111","format":"json-compact","model":"gpt-5-nano","expected":"41","actual":"41","isCorrect":true,"inputTokens":11559,"outputTokens":2631,"latencyMs":30117.775832999963},{"questionId":"q111","format":"toon","model":"gpt-5-nano","expected":"41","actual":"40","isCorrect":false,"inputTokens":8873,"outputTokens":3015,"latencyMs":37560.24325000006},{"questionId":"q111","format":"csv","model":"gpt-5-nano","expected":"41","actual":"40","isCorrect":false,"inputTokens":8621,"outputTokens":3399,"latencyMs":39291.82483300008},{"questionId":"q111","format":"xml","model":"gpt-5-nano","expected":"41","actual":"27","isCorrect":false,"inputTokens":17198,"outputTokens":9607,"latencyMs":96459.7463750001},{"questionId":"q111","format":"yaml","model":"gpt-5-nano","expected":"41","actual":"39","isCorrect":false,"inputTokens":13234,"outputTokens":4743,"latencyMs":42681.62850000011},{"questionId":"q112","format":"json-pretty","model":"gpt-5-nano","expected":"53","actual":"61","isCorrect":false,"inputTokens":15252,"outputTokens":5831,"latencyMs":55109.62650000001},{"questionId":"q112","format":"json-compact","model":"gpt-5-nano","expected":"53","actual":"54","isCorrect":false,"inputTokens":11559,"outputTokens":7175,"latencyMs":94090.2629170001},{"questionId":"q112","format":"toon","model":"gpt-5-nano","expected":"53","actual":"60","isCorrect":false,"inputTokens":8873,"outputTokens":2439,"latencyMs":22994.73654199997},{"questionId":"q112","format":"csv","model":"gpt-5-nano","expected":"53","actual":"59","isCorr
ect":false,"inputTokens":8621,"outputTokens":7367,"latencyMs":62292.922792},{"questionId":"q112","format":"xml","model":"gpt-5-nano","expected":"53","actual":"60","isCorrect":false,"inputTokens":17198,"outputTokens":7687,"latencyMs":68955.88562500011},{"questionId":"q112","format":"yaml","model":"gpt-5-nano","expected":"53","actual":"53","isCorrect":true,"inputTokens":13234,"outputTokens":2759,"latencyMs":25175.004457999952},{"questionId":"q113","format":"json-pretty","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":15251,"outputTokens":2567,"latencyMs":31751.425124999834},{"questionId":"q113","format":"json-compact","model":"gpt-5-nano","expected":"77","actual":"64","isCorrect":false,"inputTokens":11558,"outputTokens":2695,"latencyMs":24752.101749999914},{"questionId":"q113","format":"toon","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":8872,"outputTokens":4039,"latencyMs":58847.84733400005},{"questionId":"q113","format":"csv","model":"gpt-5-nano","expected":"77","actual":"77","isCorrect":true,"inputTokens":8620,"outputTokens":3015,"latencyMs":28625.518792000134},{"questionId":"q113","format":"xml","model":"gpt-5-nano","expected":"77","actual":"73","isCorrect":false,"inputTokens":17197,"outputTokens":4999,"latencyMs":43475.059833999956},{"questionId":"q113","format":"yaml","model":"gpt-5-nano","expected":"77","actual":"74","isCorrect":false,"inputTokens":13233,"outputTokens":15879,"latencyMs":133066.7952080001},{"questionId":"q114","format":"json-pretty","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":15251,"outputTokens":1607,"latencyMs":17589.96158400015},{"questionId":"q114","format":"json-compact","model":"gpt-5-nano","expected":"37","actual":"36","isCorrect":false,"inputTokens":11558,"outputTokens":10759,"latencyMs":91386.12729199999},{"questionId":"q114","format":"toon","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8872,"ou
tputTokens":2951,"latencyMs":28628.738499999978},{"questionId":"q114","format":"csv","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":8620,"outputTokens":2055,"latencyMs":16901.366458999924},{"questionId":"q114","format":"xml","model":"gpt-5-nano","expected":"37","actual":"27","isCorrect":false,"inputTokens":17197,"outputTokens":1863,"latencyMs":16063.757082999917},{"questionId":"q114","format":"yaml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":13233,"outputTokens":3719,"latencyMs":40554.34516599984},{"questionId":"q115","format":"json-pretty","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":15251,"outputTokens":967,"latencyMs":15059.88116599992},{"questionId":"q115","format":"json-compact","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":11558,"outputTokens":1095,"latencyMs":10164.44991599978},{"questionId":"q115","format":"toon","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":8872,"outputTokens":3719,"latencyMs":44044.071624999866},{"questionId":"q115","format":"csv","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":8620,"outputTokens":2183,"latencyMs":19417.32862499985},{"questionId":"q115","format":"xml","model":"gpt-5-nano","expected":"16","actual":"15","isCorrect":false,"inputTokens":17197,"outputTokens":1607,"latencyMs":15047.773334000027},{"questionId":"q115","format":"yaml","model":"gpt-5-nano","expected":"16","actual":"16","isCorrect":true,"inputTokens":13233,"outputTokens":1543,"latencyMs":36046.026458000066},{"questionId":"q116","format":"json-pretty","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":15251,"outputTokens":3271,"latencyMs":76326.07350000017},{"questionId":"q116","format":"json-compact","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":11558,"outputTokens":3655,"latencyMs":32130
.930374999996},{"questionId":"q116","format":"toon","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":8872,"outputTokens":12615,"latencyMs":155529.18491700012},{"questionId":"q116","format":"csv","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":8620,"outputTokens":6407,"latencyMs":55902.06070800009},{"questionId":"q116","format":"xml","model":"gpt-5-nano","expected":"49","actual":"106","isCorrect":false,"inputTokens":17197,"outputTokens":7495,"latencyMs":64000.08562499983},{"questionId":"q116","format":"yaml","model":"gpt-5-nano","expected":"49","actual":"49","isCorrect":true,"inputTokens":13233,"outputTokens":3591,"latencyMs":31902.165125000058},{"questionId":"q117","format":"json-pretty","model":"gpt-5-nano","expected":"23","actual":"29","isCorrect":false,"inputTokens":15251,"outputTokens":7751,"latencyMs":65168.02249999996},{"questionId":"q117","format":"json-compact","model":"gpt-5-nano","expected":"23","actual":"21","isCorrect":false,"inputTokens":11558,"outputTokens":3207,"latencyMs":28594.051167000085},{"questionId":"q117","format":"toon","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":8872,"outputTokens":10503,"latencyMs":98151.87975000008},{"questionId":"q117","format":"csv","model":"gpt-5-nano","expected":"23","actual":"21","isCorrect":false,"inputTokens":8620,"outputTokens":2183,"latencyMs":20484.786165999947},{"questionId":"q117","format":"xml","model":"gpt-5-nano","expected":"23","actual":"21","isCorrect":false,"inputTokens":17197,"outputTokens":4615,"latencyMs":43518.77370800008},{"questionId":"q117","format":"yaml","model":"gpt-5-nano","expected":"23","actual":"23","isCorrect":true,"inputTokens":13233,"outputTokens":3975,"latencyMs":35627.89633300016},{"questionId":"q118","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":15251,"outputTokens":3015,"latencyMs":26725.319833999965},{"questionId":"q118","fo
rmat":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":11558,"outputTokens":1799,"latencyMs":18761.737124999985},{"questionId":"q118","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8872,"outputTokens":2759,"latencyMs":24030.234415999847},{"questionId":"q118","format":"csv","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8620,"outputTokens":2311,"latencyMs":20654.35191700002},{"questionId":"q118","format":"xml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":17197,"outputTokens":1799,"latencyMs":17041.010666999966},{"questionId":"q118","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"5","isCorrect":false,"inputTokens":13233,"outputTokens":3463,"latencyMs":33392.745709000155},{"questionId":"q119","format":"json-pretty","model":"gpt-5-nano","expected":"57","actual":"56","isCorrect":false,"inputTokens":15258,"outputTokens":4103,"latencyMs":38199.90779100009},{"questionId":"q119","format":"json-compact","model":"gpt-5-nano","expected":"57","actual":"73","isCorrect":false,"inputTokens":11565,"outputTokens":12743,"latencyMs":131865.93516699993},{"questionId":"q119","format":"toon","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":8879,"outputTokens":4167,"latencyMs":39757.292166},{"questionId":"q119","format":"csv","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":8627,"outputTokens":7239,"latencyMs":81548.7545830002},{"questionId":"q119","format":"xml","model":"gpt-5-nano","expected":"57","actual":"60","isCorrect":false,"inputTokens":17204,"outputTokens":15303,"latencyMs":134679.453584},{"questionId":"q119","format":"yaml","model":"gpt-5-nano","expected":"57","actual":"57","isCorrect":true,"inputTokens":13240,"outputTokens":4167,"latencyMs":38586.18929200014},{"questionId":"q120","format":"json-pretty","model":"gpt-5-nano","expected":"43","
actual":"57","isCorrect":false,"inputTokens":15258,"outputTokens":6407,"latencyMs":74895.58804200008},{"questionId":"q120","format":"json-compact","model":"gpt-5-nano","expected":"43","actual":"32","isCorrect":false,"inputTokens":11565,"outputTokens":3015,"latencyMs":35560.51104200003},{"questionId":"q120","format":"toon","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8879,"outputTokens":5191,"latencyMs":56691.20683400007},{"questionId":"q120","format":"csv","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":8627,"outputTokens":4231,"latencyMs":38942.962958000135},{"questionId":"q120","format":"xml","model":"gpt-5-nano","expected":"43","actual":"39","isCorrect":false,"inputTokens":17204,"outputTokens":11847,"latencyMs":143298.53979200008},{"questionId":"q120","format":"yaml","model":"gpt-5-nano","expected":"43","actual":"43","isCorrect":true,"inputTokens":13240,"outputTokens":5767,"latencyMs":58260.77429199987},{"questionId":"q121","format":"json-pretty","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":15258,"outputTokens":2951,"latencyMs":30454.88316700002},{"questionId":"q121","format":"json-compact","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":11565,"outputTokens":4423,"latencyMs":44076.22399999993},{"questionId":"q121","format":"toon","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":8879,"outputTokens":4231,"latencyMs":37241.785167000024},{"questionId":"q121","format":"csv","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":8627,"outputTokens":5895,"latencyMs":90620.1480419999},{"questionId":"q121","format":"xml","model":"gpt-5-nano","expected":"25","actual":"18","isCorrect":false,"inputTokens":17204,"outputTokens":3975,"latencyMs":40301.10383399995},{"questionId":"q121","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens
":13240,"outputTokens":3911,"latencyMs":53672.49758299999},{"questionId":"q122","format":"json-pretty","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":15258,"outputTokens":3463,"latencyMs":35417.804375000065},{"questionId":"q122","format":"json-compact","model":"gpt-5-nano","expected":"6","actual":"5","isCorrect":false,"inputTokens":11565,"outputTokens":4487,"latencyMs":46868.0803749999},{"questionId":"q122","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8879,"outputTokens":4551,"latencyMs":45573.12279199995},{"questionId":"q122","format":"csv","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":8627,"outputTokens":2887,"latencyMs":28422.857124999864},{"questionId":"q122","format":"xml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":17204,"outputTokens":3143,"latencyMs":29955.825083999895},{"questionId":"q122","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":13240,"outputTokens":5575,"latencyMs":48339.364959000144},{"questionId":"q123","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":15258,"outputTokens":3143,"latencyMs":25295.857915999833},{"questionId":"q123","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":11565,"outputTokens":3079,"latencyMs":29557.22145800013},{"questionId":"q123","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":8879,"outputTokens":2503,"latencyMs":22121.652333999984},{"questionId":"q123","format":"csv","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":8627,"outputTokens":2695,"latencyMs":31378.58437499986},{"questionId":"q123","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":17204,"outputTokens":1799,"latencyMs":20389.638000000035
},{"questionId":"q123","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":13240,"outputTokens":3079,"latencyMs":28901.529666000046},{"questionId":"q124","format":"json-pretty","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":6905,"outputTokens":263,"latencyMs":3115.7104579999577},{"questionId":"q124","format":"json-compact","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":4927,"outputTokens":327,"latencyMs":7230.5603749998845},{"questionId":"q124","format":"toon","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":5906,"outputTokens":263,"latencyMs":6441.958833999932},{"questionId":"q124","format":"xml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":7815,"outputTokens":199,"latencyMs":3481.11870799982},{"questionId":"q124","format":"yaml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":5933,"outputTokens":135,"latencyMs":2440.1648330001626},{"questionId":"q125","format":"json-pretty","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6905,"outputTokens":841,"latencyMs":9103.615125000011},{"questionId":"q125","format":"json-compact","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":4927,"outputTokens":1097,"latencyMs":10853.658833999885},{"questionId":"q125","format":"toon","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5906,"outputTokens":393,"latencyMs":4811.6409579999745},{"questionId":"q125","format":"xml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":7815,"outputTokens":265,"latencyMs":3462.576958999969},{"questionId":"q125","format":"yaml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTok
ens":5933,"outputTokens":201,"latencyMs":4551.6412080000155},{"questionId":"q126","format":"json-pretty","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":6906,"outputTokens":199,"latencyMs":5946.616957999999},{"questionId":"q126","format":"json-compact","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":4928,"outputTokens":391,"latencyMs":4862.764540999895},{"questionId":"q126","format":"toon","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":5907,"outputTokens":199,"latencyMs":4436.1078329999},{"questionId":"q126","format":"xml","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":7816,"outputTokens":263,"latencyMs":3710.672332999995},{"questionId":"q126","format":"yaml","model":"gpt-5-nano","expected":"424","actual":"424","isCorrect":true,"inputTokens":5934,"outputTokens":263,"latencyMs":3584.6445420000236},{"questionId":"q127","format":"json-pretty","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":6906,"outputTokens":264,"latencyMs":3332.7081249998882},{"questionId":"q127","format":"json-compact","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":4928,"outputTokens":328,"latencyMs":6029.872375000035},{"questionId":"q127","format":"toon","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":5907,"outputTokens":264,"latencyMs":6360.82320899982},{"questionId":"q127","format":"xml","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":7816,"outputTokens":328,"latencyMs":4304.171290999977},{"questionId":"q127","format":"yaml","model":"gpt-5-nano","expected":"2849","actual":"2849","isCorrect":true,"inputTokens":5934,"outputTokens":328,"latencyMs":6850.6273750001565},{"questionId":"q128","format":"json-pretty","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":6905,"outpu
tTokens":1095,"latencyMs":25444.60245799995},{"questionId":"q128","format":"json-compact","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":4927,"outputTokens":839,"latencyMs":11782.655000000028},{"questionId":"q128","format":"toon","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5906,"outputTokens":263,"latencyMs":5465.98116700002},{"questionId":"q128","format":"xml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":7815,"outputTokens":263,"latencyMs":3371.2434590000194},{"questionId":"q128","format":"yaml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5933,"outputTokens":263,"latencyMs":3541.5000830001663},{"questionId":"q129","format":"json-pretty","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":6905,"outputTokens":905,"latencyMs":9463.865125000011},{"questionId":"q129","format":"json-compact","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":4927,"outputTokens":649,"latencyMs":6622.134208000032},{"questionId":"q129","format":"toon","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":5906,"outputTokens":329,"latencyMs":4798.912999999942},{"questionId":"q129","format":"xml","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":7815,"outputTokens":265,"latencyMs":5167.431041999953},{"questionId":"q129","format":"yaml","model":"gpt-5-nano","expected":"/api/orders","actual":"/api/orders","isCorrect":true,"inputTokens":5933,"outputTokens":393,"latencyMs":4693.827333000023},{"questionId":"q130","format":"json-pretty","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":6906,"outputTokens":199,"latencyMs":2821.266375000123},{"questionId":"q130","format":"json-compact","model":"gpt-5-nano","expected":
"435","actual":"435","isCorrect":true,"inputTokens":4928,"outputTokens":839,"latencyMs":8631.246000000043},{"questionId":"q130","format":"toon","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":5907,"outputTokens":327,"latencyMs":4855.562292000046},{"questionId":"q130","format":"xml","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":7816,"outputTokens":519,"latencyMs":7240.806624999968},{"questionId":"q130","format":"yaml","model":"gpt-5-nano","expected":"435","actual":"435","isCorrect":true,"inputTokens":5934,"outputTokens":1031,"latencyMs":10435.050374999875},{"questionId":"q131","format":"json-pretty","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":6906,"outputTokens":199,"latencyMs":3034.7289579999633},{"questionId":"q131","format":"json-compact","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":4928,"outputTokens":327,"latencyMs":3241.3320420000236},{"questionId":"q131","format":"toon","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":5907,"outputTokens":391,"latencyMs":5222.304125000024},{"questionId":"q131","format":"xml","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":7816,"outputTokens":263,"latencyMs":3285.6503329998814},{"questionId":"q131","format":"yaml","model":"gpt-5-nano","expected":"408","actual":"408","isCorrect":true,"inputTokens":5934,"outputTokens":135,"latencyMs":3403.779457999859},{"questionId":"q132","format":"json-pretty","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":6905,"outputTokens":967,"latencyMs":12630.437167000026},{"questionId":"q132","format":"json-compact","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":4927,"outputTokens":1095,"latencyMs":12474.425874999957},{"questionId":"q132","format":"toon","model":"gpt-5-nano","expected":"error","actual":"er
ror","isCorrect":true,"inputTokens":5906,"outputTokens":1863,"latencyMs":15583.305916999932},{"questionId":"q132","format":"xml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":7815,"outputTokens":455,"latencyMs":6376.325249999994},{"questionId":"q132","format":"yaml","model":"gpt-5-nano","expected":"error","actual":"error","isCorrect":true,"inputTokens":5933,"outputTokens":327,"latencyMs":5189.892333999975},{"questionId":"q133","format":"json-pretty","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":6905,"outputTokens":457,"latencyMs":4789.633792000124},{"questionId":"q133","format":"json-compact","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":4927,"outputTokens":969,"latencyMs":9603.09604199999},{"questionId":"q133","format":"toon","model":"gpt-5-nano","expected":"/api/users","actual":"/api/users","isCorrect":true,"inputTokens":5906,"outputTokens":265,"latencyMs":3472.784415999893},{"questionId":"q133","format":"xml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/payments","isCorrect":false,"inputTokens":7815,"outputTokens":1098,"latencyMs":14847.909750000108},{"questionId":"q133","format":"yaml","model":"gpt-5-nano","expected":"/api/users","actual":"/api/payments","isCorrect":false,"inputTokens":5933,"outputTokens":1354,"latencyMs":14238.748833000194},{"questionId":"q134","format":"json-pretty","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":6889,"outputTokens":3719,"latencyMs":29588.24908400001},{"questionId":"q134","format":"json-compact","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":4911,"outputTokens":5319,"latencyMs":40931.71183300018},{"questionId":"q134","format":"toon","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":5890,"outputTokens":391,"latencyMs":5362.043415999971},{"questionId":"q134","format":"xml
","model":"gpt-5-nano","expected":"75","actual":"100","isCorrect":false,"inputTokens":7799,"outputTokens":2247,"latencyMs":34099.03204199998},{"questionId":"q134","format":"yaml","model":"gpt-5-nano","expected":"75","actual":"100","isCorrect":false,"inputTokens":5917,"outputTokens":20167,"latencyMs":191462.27824999997},{"questionId":"q135","format":"json-pretty","model":"gpt-5-nano","expected":"2453.41","actual":"2413.3866666667","isCorrect":false,"inputTokens":6890,"outputTokens":10189,"latencyMs":114932.37154199998},{"questionId":"q135","format":"json-compact","model":"gpt-5-nano","expected":"2453.41","actual":"2344","isCorrect":false,"inputTokens":4912,"outputTokens":12488,"latencyMs":178401.16920799995},{"questionId":"q135","format":"toon","model":"gpt-5-nano","expected":"2453.41","actual":"2399.5942028985507","isCorrect":false,"inputTokens":5891,"outputTokens":12494,"latencyMs":106734.32024999987},{"questionId":"q135","format":"xml","model":"gpt-5-nano","expected":"2453.41","actual":"2453.4133333333","isCorrect":true,"inputTokens":7800,"outputTokens":9613,"latencyMs":79749.86854199995},{"questionId":"q135","format":"yaml","model":"gpt-5-nano","expected":"2453.41","actual":"2513","isCorrect":false,"inputTokens":5918,"outputTokens":9352,"latencyMs":79398.59566699993},{"questionId":"q136","format":"json-pretty","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":6889,"outputTokens":4615,"latencyMs":42187.74462500005},{"questionId":"q136","format":"json-compact","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":4911,"outputTokens":3527,"latencyMs":32269.509624999948},{"questionId":"q136","format":"toon","model":"gpt-5-nano","expected":"29","actual":"23","isCorrect":false,"inputTokens":5890,"outputTokens":9671,"latencyMs":76910.37262500008},{"questionId":"q136","format":"xml","model":"gpt-5-nano","expected":"29","actual":"28","isCorrect":false,"inputTokens":7799,"outputTokens":10887,"latencyMs":88385.7328
7499999},{"questionId":"q136","format":"yaml","model":"gpt-5-nano","expected":"29","actual":"33","isCorrect":false,"inputTokens":5917,"outputTokens":13319,"latencyMs":135069.12662500003},{"questionId":"q137","format":"json-pretty","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":6889,"outputTokens":2503,"latencyMs":24155.693625000073},{"questionId":"q137","format":"json-compact","model":"gpt-5-nano","expected":"17","actual":"17","isCorrect":true,"inputTokens":4911,"outputTokens":8519,"latencyMs":80310.74116600002},{"questionId":"q137","format":"toon","model":"gpt-5-nano","expected":"17","actual":"13","isCorrect":false,"inputTokens":5890,"outputTokens":6855,"latencyMs":60660.83295800001},{"questionId":"q137","format":"xml","model":"gpt-5-nano","expected":"17","actual":"13","isCorrect":false,"inputTokens":7799,"outputTokens":3847,"latencyMs":60666.268124999944},{"questionId":"q137","format":"yaml","model":"gpt-5-nano","expected":"17","actual":"14","isCorrect":false,"inputTokens":5917,"outputTokens":7303,"latencyMs":57974.093916999875},{"questionId":"q138","format":"json-pretty","model":"gpt-5-nano","expected":"29","actual":"23","isCorrect":false,"inputTokens":6889,"outputTokens":3463,"latencyMs":38306.33962500002},{"questionId":"q138","format":"json-compact","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":4911,"outputTokens":3591,"latencyMs":29538.051624999847},{"questionId":"q138","format":"toon","model":"gpt-5-nano","expected":"29","actual":"38","isCorrect":false,"inputTokens":5890,"outputTokens":15815,"latencyMs":129636.9376660001},{"questionId":"q138","format":"xml","model":"gpt-5-nano","expected":"29","actual":"22","isCorrect":false,"inputTokens":7799,"outputTokens":10503,"latencyMs":76536.67662499985},{"questionId":"q138","format":"yaml","model":"gpt-5-nano","expected":"29","actual":"50","isCorrect":false,"inputTokens":5917,"outputTokens":13319,"latencyMs":151121.41308299988},{"questionId":"q139"
,"format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":6891,"outputTokens":4103,"latencyMs":56042.69449999998},{"questionId":"q139","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"10","isCorrect":false,"inputTokens":4913,"outputTokens":4743,"latencyMs":37126.876166999806},{"questionId":"q139","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5892,"outputTokens":5639,"latencyMs":40804.70775000006},{"questionId":"q139","format":"xml","model":"gpt-5-nano","expected":"11","actual":"6","isCorrect":false,"inputTokens":7801,"outputTokens":9799,"latencyMs":92226.40016699978},{"questionId":"q139","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":5919,"outputTokens":12039,"latencyMs":93280.16320900014},{"questionId":"q140","format":"json-pretty","model":"gpt-5-nano","expected":"18","actual":"18","isCorrect":true,"inputTokens":6891,"outputTokens":4295,"latencyMs":41815.3666660001},{"questionId":"q140","format":"json-compact","model":"gpt-5-nano","expected":"18","actual":"18","isCorrect":true,"inputTokens":4913,"outputTokens":3335,"latencyMs":29692.3865410001},{"questionId":"q140","format":"toon","model":"gpt-5-nano","expected":"18","actual":"17","isCorrect":false,"inputTokens":5892,"outputTokens":6599,"latencyMs":97809.67475},{"questionId":"q140","format":"xml","model":"gpt-5-nano","expected":"18","actual":"15","isCorrect":false,"inputTokens":7801,"outputTokens":12039,"latencyMs":123416.02841699985},{"questionId":"q140","format":"yaml","model":"gpt-5-nano","expected":"18","actual":"17","isCorrect":false,"inputTokens":5919,"outputTokens":10567,"latencyMs":91551.0178749999},{"questionId":"q141","format":"json-pretty","model":"gpt-5-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":6896,"outputTokens":3271,"latencyMs":30817.282166999998},{"questionId":"q141","format":"json-compact","model":"gpt-5
-nano","expected":"33","actual":"33","isCorrect":true,"inputTokens":4918,"outputTokens":5703,"latencyMs":63405.342667000135},{"questionId":"q141","format":"toon","model":"gpt-5-nano","expected":"33","actual":"34","isCorrect":false,"inputTokens":5897,"outputTokens":14151,"latencyMs":112034.90975000011},{"questionId":"q141","format":"xml","model":"gpt-5-nano","expected":"33","actual":"37","isCorrect":false,"inputTokens":7806,"outputTokens":15175,"latencyMs":145415.13762499997},{"questionId":"q141","format":"yaml","model":"gpt-5-nano","expected":"33","actual":"63","isCorrect":false,"inputTokens":5924,"outputTokens":18759,"latencyMs":151461.663834},{"questionId":"q142","format":"json-pretty","model":"gpt-5-nano","expected":"42","actual":"43","isCorrect":false,"inputTokens":6894,"outputTokens":5959,"latencyMs":47470.96787500009},{"questionId":"q142","format":"json-compact","model":"gpt-5-nano","expected":"42","actual":"42","isCorrect":true,"inputTokens":4916,"outputTokens":5383,"latencyMs":53329.43183299992},{"questionId":"q142","format":"toon","model":"gpt-5-nano","expected":"42","actual":"43","isCorrect":false,"inputTokens":5895,"outputTokens":10375,"latencyMs":85387.93562499993},{"questionId":"q142","format":"xml","model":"gpt-5-nano","expected":"42","actual":"60","isCorrect":false,"inputTokens":7804,"outputTokens":12167,"latencyMs":93500.84408300021},{"questionId":"q142","format":"yaml","model":"gpt-5-nano","expected":"42","actual":"80","isCorrect":false,"inputTokens":5922,"outputTokens":9671,"latencyMs":379540.6658340001},{"questionId":"q143","format":"json-pretty","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":6890,"outputTokens":4743,"latencyMs":38821.55270899995},{"questionId":"q143","format":"json-compact","model":"gpt-5-nano","expected":"25","actual":"25","isCorrect":true,"inputTokens":4912,"outputTokens":6727,"latencyMs":57143.98108300008},{"questionId":"q143","format":"toon","model":"gpt-5-nano","expected":"25","actual":"27"
,"isCorrect":false,"inputTokens":5891,"outputTokens":14407,"latencyMs":121313.45120800007},{"questionId":"q143","format":"xml","model":"gpt-5-nano","expected":"25","actual":"19","isCorrect":false,"inputTokens":7800,"outputTokens":7559,"latencyMs":69642.35850000009},{"questionId":"q143","format":"yaml","model":"gpt-5-nano","expected":"25","actual":"17","isCorrect":false,"inputTokens":5918,"outputTokens":16135,"latencyMs":161343.49933400005},{"questionId":"q144","format":"json-pretty","model":"gpt-5-nano","expected":"29","actual":"34","isCorrect":false,"inputTokens":6896,"outputTokens":20167,"latencyMs":230741.27374999993},{"questionId":"q144","format":"json-compact","model":"gpt-5-nano","expected":"29","actual":"29","isCorrect":true,"inputTokens":4918,"outputTokens":7495,"latencyMs":63636.350584000116},{"questionId":"q144","format":"toon","model":"gpt-5-nano","expected":"29","actual":"36","isCorrect":false,"inputTokens":5897,"outputTokens":16263,"latencyMs":130179.59895799984},{"questionId":"q144","format":"xml","model":"gpt-5-nano","expected":"29","actual":"37","isCorrect":false,"inputTokens":7806,"outputTokens":11591,"latencyMs":113961.67562500015},{"questionId":"q144","format":"yaml","model":"gpt-5-nano","expected":"29","actual":"37","isCorrect":false,"inputTokens":5924,"outputTokens":16519,"latencyMs":140941.021834},{"questionId":"q145","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6896,"outputTokens":5447,"latencyMs":46888.73112499993},{"questionId":"q145","format":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":4918,"outputTokens":7111,"latencyMs":74291.24454099988},{"questionId":"q145","format":"toon","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":5897,"outputTokens":4423,"latencyMs":38978.35370799992},{"questionId":"q145","format":"xml","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":7806,"o
utputTokens":4039,"latencyMs":36546.21775000007},{"questionId":"q145","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":5924,"outputTokens":4167,"latencyMs":39038.52904099994},{"questionId":"q146","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"4","isCorrect":false,"inputTokens":6898,"outputTokens":3015,"latencyMs":26637.197584000183},{"questionId":"q146","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":4920,"outputTokens":2887,"latencyMs":26040.22404200025},{"questionId":"q146","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5899,"outputTokens":3463,"latencyMs":29262.439125000034},{"questionId":"q146","format":"xml","model":"gpt-5-nano","expected":"5","actual":"6","isCorrect":false,"inputTokens":7808,"outputTokens":6215,"latencyMs":58986.4326249999},{"questionId":"q146","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5926,"outputTokens":4743,"latencyMs":38633.85925000021},{"questionId":"q147","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":6898,"outputTokens":3079,"latencyMs":27888.549333999865},{"questionId":"q147","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":4920,"outputTokens":4679,"latencyMs":43776.82637499971},{"questionId":"q147","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":5899,"outputTokens":5447,"latencyMs":50479.701083000284},{"questionId":"q147","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":7808,"outputTokens":4999,"latencyMs":58853.683209000155},{"questionId":"q147","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":5926,"outputTokens":3655,"latencyMs":31553.769249999896},{"questionId"
:"q148","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":6898,"outputTokens":2759,"latencyMs":47123.68912500003},{"questionId":"q148","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":4920,"outputTokens":4359,"latencyMs":41352.168209000025},{"questionId":"q148","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":5899,"outputTokens":6343,"latencyMs":55856.09641700005},{"questionId":"q148","format":"xml","model":"gpt-5-nano","expected":"3","actual":"2","isCorrect":false,"inputTokens":7808,"outputTokens":4103,"latencyMs":35685.38595899986},{"questionId":"q148","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":5926,"outputTokens":6343,"latencyMs":51239.12116699992},{"questionId":"q149","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6896,"outputTokens":6855,"latencyMs":90414.68912500003},{"questionId":"q149","format":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":4918,"outputTokens":2695,"latencyMs":27534.36641700007},{"questionId":"q149","format":"toon","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":5897,"outputTokens":4487,"latencyMs":42209.12216599984},{"questionId":"q149","format":"xml","model":"gpt-5-nano","expected":"4","actual":"3","isCorrect":false,"inputTokens":7806,"outputTokens":6279,"latencyMs":75733.36095800018},{"questionId":"q149","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":5924,"outputTokens":7687,"latencyMs":63058.29333399981},{"questionId":"q150","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":6896,"outputTokens":4103,"latencyMs":40312.71462500002},{"questionId":"q150","format":"json-compact","model":"gpt-5-nano","expe
cted":"5","actual":"5","isCorrect":true,"inputTokens":4918,"outputTokens":4167,"latencyMs":35873.915792000014},{"questionId":"q150","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":5897,"outputTokens":5703,"latencyMs":57978.6901250002},{"questionId":"q150","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":7806,"outputTokens":6023,"latencyMs":54796.04729200015},{"questionId":"q150","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"4","isCorrect":false,"inputTokens":5924,"outputTokens":5127,"latencyMs":43498.875916999765},{"questionId":"q151","format":"json-pretty","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":1023,"outputTokens":583,"latencyMs":6742.807415999938},{"questionId":"q151","format":"json-compact","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":665,"outputTokens":455,"latencyMs":8014.11641700007},{"questionId":"q151","format":"toon","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":756,"outputTokens":135,"latencyMs":3002.658749999944},{"questionId":"q151","format":"xml","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":1107,"outputTokens":135,"latencyMs":2293.2929159998894},{"questionId":"q151","format":"yaml","model":"gpt-5-nano","expected":"development","actual":"development","isCorrect":true,"inputTokens":775,"outputTokens":135,"latencyMs":3505.548708999995},{"questionId":"q152","format":"json-pretty","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1021,"outputTokens":268,"latencyMs":3357.363291999791},{"questionId":"q152","format":"json-compact","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":663,"outputTokens":396,"latencyMs":4544
.950250000227},{"questionId":"q152","format":"toon","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":754,"outputTokens":204,"latencyMs":3910.6737079997547},{"questionId":"q152","format":"xml","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":1105,"outputTokens":140,"latencyMs":2807.8532920000143},{"questionId":"q152","format":"yaml","model":"gpt-5-nano","expected":"guilty-cake.org","actual":"guilty-cake.org","isCorrect":true,"inputTokens":773,"outputTokens":204,"latencyMs":6755.754457999952},{"questionId":"q153","format":"json-pretty","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1021,"outputTokens":136,"latencyMs":2664.359124999959},{"questionId":"q153","format":"json-compact","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":663,"outputTokens":136,"latencyMs":2253.774665999692},{"questionId":"q153","format":"toon","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":754,"outputTokens":200,"latencyMs":5005.998707999941},{"questionId":"q153","format":"xml","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1105,"outputTokens":72,"latencyMs":2703.680333000142},{"questionId":"q153","format":"yaml","model":"gpt-5-nano","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":773,"outputTokens":72,"latencyMs":1897.0315409996547},{"questionId":"q154","format":"json-pretty","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":3792.394541000016},{"questionId":"q154","format":"json-compact","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":665,"outputTokens":135,"latencyMs":4632.95924999984},{"questionId":"q154","format":"toon","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":756,"o
utputTokens":135,"latencyMs":2394.134250000119},{"questionId":"q154","format":"xml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":1107,"outputTokens":135,"latencyMs":2430.5971249998547},{"questionId":"q154","format":"yaml","model":"gpt-5-nano","expected":"37","actual":"37","isCorrect":true,"inputTokens":775,"outputTokens":135,"latencyMs":2441.4810000001453},{"questionId":"q155","format":"json-pretty","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1021,"outputTokens":72,"latencyMs":1814.3805419998243},{"questionId":"q155","format":"json-compact","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":663,"outputTokens":200,"latencyMs":3557.1527090002783},{"questionId":"q155","format":"toon","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":754,"outputTokens":136,"latencyMs":3990.5303329997696},{"questionId":"q155","format":"xml","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1105,"outputTokens":136,"latencyMs":3151.5690419999883},{"questionId":"q155","format":"yaml","model":"gpt-5-nano","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":773,"outputTokens":136,"latencyMs":2329.0516249998473},{"questionId":"q156","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":1982.217999999877},{"questionId":"q156","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":665,"outputTokens":199,"latencyMs":2489.357166999951},{"questionId":"q156","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":756,"outputTokens":199,"latencyMs":3184.5403330000117},{"questionId":"q156","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1107,"outputTokens":199,"latencyMs":3035.35
30419999734},{"questionId":"q156","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":775,"outputTokens":199,"latencyMs":2576.0412079999223},{"questionId":"q157","format":"json-pretty","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1023,"outputTokens":136,"latencyMs":15388.45091599971},{"questionId":"q157","format":"json-compact","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":665,"outputTokens":200,"latencyMs":3027.245583000127},{"questionId":"q157","format":"toon","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":756,"outputTokens":200,"latencyMs":2682.324666999746},{"questionId":"q157","format":"xml","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1107,"outputTokens":72,"latencyMs":2551.2237090002745},{"questionId":"q157","format":"yaml","model":"gpt-5-nano","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":775,"outputTokens":136,"latencyMs":4109.2170409997925},{"questionId":"q158","format":"json-pretty","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":1021,"outputTokens":199,"latencyMs":3716.7424159999937},{"questionId":"q158","format":"json-compact","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":663,"outputTokens":455,"latencyMs":6808.871625000145},{"questionId":"q158","format":"toon","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":754,"outputTokens":327,"latencyMs":4373.244665999897},{"questionId":"q158","format":"xml","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":1105,"outputTokens":135,"latencyMs":2383.4876660001464},{"questionId":"q158","format":"yaml","model":"gpt-5-nano","expected":"real","actual":"real","isCorrect":true,"inputTokens":773,"outputTokens":263,"latencyMs":3284.6608330002055}
,{"questionId":"q159","format":"json-pretty","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1022,"outputTokens":136,"latencyMs":2568.9396250001155},{"questionId":"q159","format":"json-compact","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":664,"outputTokens":136,"latencyMs":2548.5491659999825},{"questionId":"q159","format":"toon","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":755,"outputTokens":200,"latencyMs":2528.12133300025},{"questionId":"q159","format":"xml","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1106,"outputTokens":200,"latencyMs":3261.567334000021},{"questionId":"q159","format":"yaml","model":"gpt-5-nano","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":774,"outputTokens":264,"latencyMs":5840.089374999981},{"questionId":"q160","format":"json-pretty","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1023,"outputTokens":459,"latencyMs":4751.093458000105},{"questionId":"q160","format":"json-compact","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":665,"outputTokens":715,"latencyMs":9806.253459000029},{"questionId":"q160","format":"toon","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":756,"outputTokens":267,"latencyMs":3160.0135419997387},{"questionId":"q160","format":"xml","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":1107,"outputTokens":459,"latencyMs":4843.594291999936},{"questionId":"q160","format":"yaml","model":"gpt-5-nano","expected":"6.8.3","actual":"6.8.3","isCorrect":true,"inputTokens":775,"outputTokens":395,"latencyMs":5162.69116699975},{"questionId":"q161","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":3906.6883749999106},{"ques
tionId":"q161","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":665,"outputTokens":263,"latencyMs":3832.8434589998797},{"questionId":"q161","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":756,"outputTokens":263,"latencyMs":4753.713458000217},{"questionId":"q161","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1107,"outputTokens":199,"latencyMs":6292.803332999814},{"questionId":"q161","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":775,"outputTokens":199,"latencyMs":6083.018833000213},{"questionId":"q162","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":2117.091250000056},{"questionId":"q162","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":665,"outputTokens":199,"latencyMs":5661.312124999706},{"questionId":"q162","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":756,"outputTokens":263,"latencyMs":5603.599000000395},{"questionId":"q162","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1107,"outputTokens":135,"latencyMs":4710.21570800012},{"questionId":"q162","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":775,"outputTokens":199,"latencyMs":7471.390374999959},{"questionId":"q163","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1022,"outputTokens":199,"latencyMs":4701.419417000376},{"questionId":"q163","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":664,"outputTokens":199,"latencyMs":3847.8547080000862},{"questionId":"q163","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2
","isCorrect":true,"inputTokens":755,"outputTokens":263,"latencyMs":3375.736333000008},{"questionId":"q163","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1106,"outputTokens":263,"latencyMs":4812.272791000083},{"questionId":"q163","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":774,"outputTokens":199,"latencyMs":2934.4373750002123},{"questionId":"q164","format":"json-pretty","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1022,"outputTokens":135,"latencyMs":2458.2781249997206},{"questionId":"q164","format":"json-compact","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":664,"outputTokens":263,"latencyMs":3522.378250000067},{"questionId":"q164","format":"toon","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":755,"outputTokens":327,"latencyMs":4799.6738340002485},{"questionId":"q164","format":"xml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":1106,"outputTokens":199,"latencyMs":3384.5427500000224},{"questionId":"q164","format":"yaml","model":"gpt-5-nano","expected":"2","actual":"2","isCorrect":true,"inputTokens":774,"outputTokens":263,"latencyMs":5075.06341599999},{"questionId":"q165","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1022,"outputTokens":199,"latencyMs":2597.252208999824},{"questionId":"q165","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":664,"outputTokens":199,"latencyMs":2931.3202499998733},{"questionId":"q165","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":755,"outputTokens":71,"latencyMs":2898.455083000008},{"questionId":"q165","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1106,"outputTokens":263,"latencyMs":3072.91179199982
43},{"questionId":"q165","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":774,"outputTokens":71,"latencyMs":2456.2880830001086},{"questionId":"q166","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1026,"outputTokens":199,"latencyMs":2803.085833000019},{"questionId":"q166","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":668,"outputTokens":839,"latencyMs":7020.810707999859},{"questionId":"q166","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":759,"outputTokens":199,"latencyMs":2661.654792000074},{"questionId":"q166","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1110,"outputTokens":327,"latencyMs":5022.77420799993},{"questionId":"q166","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":778,"outputTokens":263,"latencyMs":3315.3438749997877},{"questionId":"q167","format":"json-pretty","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1022,"outputTokens":263,"latencyMs":4004.776000000071},{"questionId":"q167","format":"json-compact","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":664,"outputTokens":327,"latencyMs":4605.751166999806},{"questionId":"q167","format":"toon","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":755,"outputTokens":327,"latencyMs":5653.84929200029},{"questionId":"q167","format":"xml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1106,"outputTokens":263,"latencyMs":3888.8481250000186},{"questionId":"q167","format":"yaml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":774,"outputTokens":263,"latencyMs":4843.462165999692},{"questionId":"q168","format":"json-pretty","model":"gpt-5-nano","expected":"5","ac
tual":"5","isCorrect":true,"inputTokens":1024,"outputTokens":199,"latencyMs":2777.7275000000373},{"questionId":"q168","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":666,"outputTokens":711,"latencyMs":10276.33304100018},{"questionId":"q168","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":757,"outputTokens":199,"latencyMs":4521.871375000104},{"questionId":"q168","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":1108,"outputTokens":199,"latencyMs":5794.047832999844},{"questionId":"q168","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":776,"outputTokens":135,"latencyMs":3790.2669999999925},{"questionId":"q169","format":"json-pretty","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":1026,"outputTokens":391,"latencyMs":5940.847500000149},{"questionId":"q169","format":"json-compact","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":668,"outputTokens":455,"latencyMs":4762.850875000004},{"questionId":"q169","format":"toon","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":759,"outputTokens":455,"latencyMs":4719.768250000197},{"questionId":"q169","format":"xml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":1110,"outputTokens":327,"latencyMs":6340.48066599993},{"questionId":"q169","format":"yaml","model":"gpt-5-nano","expected":"8","actual":"8","isCorrect":true,"inputTokens":778,"outputTokens":263,"latencyMs":3185.5706250001676},{"questionId":"q170","format":"json-pretty","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":1025,"outputTokens":455,"latencyMs":4706.524750000332},{"questionId":"q170","format":"json-compact","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":667,"outputTokens":327,"latencyMs":
5000.733999999706},{"questionId":"q170","format":"toon","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":758,"outputTokens":455,"latencyMs":6487.487999999896},{"questionId":"q170","format":"xml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":1109,"outputTokens":455,"latencyMs":8960.450375000015},{"questionId":"q170","format":"yaml","model":"gpt-5-nano","expected":"5","actual":"5","isCorrect":true,"inputTokens":777,"outputTokens":391,"latencyMs":5475.017541000154},{"questionId":"q171","format":"json-pretty","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1027,"outputTokens":263,"latencyMs":3693.9329169997945},{"questionId":"q171","format":"json-compact","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":669,"outputTokens":263,"latencyMs":3648.1655830000527},{"questionId":"q171","format":"toon","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":760,"outputTokens":967,"latencyMs":8853.470167000312},{"questionId":"q171","format":"xml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":1111,"outputTokens":263,"latencyMs":3237.0124579998665},{"questionId":"q171","format":"yaml","model":"gpt-5-nano","expected":"3","actual":"3","isCorrect":true,"inputTokens":779,"outputTokens":263,"latencyMs":4246.907124999911},{"questionId":"q172","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1027,"outputTokens":199,"latencyMs":3634.9288749997504},{"questionId":"q172","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":669,"outputTokens":327,"latencyMs":20629.421875},{"questionId":"q172","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":760,"outputTokens":199,"latencyMs":3223.972542000003},{"questionId":"q172","format":"xml","model":"gpt-5-nano","expected":"
1","actual":"1","isCorrect":true,"inputTokens":1111,"outputTokens":327,"latencyMs":13383.079999999609},{"questionId":"q172","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":779,"outputTokens":263,"latencyMs":4296.551624999847},{"questionId":"q173","format":"json-pretty","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1028,"outputTokens":199,"latencyMs":2751.2610419997945},{"questionId":"q173","format":"json-compact","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":670,"outputTokens":327,"latencyMs":4416.645750000142},{"questionId":"q173","format":"toon","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":761,"outputTokens":519,"latencyMs":7847.565500000026},{"questionId":"q173","format":"xml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1112,"outputTokens":263,"latencyMs":6915.099750000052},{"questionId":"q173","format":"yaml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":780,"outputTokens":391,"latencyMs":6582.503958000336},{"questionId":"q174","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1025,"outputTokens":135,"latencyMs":2869.2113329996355},{"questionId":"q174","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":667,"outputTokens":263,"latencyMs":3683.578083000146},{"questionId":"q174","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":758,"outputTokens":327,"latencyMs":11444.25062499987},{"questionId":"q174","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1109,"outputTokens":327,"latencyMs":11302.633040999994},{"questionId":"q174","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":777,"outputTokens":135,"latencyMs":
3082.4122499995865},{"questionId":"q175","format":"json-pretty","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1029,"outputTokens":327,"latencyMs":4507.2793749999255},{"questionId":"q175","format":"json-compact","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":671,"outputTokens":263,"latencyMs":4098.119249999989},{"questionId":"q175","format":"toon","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":762,"outputTokens":327,"latencyMs":10054.765292000026},{"questionId":"q175","format":"xml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":1113,"outputTokens":391,"latencyMs":6727.330041999929},{"questionId":"q175","format":"yaml","model":"gpt-5-nano","expected":"0","actual":"0","isCorrect":true,"inputTokens":781,"outputTokens":391,"latencyMs":7630.485625000205},{"questionId":"q176","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1023,"outputTokens":135,"latencyMs":2451.910416999832},{"questionId":"q176","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":665,"outputTokens":263,"latencyMs":3689.5402919999324},{"questionId":"q176","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":756,"outputTokens":199,"latencyMs":2873.5955409999005},{"questionId":"q176","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1107,"outputTokens":199,"latencyMs":3277.6729580000974},{"questionId":"q176","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":775,"outputTokens":263,"latencyMs":4645.139667000156},{"questionId":"q177","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1031,"outputTokens":327,"latencyMs":3554.097957999911},{"questionId":"q177","format":"json-compact","model":"
gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":673,"outputTokens":263,"latencyMs":3744.1457079998218},{"questionId":"q177","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":764,"outputTokens":263,"latencyMs":6140.8108749999665},{"questionId":"q177","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1115,"outputTokens":327,"latencyMs":7183.078166999854},{"questionId":"q177","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":783,"outputTokens":263,"latencyMs":5394.631290999707},{"questionId":"q178","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1026,"outputTokens":135,"latencyMs":2505.3744160002097},{"questionId":"q178","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":668,"outputTokens":775,"latencyMs":11512.826999999583},{"questionId":"q178","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":759,"outputTokens":583,"latencyMs":9364.836542000063},{"questionId":"q178","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1110,"outputTokens":199,"latencyMs":4985.15625},{"questionId":"q178","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":778,"outputTokens":199,"latencyMs":3276.027791999746},{"questionId":"q179","format":"json-pretty","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1025,"outputTokens":199,"latencyMs":4783.0132499998435},{"questionId":"q179","format":"json-compact","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":667,"outputTokens":839,"latencyMs":9481.153750000056},{"questionId":"q179","format":"toon","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":758,"outputTokens"
:199,"latencyMs":4011.8218750003725},{"questionId":"q179","format":"xml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":1109,"outputTokens":327,"latencyMs":4096.573667000048},{"questionId":"q179","format":"yaml","model":"gpt-5-nano","expected":"1","actual":"1","isCorrect":true,"inputTokens":777,"outputTokens":455,"latencyMs":5798.487083999906},{"questionId":"q180","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":6451,"outputTokens":135,"latencyMs":5243.732333999593},{"questionId":"q180","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":4044,"outputTokens":135,"latencyMs":2556.379874999635},{"questionId":"q180","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2608,"outputTokens":135,"latencyMs":1891.7810420002788},{"questionId":"q180","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":2442,"outputTokens":263,"latencyMs":5116.138582999818},{"questionId":"q180","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":7413,"outputTokens":135,"latencyMs":5086.265749999788},{"questionId":"q180","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":5071,"outputTokens":135,"latencyMs":2536.9129999997094},{"questionId":"q181","format":"json-pretty","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":6456,"outputTokens":338,"latencyMs":7833.342083000112},{"questionId":"q181","format":"json-compact","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4049,"outputTokens":594,"latencyMs
":5790.048458999954},{"questionId":"q181","format":"toon","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2613,"outputTokens":146,"latencyMs":3634.676667000167},{"questionId":"q181","format":"csv","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2447,"outputTokens":210,"latencyMs":3299.1852080002427},{"questionId":"q181","format":"xml","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7418,"outputTokens":466,"latencyMs":5790.619957999792},{"questionId":"q181","format":"yaml","model":"gpt-5-nano","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5076,"outputTokens":466,"latencyMs":5859.513791000005},{"questionId":"q182","format":"json-pretty","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":6454,"outputTokens":775,"latencyMs":11483.762374999933},{"questionId":"q182","format":"json-compact","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":4047,"outputTokens":519,"latencyMs":8336.065458999947},{"questionId":"q182","format":"toon","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":2611,"outputTokens":327,"latencyMs":4580.0390419997275},{"questionId":"q182","format":"csv","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":2445,"outputTokens":135,"latencyMs":4742.354375000112},{"questionId":"q182","format":"xml","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":7416,"ou
tputTokens":263,"latencyMs":4211.900249999948},{"questionId":"q182","format":"yaml","model":"gpt-5-nano","expected":"email","actual":"email","isCorrect":true,"inputTokens":5074,"outputTokens":327,"latencyMs":4381.881250000093},{"questionId":"q183","format":"json-pretty","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":6455,"outputTokens":327,"latencyMs":4497.625540999696},{"questionId":"q183","format":"json-compact","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4048,"outputTokens":199,"latencyMs":3268.267374999821},{"questionId":"q183","format":"toon","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2612,"outputTokens":263,"latencyMs":5449.473415999673},{"questionId":"q183","format":"csv","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2446,"outputTokens":263,"latencyMs":4097.444000000134},{"questionId":"q183","format":"xml","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":7417,"outputTokens":263,"latencyMs":4184.440708999988},{"questionId":"q183","format":"yaml","model":"gpt-5-nano","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5075,"outputTokens":135,"latencyMs":2946.645917000249},{"questionId":"q184","format":"json-pretty","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":6455,"outputTokens":204,"latencyMs":9614.618167000357},{"questionId":"q184","format":"json-compact","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":4048,"outputTokens":140,"latencyMs":2873.793583000079},{"questionId":"q184","format":"toon","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":2612,"outputTokens":140,"latencyMs":2877.5963750001974},{"questionId":"q184","format":"csv","model":"gpt-5-nano","expected":"Mrs. 
Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":2446,"outputTokens":204,"latencyMs":3551.0457079997286},{"questionId":"q184","format":"xml","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":7417,"outputTokens":140,"latencyMs":2625.599458000157},{"questionId":"q184","format":"yaml","model":"gpt-5-nano","expected":"Mrs. Sherri Ritchie","actual":"Mrs. Sherri Ritchie","isCorrect":true,"inputTokens":5075,"outputTokens":204,"latencyMs":2858.9412090000696},{"questionId":"q185","format":"json-pretty","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":6452,"outputTokens":199,"latencyMs":3881.6359999999404},{"questionId":"q185","format":"json-compact","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":4045,"outputTokens":263,"latencyMs":20388.577374999877},{"questionId":"q185","format":"toon","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":2609,"outputTokens":199,"latencyMs":3270.6690000002272},{"questionId":"q185","format":"csv","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":2443,"outputTokens":3591,"latencyMs":29106.583208000287},{"questionId":"q185","format":"xml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":7414,"outputTokens":391,"latencyMs":4287.198749999981},{"questionId":"q185","format":"yaml","model":"gpt-5-nano","expected":"7","actual":"7","isCorrect":true,"inputTokens":5072,"outputTokens":327,"latencyMs":3268.473958000075},{"questionId":"q186","format":"json-pretty","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":10804,"outputTokens":583,"latencyMs":5497.04287499981},{"questionId":"q186","format":"json-compact","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":6885,"outputTokens":519,"latencyMs":8948.515999999829},{"questionId":"q186","format":"toon",
"model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":7322,"outputTokens":199,"latencyMs":4607.618165999651},{"questionId":"q186","format":"xml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":12112,"outputTokens":263,"latencyMs":3848.347834000364},{"questionId":"q186","format":"yaml","model":"gpt-5-nano","expected":"50","actual":"50","isCorrect":true,"inputTokens":8436,"outputTokens":263,"latencyMs":3796.977042000275},{"questionId":"q187","format":"json-pretty","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orders","isCorrect":false,"inputTokens":10811,"outputTokens":455,"latencyMs":5749.156665999908},{"questionId":"q187","format":"json-compact","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":6892,"outputTokens":725,"latencyMs":8672.660540999845},{"questionId":"q187","format":"toon","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":7329,"outputTokens":533,"latencyMs":5036.442500000354},{"questionId":"q187","format":"xml","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":12119,"outputTokens":405,"latencyMs":4477.925249999855},{"questionId":"q187","format":"yaml","model":"gpt-5-nano","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8443,"outputTokens":533,"latencyMs":5181.2483749999665},{"questionId":"q188","format":"json-pretty","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTok
ens":10807,"outputTokens":263,"latencyMs":4242.422333000228},{"questionId":"q188","format":"json-compact","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":6888,"outputTokens":519,"latencyMs":5718.889750000089},{"questionId":"q188","format":"toon","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":7325,"outputTokens":327,"latencyMs":5340.791166000068},{"questionId":"q188","format":"xml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":12115,"outputTokens":967,"latencyMs":9309.043292000424},{"questionId":"q188","format":"yaml","model":"gpt-5-nano","expected":"4","actual":"4","isCorrect":true,"inputTokens":8439,"outputTokens":583,"latencyMs":6073.824750000145},{"questionId":"q189","format":"json-pretty","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":10812,"outputTokens":140,"latencyMs":4690.233665999956},{"questionId":"q189","format":"json-compact","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":6893,"outputTokens":268,"latencyMs":5466.577833000105},{"questionId":"q189","format":"toon","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":7330,"outputTokens":204,"latencyMs":2964.15870800009},{"questionId":"q189","format":"xml","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":12120,"outputTokens":332,"latencyMs":4515.02179200016},{"questionId":"q189","format":"yaml","model":"gpt-5-nano","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8444,"outputTokens":268,"latencyMs":8201.164624999743},{"questionId":"q190","format":"json-pretty","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":10808,"ou
tputTokens":200,"latencyMs":4878.779084000271},{"questionId":"q190","format":"json-compact","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":6889,"outputTokens":200,"latencyMs":3476.9356669997796},{"questionId":"q190","format":"toon","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7326,"outputTokens":200,"latencyMs":2631.7864590003155},{"questionId":"q190","format":"xml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12116,"outputTokens":200,"latencyMs":3225.6047499999404},{"questionId":"q190","format":"yaml","model":"gpt-5-nano","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8440,"outputTokens":264,"latencyMs":6446.187125000171},{"questionId":"q191","format":"json-pretty","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":10813,"outputTokens":395,"latencyMs":4756.288000000175},{"questionId":"q191","format":"json-compact","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":6894,"outputTokens":267,"latencyMs":4393.102415999863},{"questionId":"q191","format":"toon","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":7331,"outputTokens":267,"latencyMs":5854.2481249999255},{"questionId":"q191","format":"xml","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":12121,"outputTokens":267,"latencyMs":6437.172999999952},{"questionId":"q191","format":"yaml","model":"gpt-5-nano","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8445,"outputTokens":203,"latencyMs":3253.0284589999355},{"questionId":"q192","format":"json-pretty","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3768,"
outputTokens":839,"latencyMs":7548.902958999854},{"questionId":"q192","format":"json-compact","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":2441,"outputTokens":647,"latencyMs":8269.41454199981},{"questionId":"q192","format":"toon","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1639,"outputTokens":263,"latencyMs":8128.112708000001},{"questionId":"q192","format":"csv","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":1497,"outputTokens":967,"latencyMs":14334.201832999941},{"questionId":"q192","format":"xml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":4474,"outputTokens":903,"latencyMs":11843.660625000019},{"questionId":"q192","format":"yaml","model":"gpt-5-nano","expected":"60","actual":"60","isCorrect":true,"inputTokens":3039,"outputTokens":839,"latencyMs":12383.660041000228},{"questionId":"q193","format":"json-pretty","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3772,"outputTokens":212,"latencyMs":3716.6943749999627},{"questionId":"q193","format":"json-compact","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2445,"outputTokens":276,"latencyMs":4321.190417000093},{"questionId":"q193","format":"toon","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1643,"outputTokens":276,"latencyMs":3628.611583000049},{"questionId":"q193","format":"csv","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1501,"outputTokens":276,"latencyMs":3329.85208300
03545},{"questionId":"q193","format":"xml","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4478,"outputTokens":276,"latencyMs":8496.843958000187},{"questionId":"q193","format":"yaml","model":"gpt-5-nano","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3043,"outputTokens":340,"latencyMs":4394.631957999896},{"questionId":"q194","format":"json-pretty","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3771,"outputTokens":392,"latencyMs":7423.73466600012},{"questionId":"q194","format":"json-compact","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2444,"outputTokens":584,"latencyMs":5982.649750000332},{"questionId":"q194","format":"toon","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1642,"outputTokens":200,"latencyMs":4239.526542000007},{"questionId":"q194","format":"csv","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1500,"outputTokens":200,"latencyMs":5031.122084000148},{"questionId":"q194","format":"xml","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":4477,"outputTokens":264,"latencyMs":7930.7731669996865},{"questionId":"q194","format":"yaml","model":"gpt-5-nano","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3042,"outputTokens":264,"latencyMs":3106.251792000141},{"questionId":"q195","format":"json-pretty","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3772,"outputTokens":524,"latencyMs":8153.412459000014},{"questionId":"q195","format":"json-compact","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2445,
"outputTokens":588,"latencyMs":7503.793999999762},{"questionId":"q195","format":"toon","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1643,"outputTokens":204,"latencyMs":3050.2509169997647},{"questionId":"q195","format":"csv","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1501,"outputTokens":140,"latencyMs":2617.243791999761},{"questionId":"q195","format":"xml","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4478,"outputTokens":204,"latencyMs":3606.4029169999994},{"questionId":"q195","format":"yaml","model":"gpt-5-nano","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3043,"outputTokens":140,"latencyMs":2635.89858300006},{"questionId":"q196","format":"json-pretty","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":3768,"outputTokens":199,"latencyMs":2703.3255830002017},{"questionId":"q196","format":"json-compact","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":2441,"outputTokens":327,"latencyMs":8638.098624999635},{"questionId":"q196","format":"toon","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":1639,"outputTokens":263,"latencyMs":5401.915333000012},{"questionId":"q196","format":"csv","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":1497,"outputTokens":327,"latencyMs":5221.692542000208},{"questionId":"q196","format":"xml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":4474,"outputTokens":263,"latencyMs":3620.0856249998324},{"questionId":"q196","format":"yaml","model":"gpt-5-nano","expected":"6","actual":"6","isCorrect":true,"inputTokens":3039,"outputTokens":263,"latencyMs":3699.8902920000255},{"questionId":"q197","format":"json-pretty","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":15248,"outputTo
kens":1415,"latencyMs":16647.309458999895},{"questionId":"q197","format":"json-compact","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":11555,"outputTokens":1415,"latencyMs":28783.25216699997},{"questionId":"q197","format":"toon","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8869,"outputTokens":199,"latencyMs":2771.9329590001144},{"questionId":"q197","format":"csv","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":8617,"outputTokens":1543,"latencyMs":13179.85262500029},{"questionId":"q197","format":"xml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":17194,"outputTokens":2439,"latencyMs":22739.320625000168},{"questionId":"q197","format":"yaml","model":"gpt-5-nano","expected":"100","actual":"100","isCorrect":true,"inputTokens":13230,"outputTokens":6023,"latencyMs":51610.507999999914},{"questionId":"q198","format":"json-pretty","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":15253,"outputTokens":415,"latencyMs":4503.3462910000235},{"questionId":"q198","format":"json-compact","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":11560,"outputTokens":543,"latencyMs":5954.608874999918},{"questionId":"q198","format":"toon","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8874,"outputTokens":351,"latencyMs":5119.701166999992},{"
questionId":"q198","format":"csv","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8622,"outputTokens":223,"latencyMs":3383.965749999974},{"questionId":"q198","format":"xml","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17199,"outputTokens":351,"latencyMs":8255.69604199985},{"questionId":"q198","format":"yaml","model":"gpt-5-nano","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":13235,"outputTokens":415,"latencyMs":5688.796708000358},{"questionId":"q199","format":"json-pretty","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":15253,"outputTokens":713,"latencyMs":6935.332459000405},{"questionId":"q199","format":"json-compact","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":11560,"outputTokens":777,"latencyMs":7743.049292000011},{"questionId":"q199","format":"toon","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":8874,"outputTokens":329,"latencyMs":7626.734916999936},{"questionId":"q199","format":"csv","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":8622,"outputTokens":201,"latencyMs":3741.813084000256},{"questionId":"q199","format":"xml","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":17199,"outputTokens":329,"latencyMs":4252.712875000201},{"questionId":"q199","form
at":"yaml","model":"gpt-5-nano","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":13235,"outputTokens":329,"latencyMs":4048.160000000149},{"questionId":"q200","format":"json-pretty","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":15252,"outputTokens":265,"latencyMs":4140.997250000015},{"questionId":"q200","format":"json-compact","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":11559,"outputTokens":457,"latencyMs":4518.3047080002725},{"questionId":"q200","format":"toon","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":8873,"outputTokens":393,"latencyMs":4574.140791999642},{"questionId":"q200","format":"csv","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":8621,"outputTokens":585,"latencyMs":5762.444084000308},{"questionId":"q200","format":"xml","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17198,"outputTokens":393,"latencyMs":4484.691707999911},{"questionId":"q200","format":"yaml","model":"gpt-5-nano","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":13234,"outputTokens":265,"latencyMs":4706.706375000067},{"questionId":"q201","format":"json-pretty","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":15249,"outputTokens":327,"latencyMs":3816.0244579999708},{"questionId":"q201","format":"json-compact","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":11556,"outputTokens":455,"latencyMs":6546.3870419999585},{"questionId":"q201","format":"toon","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":8870,"outputTokens":263,"latencyMs":4477.5936670000665},{"questionId":"q201","format":"csv","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":8618,"outpu
tTokens":455,"latencyMs":5239.246875000186},{"questionId":"q201","format":"xml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":17195,"outputTokens":647,"latencyMs":7984.748166000005},{"questionId":"q201","format":"yaml","model":"gpt-5-nano","expected":"11","actual":"11","isCorrect":true,"inputTokens":13231,"outputTokens":263,"latencyMs":8407.612875000108},{"questionId":"q202","format":"json-pretty","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":6889,"outputTokens":2951,"latencyMs":27031.601250000298},{"questionId":"q202","format":"json-compact","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":4911,"outputTokens":10631,"latencyMs":81286.11437499989},{"questionId":"q202","format":"toon","model":"gpt-5-nano","expected":"75","actual":"75","isCorrect":true,"inputTokens":5890,"outputTokens":135,"latencyMs":3970.110458999872},{"questionId":"q202","format":"xml","model":"gpt-5-nano","expected":"75","actual":"58","isCorrect":false,"inputTokens":7799,"outputTokens":4871,"latencyMs":149805.12412499962},{"questionId":"q202","format":"yaml","model":"gpt-5-nano","expected":"75","actual":"100","isCorrect":false,"inputTokens":5917,"outputTokens":13703,"latencyMs":155739.1889589997},{"questionId":"q203","format":"json-pretty","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6898,"outputTokens":466,"latencyMs":9108.469208000228},{"questionId":"q203","format":"json-compact","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":4920,"outputTokens":1106,"latencyMs":10545.836332999635},{"questionId":"q203","format":"toon","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,respo
nseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":5899,"outputTokens":594,"latencyMs":8183.054624999873},{"questionId":"q203","format":"xml","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7808,"outputTokens":1426,"latencyMs":11678.348749999888},{"questionId":"q203","format":"yaml","model":"gpt-5-nano","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":5926,"outputTokens":850,"latencyMs":8783.712416999973},{"questionId":"q204","format":"json-pretty","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":6893,"outputTokens":455,"latencyMs":4951.810749999713},{"questionId":"q204","format":"json-compact","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":4915,"outputTokens":583,"latencyMs":8194.983541999944},{"questionId":"q204","format":"toon","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5894,"outputTokens":263,"latencyMs":8032.524958000053},{"questionId":"q204","format":"xml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":7803,"outputTokens":263,"latencyMs":5646.691916999873},{"questionId":"q204","format":"yaml","model":"gpt-5-nano","expected":"info","actual":"info","isCorrect":true,"inputTokens":5921,"outputTokens":199,"latencyMs":3208.1868750001304},{"questionId":"q205","format":"json-pretty","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1378,"outputTokens":455,"latencyMs":5234.0922079999},{"questionId":"q205","format":"json-compact","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":891,"outputTokens":
1159,"latencyMs":13605.088709000032},{"questionId":"q205","format":"toon","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":639,"outputTokens":711,"latencyMs":6515.72516599996},{"questionId":"q205","format":"csv","model":"gpt-5-nano","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":587,"outputTokens":1479,"latencyMs":14114.901416999754},{"questionId":"q205","format":"xml","model":"gpt-5-nano","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":1561,"outputTokens":327,"latencyMs":5021.389500000048},{"questionId":"q205","format":"yaml","model":"gpt-5-nano","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1097,"outputTokens":455,"latencyMs":5526.448458000086},{"questionId":"q206","format":"json-pretty","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1192,"outputTokens":647,"latencyMs":6677.833459000103},{"questionId":"q206","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":777,"outputTokens":455,"latencyMs":5277.759208000265},{"questionId":"q206","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":569,"outputTokens":1543,"latencyMs":16482.88245799998},{"questionId":"q206","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":520,"outputTokens":647,"latencyMs":8937.574082999956},{"questionId":"q206","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1345,"outputTokens":391,"latencyMs":5002.396875000093},{"questionId":"q206","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":953,"outputTokens":711,"latencyMs":6825.064792000223},{"questionId":"q207","format":"json-pretty","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1572,"outputTokens":1351,"latencyMs":11435.325457999948},{"questionId":"
q207","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1013,"outputTokens":1671,"latencyMs":17690.187415999826},{"questionId":"q207","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":716,"outputTokens":3399,"latencyMs":31119.575375000015},{"questionId":"q207","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":661,"outputTokens":1031,"latencyMs":13081.246334000025},{"questionId":"q207","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1786,"outputTokens":455,"latencyMs":8677.37220800016},{"questionId":"q207","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1251,"outputTokens":903,"latencyMs":8989.367707999889},{"questionId":"q208","format":"json-pretty","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1370,"outputTokens":775,"latencyMs":7549.4047079999},{"questionId":"q208","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":886,"outputTokens":1095,"latencyMs":9804.55941700004},{"questionId":"q208","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1114,"outputTokens":967,"latencyMs":8704.191707999911},{"questionId":"q208","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":584,"outputTokens":519,"latencyMs":8088.646707999986},{"questionId":"q208","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1552,"outputTokens":583,"latencyMs":8100.714499999769},{"questionId":"q208","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1090,"outputTokens":967,"latencyMs":8288.674542000052},{"questionId":"q209","format":"json-pretty","model":"gpt-5-nano","expected":"NO",
"actual":"YES","isCorrect":false,"inputTokens":1326,"outputTokens":647,"latencyMs":6657.2364159999415},{"questionId":"q209","format":"json-compact","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":851,"outputTokens":1159,"latencyMs":13106.269707999658},{"questionId":"q209","format":"toon","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1078,"outputTokens":1351,"latencyMs":22118.10566599993},{"questionId":"q209","format":"csv","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":424,"outputTokens":1031,"latencyMs":10323.110000000335},{"questionId":"q209","format":"xml","model":"gpt-5-nano","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1505,"outputTokens":455,"latencyMs":5428.757249999791},{"questionId":"q209","format":"yaml","model":"gpt-5-nano","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1054,"outputTokens":1671,"latencyMs":15239.667082999833}] ================================================ FILE: benchmarks/results/accuracy/models/grok-4-1-fast-non-reasoning ================================================ 
[{"questionId":"q1","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":6531,"outputTokens":2,"latencyMs":744.4004170000003},{"questionId":"q1","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":4122,"outputTokens":2,"latencyMs":624.5607500000001},{"questionId":"q1","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":2701,"outputTokens":2,"latencyMs":783.8468750000002},{"questionId":"q1","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":2538,"outputTokens":2,"latencyMs":612.0763750000001},{"questionId":"q1","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":7470,"outputTokens":2,"latencyMs":870.1430420000002},{"questionId":"q1","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"146288","actual":"146288","isCorrect":true,"inputTokens":5170,"outputTokens":2,"latencyMs":606.9509579999999},{"questionId":"q2","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":6531,"outputTokens":1,"latencyMs":873.3557919999998},{"questionId":"q2","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":4122,"outputTokens":1,"latencyMs":613.800542},{"questionId":"q2","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2701,"outputTokens":1,"latencyMs":754.9637910000001},{"questionId":"q2","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":2538,"outputTokens":1,"latencyMs":867.420541},{"questionId
":"q2","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":7470,"outputTokens":1,"latencyMs":817.9377920000002},{"questionId":"q2","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Marketing","actual":"Marketing","isCorrect":true,"inputTokens":5170,"outputTokens":1,"latencyMs":469.9504579999998},{"questionId":"q3","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":6531,"outputTokens":7,"latencyMs":661.1184170000001},{"questionId":"q3","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":4122,"outputTokens":7,"latencyMs":488.89708299999984},{"questionId":"q3","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":2701,"outputTokens":7,"latencyMs":441.70254200000045},{"questionId":"q3","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":2538,"outputTokens":7,"latencyMs":532.721708},{"questionId":"q3","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":7470,"outputTokens":7,"latencyMs":634.2196670000003},{"questionId":"q3","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"ramon.wiegand@hotmail.com","actual":"ramon.wiegand@hotmail.com","isCorrect":true,"inputTokens":5170,"outputTokens":7,"latencyMs":696.9583749999997},{"questionId":"q4","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":13224,"outputTokens":1,"latencyMs":917.3097500000003},{"question
Id":"q4","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":4123,"outputTokens":1,"latencyMs":716.4699999999998},{"questionId":"q4","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":2702,"outputTokens":1,"latencyMs":580.384791},{"questionId":"q4","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":2539,"outputTokens":1,"latencyMs":416.5690419999996},{"questionId":"q4","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":7471,"outputTokens":1,"latencyMs":634.5267919999997},{"questionId":"q4","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":5171,"outputTokens":1,"latencyMs":643.9792089999996},{"questionId":"q5","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":702.2764589999997},{"questionId":"q5","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4120,"outputTokens":1,"latencyMs":659.5474580000009},{"questionId":"q5","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":577.9544170000008},{"questionId":"q5","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"yes","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":465.2838330000013},{"questionId":"q5","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":634.7964580000043},{"questionId":"q5","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorre
ct":true,"inputTokens":5168,"outputTokens":1,"latencyMs":668.1913339999955},{"questionId":"q6","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":6531,"outputTokens":2,"latencyMs":643.9961659999972},{"questionId":"q6","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":4122,"outputTokens":2,"latencyMs":464.65929199999664},{"questionId":"q6","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":2701,"outputTokens":2,"latencyMs":466.872292},{"questionId":"q6","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":2538,"outputTokens":2,"latencyMs":491.4008329999997},{"questionId":"q6","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":7470,"outputTokens":2,"latencyMs":607.0218329999989},{"questionId":"q6","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"114426","actual":"114426","isCorrect":true,"inputTokens":5170,"outputTokens":2,"latencyMs":450.4032909999951},{"questionId":"q7","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":6533,"outputTokens":1,"latencyMs":521.4899160000059},{"questionId":"q7","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":4124,"outputTokens":1,"latencyMs":594.7228749999995},{"questionId":"q7","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":2703,"outputTokens":1,"latencyMs":423.5501250000016},{"questionId":"q7","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineerin
g","isCorrect":true,"inputTokens":2540,"outputTokens":1,"latencyMs":449.349000000002},{"questionId":"q7","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":7472,"outputTokens":1,"latencyMs":562.4655000000057},{"questionId":"q7","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Engineering","actual":"Engineering","isCorrect":true,"inputTokens":5172,"outputTokens":1,"latencyMs":422.5996249999953},{"questionId":"q8","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":6532,"outputTokens":6,"latencyMs":404.39295899999706},{"questionId":"q8","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":4123,"outputTokens":6,"latencyMs":529.7585000000036},{"questionId":"q8","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":2702,"outputTokens":6,"latencyMs":498.1508329999997},{"questionId":"q8","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":2539,"outputTokens":6,"latencyMs":432.265790999998},{"questionId":"q8","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":7471,"outputTokens":6,"latencyMs":743.6265419999982},{"questionId":"q8","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"evan_metz@yahoo.com","actual":"evan_metz@yahoo.com","isCorrect":true,"inputTokens":10502,"outputTokens":6,"latencyMs":817.4960830000055},{"questionId":"q9","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":6534,"outputTokens":1,"l
atencyMs":584.2985830000034},{"questionId":"q9","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":4125,"outputTokens":1,"latencyMs":439.3900419999991},{"questionId":"q9","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":2704,"outputTokens":1,"latencyMs":584.6250419999997},{"questionId":"q9","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":2541,"outputTokens":1,"latencyMs":581.2358340000064},{"questionId":"q9","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":7473,"outputTokens":1,"latencyMs":857.8352500000037},{"questionId":"q9","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":5173,"outputTokens":1,"latencyMs":741.203874999992},{"questionId":"q10","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":601.8820419999975},{"questionId":"q10","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":4120,"outputTokens":1,"latencyMs":546.5664580000011},{"questionId":"q10","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":782.0337089999957},{"questionId":"q10","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"1","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":550.6735000000044},{"questionId":"q10","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":806.8262499999983},{"questionId":"q10","format":"yaml","model":"grok-4-1-fast-non-rea
soning","expected":"yes","actual":"true","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":495.4067090000026},{"questionId":"q11","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":13220,"outputTokens":2,"latencyMs":981.7749999999942},{"questionId":"q11","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":4121,"outputTokens":2,"latencyMs":564.8935420000053},{"questionId":"q11","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":2700,"outputTokens":2,"latencyMs":705.6945840000117},{"questionId":"q11","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":2537,"outputTokens":2,"latencyMs":444.15466699999524},{"questionId":"q11","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":7469,"outputTokens":2,"latencyMs":714.6199590000033},{"questionId":"q11","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"67757","actual":"67757","isCorrect":true,"inputTokens":5169,"outputTokens":2,"latencyMs":391.2693750000035},{"questionId":"q12","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":6532,"outputTokens":1,"latencyMs":561.0343339999963},{"questionId":"q12","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":4123,"outputTokens":1,"latencyMs":384.5078749999957},{"questionId":"q12","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2702,"outputTokens":1,"latencyMs":377.22441699999035},{"questionId":"q12","format":"csv","model":"grok-4-1-fast-non-reason
ing","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":2539,"outputTokens":1,"latencyMs":451.06324999999197},{"questionId":"q12","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":7471,"outputTokens":1,"latencyMs":577.9445830000041},{"questionId":"q12","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Operations","actual":"Operations","isCorrect":true,"inputTokens":5171,"outputTokens":1,"latencyMs":498.8087499999965},{"questionId":"q13","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"10","isCorrect":false,"inputTokens":6528,"outputTokens":1,"latencyMs":631.3149169999961},{"questionId":"q13","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"12","isCorrect":false,"inputTokens":4119,"outputTokens":1,"latencyMs":493.6619999999966},{"questionId":"q13","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2698,"outputTokens":1,"latencyMs":491.4699580000015},{"questionId":"q13","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":2535,"outputTokens":1,"latencyMs":704.4415830000071},{"questionId":"q13","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":7467,"outputTokens":1,"latencyMs":853.990792000026},{"questionId":"q13","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"12","isCorrect":false,"inputTokens":5167,"outputTokens":1,"latencyMs":6325.531707999995},{"questionId":"q14","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":6528,"outputTokens":1,"latencyMs":825.4149170000164},{"questionId":"q14","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","is
Correct":false,"inputTokens":4119,"outputTokens":1,"latencyMs":1838.9603329999954},{"questionId":"q14","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2698,"outputTokens":1,"latencyMs":546.789208000002},{"questionId":"q14","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2535,"outputTokens":1,"latencyMs":487.18799999999464},{"questionId":"q14","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":7467,"outputTokens":1,"latencyMs":719.2681250000023},{"questionId":"q14","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":5167,"outputTokens":1,"latencyMs":620.3604580000101},{"questionId":"q15","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":6528,"outputTokens":1,"latencyMs":702.3599169999943},{"questionId":"q15","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"14","isCorrect":false,"inputTokens":4119,"outputTokens":1,"latencyMs":1350.6427919999987},{"questionId":"q15","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"20","isCorrect":false,"inputTokens":2698,"outputTokens":1,"latencyMs":445.94154200001503},{"questionId":"q15","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"25","isCorrect":false,"inputTokens":2535,"outputTokens":1,"latencyMs":430.4858330000134},{"questionId":"q15","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":7467,"outputTokens":1,"latencyMs":764.09283400001},{"questionId":"q15","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"17","actual":"16","isCorrect":false,"inputTokens":5167,"outputTokens":1,"latencyMs":718.1185830000031},{"questionId":"q
16","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":6533,"outputTokens":1,"latencyMs":640.624833000009},{"questionId":"q16","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"92","isCorrect":false,"inputTokens":4124,"outputTokens":1,"latencyMs":518.7864580000169},{"questionId":"q16","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":2703,"outputTokens":1,"latencyMs":505.09270800001104},{"questionId":"q16","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":2540,"outputTokens":1,"latencyMs":7579.916916999995},{"questionId":"q16","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":7472,"outputTokens":1,"latencyMs":826.4459579999966},{"questionId":"q16","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"86","actual":"95","isCorrect":false,"inputTokens":5172,"outputTokens":1,"latencyMs":513.2143330000108},{"questionId":"q17","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"67","isCorrect":false,"inputTokens":6533,"outputTokens":1,"latencyMs":521.1675840000098},{"questionId":"q17","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"68","isCorrect":false,"inputTokens":4124,"outputTokens":1,"latencyMs":657.3941669999913},{"questionId":"q17","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"68","isCorrect":false,"inputTokens":2703,"outputTokens":1,"latencyMs":427.2082079999964},{"questionId":"q17","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"67","isCorrect":false,"inputTokens":2540,"outputTokens":1,"latencyMs":500.1402919999964},{"questionId":"q17","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"
67","isCorrect":false,"inputTokens":7472,"outputTokens":1,"latencyMs":624.4298750000016},{"questionId":"q17","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"65","actual":"67","isCorrect":false,"inputTokens":5172,"outputTokens":1,"latencyMs":532.2810839999875},{"questionId":"q18","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"42","isCorrect":false,"inputTokens":6533,"outputTokens":1,"latencyMs":725.5792080000101},{"questionId":"q18","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"52","isCorrect":false,"inputTokens":4124,"outputTokens":1,"latencyMs":518.0593750000116},{"questionId":"q18","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"58","isCorrect":false,"inputTokens":2703,"outputTokens":1,"latencyMs":525.0036670000118},{"questionId":"q18","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"58","isCorrect":false,"inputTokens":2540,"outputTokens":1,"latencyMs":479.3065420000057},{"questionId":"q18","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"47","isCorrect":true,"inputTokens":7472,"outputTokens":1,"latencyMs":705.658167000016},{"questionId":"q18","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"47","actual":"45","isCorrect":false,"inputTokens":5172,"outputTokens":1,"latencyMs":866.4303749999963},{"questionId":"q19","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":585.0571249999921},{"questionId":"q19","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":4120,"outputTokens":1,"latencyMs":741.8611669999955},{"questionId":"q19","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":362.5301249
999902},{"questionId":"q19","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":432.6569170000148},{"questionId":"q19","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":545.3256669999973},{"questionId":"q19","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":572.6263749999925},{"questionId":"q20","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"104140","isCorrect":false,"inputTokens":6530,"outputTokens":2,"latencyMs":744.2125410000153},{"questionId":"q20","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"{\"averageSalary\":102382}\n\n## Explanation\n\nTo calculate the average salary across all employees, the process involves these steps:\n\n1. Extract the salary values from the employees array, yielding 100 numeric values.\n2. Sum these values: total = 10238200.\n3. Divide the total by the number of employees: 10238200 ÷ 100 = 102382.\n4. 
The result is the average salary, output as a compact JSON object with the key \"averageSalary\".","isCorrect":false,"inputTokens":4121,"outputTokens":100,"latencyMs":1392.9901250000112},{"questionId":"q20","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"104878","isCorrect":false,"inputTokens":2700,"outputTokens":2,"latencyMs":434.17908299999544},{"questionId":"q20","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"100492","isCorrect":false,"inputTokens":2537,"outputTokens":2,"latencyMs":572.8373330000031},{"questionId":"q20","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"99999","isCorrect":false,"inputTokens":7469,"outputTokens":2,"latencyMs":734.3059579999826},{"questionId":"q20","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"96825","actual":"104122","isCorrect":false,"inputTokens":5169,"outputTokens":2,"latencyMs":486.9180000000051},{"questionId":"q21","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"76","isCorrect":false,"inputTokens":6527,"outputTokens":1,"latencyMs":650.6604170000064},{"questionId":"q21","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"73","isCorrect":false,"inputTokens":4118,"outputTokens":1,"latencyMs":552.6554160000233},{"questionId":"q21","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"72","isCorrect":false,"inputTokens":2697,"outputTokens":1,"latencyMs":411.1337080000085},{"questionId":"q21","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"62","isCorrect":false,"inputTokens":2534,"outputTokens":1,"latencyMs":448.09316699998453},{"questionId":"q21","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"79","actual":"77","isCorrect":false,"inputTokens":7466,"outputTokens":1,"latencyMs":559.984334000008},{"questionId":"q21","format":"yaml","model":"grok-4-1-fast-non-reasoning"
,"expected":"79","actual":"77","isCorrect":false,"inputTokens":5166,"outputTokens":1,"latencyMs":555.7300839999807},{"questionId":"q22","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":6527,"outputTokens":1,"latencyMs":603.9903330000234},{"questionId":"q22","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"22","isCorrect":false,"inputTokens":4118,"outputTokens":1,"latencyMs":553.2302919999929},{"questionId":"q22","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":2697,"outputTokens":1,"latencyMs":538.6504580000183},{"questionId":"q22","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"26","isCorrect":false,"inputTokens":2534,"outputTokens":1,"latencyMs":661.0164169999771},{"questionId":"q22","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":7466,"outputTokens":1,"latencyMs":648.4302499999758},{"questionId":"q22","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"21","actual":"20","isCorrect":false,"inputTokens":5166,"outputTokens":1,"latencyMs":510.7701249999809},{"questionId":"q23","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":6535,"outputTokens":1,"latencyMs":678.8493340000277},{"questionId":"q23","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"14","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":530.6694580000476},{"questionId":"q23","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"19","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":557.4721249999711},{"questionId":"q23","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"20","isCorrect":false,"inputTokens":2542,"outputTokens":1
,"latencyMs":415.67420799995307},{"questionId":"q23","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"18","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":697.5731660000165},{"questionId":"q23","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"11","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":693.6044589999947},{"questionId":"q24","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"12","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":762.7310419999994},{"questionId":"q24","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":543.1126659999718},{"questionId":"q24","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"16","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":471.44666600000346},{"questionId":"q24","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":460.0599579999689},{"questionId":"q24","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"18","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":558.4079590000329},{"questionId":"q24","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"18","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":525.0044999999809},{"questionId":"q25","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"11","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":656.2660419999738},{"questionId":"q25","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":477.75929100002395},{"questionId":"q25","format":"toon","model":"grok-4-1-f
ast-non-reasoning","expected":"13","actual":"12","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":491.8847079999978},{"questionId":"q25","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":492.8086250000051},{"questionId":"q25","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"21","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":499.74279200000456},{"questionId":"q25","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"20","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":349.43766699999105},{"questionId":"q26","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"5","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":724.9569999999949},{"questionId":"q26","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"7","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":587.3300000000163},{"questionId":"q26","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"8","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":540.6909169999999},{"questionId":"q26","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"9","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":630.7571250000037},{"questionId":"q26","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"8","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":892.6946670000325},{"questionId":"q26","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"7","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":529.9295830000192},{"questionId":"q27","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"11","isCorrect":false,"inputTokens":6535,"outputTok
ens":1,"latencyMs":884.9784160000272},{"questionId":"q27","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":539.7102500000037},{"questionId":"q27","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"20","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":537.8525420000078},{"questionId":"q27","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"14","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":570.3273330000229},{"questionId":"q27","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":635.3287920000148},{"questionId":"q27","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"11","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":625.2615409999853},{"questionId":"q28","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"68","isCorrect":false,"inputTokens":6534,"outputTokens":1,"latencyMs":785.9446250000037},{"questionId":"q28","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"68","isCorrect":false,"inputTokens":4125,"outputTokens":1,"latencyMs":446.27083399996627},{"questionId":"q28","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"83","isCorrect":false,"inputTokens":2704,"outputTokens":1,"latencyMs":386.73541600001045},{"questionId":"q28","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"73","isCorrect":false,"inputTokens":2541,"outputTokens":1,"latencyMs":529.6602500000154},{"questionId":"q28","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"78","isCorrect":false,"inputTokens":7473,"outputTokens":1,"latencyMs":479.33741700003156},{"questionId":"q28","format":"yaml","model":"grok-
4-1-fast-non-reasoning","expected":"60","actual":"80","isCorrect":false,"inputTokens":5173,"outputTokens":1,"latencyMs":428.09250000002794},{"questionId":"q29","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"58","isCorrect":false,"inputTokens":6534,"outputTokens":1,"latencyMs":678.0827500000014},{"questionId":"q29","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"55","isCorrect":false,"inputTokens":4125,"outputTokens":1,"latencyMs":473.91145799995866},{"questionId":"q29","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"67","isCorrect":false,"inputTokens":2704,"outputTokens":1,"latencyMs":477.66345900000306},{"questionId":"q29","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"58","isCorrect":false,"inputTokens":2541,"outputTokens":1,"latencyMs":480.2849580000038},{"questionId":"q29","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"58","isCorrect":false,"inputTokens":7473,"outputTokens":1,"latencyMs":625.3212920000078},{"questionId":"q29","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"48","actual":"62","isCorrect":false,"inputTokens":5173,"outputTokens":1,"latencyMs":372.15625},{"questionId":"q30","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"35","isCorrect":false,"inputTokens":6534,"outputTokens":1,"latencyMs":681.7562080000062},{"questionId":"q30","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"38","isCorrect":false,"inputTokens":4125,"outputTokens":1,"latencyMs":520.1463329999824},{"questionId":"q30","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"38","isCorrect":false,"inputTokens":2704,"outputTokens":1,"latencyMs":605.6678340000217},{"questionId":"q30","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"42","isCorrect":false,"inputTokens":25
41,"outputTokens":1,"latencyMs":566.4944160000305},{"questionId":"q30","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"25","isCorrect":false,"inputTokens":7473,"outputTokens":1,"latencyMs":716.9994169999845},{"questionId":"q30","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"36","actual":"35","isCorrect":false,"inputTokens":5173,"outputTokens":1,"latencyMs":488.8819159999839},{"questionId":"q31","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":6535,"outputTokens":1,"latencyMs":731.7278750000405},{"questionId":"q31","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":4126,"outputTokens":1,"latencyMs":555.083666999999},{"questionId":"q31","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"25","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":1560.6482499999693},{"questionId":"q31","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"20","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":513.6154170000227},{"questionId":"q31","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":7474,"outputTokens":1,"latencyMs":837.3640000000014},{"questionId":"q31","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":5174,"outputTokens":1,"latencyMs":388.5099170000176},{"questionId":"q32","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":6535,"outputTokens":1,"latencyMs":704.1717499999795},{"questionId":"q32","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":4126,"outputTokens":1,"latencyMs":740.1301250000251},{"questionId":"q32","format":"toon",
"model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"22","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":678.28216599999},{"questionId":"q32","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"15","isCorrect":false,"inputTokens":2542,"outputTokens":1,"latencyMs":406.4673330000369},{"questionId":"q32","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":7474,"outputTokens":1,"latencyMs":760.0417080000043},{"questionId":"q32","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":471.80441599996993},{"questionId":"q33","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":6535,"outputTokens":1,"latencyMs":628.7932499999879},{"questionId":"q33","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":4126,"outputTokens":1,"latencyMs":338.47966599999927},{"questionId":"q33","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"25","isCorrect":false,"inputTokens":2705,"outputTokens":1,"latencyMs":349.7202920000418},{"questionId":"q33","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":2542,"outputTokens":1,"latencyMs":434.40783300000476},{"questionId":"q33","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"12","isCorrect":true,"inputTokens":7474,"outputTokens":1,"latencyMs":792.124582999968},{"questionId":"q33","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"12","actual":"20","isCorrect":false,"inputTokens":5174,"outputTokens":1,"latencyMs":536.3979579999577},{"questionId":"q34","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"12","isCorrect":false,"inputToke
ns":6529,"outputTokens":1,"latencyMs":789.2198330000392},{"questionId":"q34","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"12","isCorrect":false,"inputTokens":4120,"outputTokens":1,"latencyMs":484.61395800003083},{"questionId":"q34","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"16","isCorrect":false,"inputTokens":2699,"outputTokens":1,"latencyMs":553.2415829999954},{"questionId":"q34","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"14","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":984.8130420000525},{"questionId":"q34","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"12","isCorrect":false,"inputTokens":7468,"outputTokens":1,"latencyMs":783.877790999948},{"questionId":"q34","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"14","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":548.6428749999614},{"questionId":"q35","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"20","isCorrect":false,"inputTokens":6529,"outputTokens":1,"latencyMs":579.86837500002},{"questionId":"q35","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"16","isCorrect":false,"inputTokens":4120,"outputTokens":1,"latencyMs":550.2779159999918},{"questionId":"q35","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"23","isCorrect":false,"inputTokens":2699,"outputTokens":1,"latencyMs":1089.9340000000084},{"questionId":"q35","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"15","isCorrect":false,"inputTokens":2536,"outputTokens":1,"latencyMs":590.8242079999764},{"questionId":"q35","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"25","isCorrect":false,"inputTokens":7468,"outputTokens":1,"latencyMs":731.3217500000028},{"questionId":"q35","format":"yaml","
model":"grok-4-1-fast-non-reasoning","expected":"14","actual":"25","isCorrect":false,"inputTokens":5168,"outputTokens":1,"latencyMs":576.3463750000228},{"questionId":"q36","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":956.360708000022},{"questionId":"q36","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":941.193499999994},{"questionId":"q36","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":624.192958999949},{"questionId":"q36","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":1550.606792000006},{"questionId":"q36","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"806.24","actual":"806.24","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":1126.5071250000037},{"questionId":"q37","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":11530,"outputTokens":2,"latencyMs":706.5146250000107},{"questionId":"q37","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":7304,"outputTokens":2,"latencyMs":848.0497500000056},{"questionId":"q37","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":7622,"outputTokens":2,"latencyMs":688.9195419999887},{"questionId":"q37","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":12949,"outputTokens":2,"latencyMs":665.4055830000434},{"questionId":"q37","format":"yaml","model":"g
rok-4-1-fast-non-reasoning","expected":"shipped","actual":"shipped","isCorrect":true,"inputTokens":8967,"outputTokens":2,"latencyMs":726.3700830000453},{"questionId":"q38","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":1015.0052920000162},{"questionId":"q38","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":570.882500000007},{"questionId":"q38","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":873.4677500000107},{"questionId":"q38","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":895.0652910000063},{"questionId":"q38","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"970.81","actual":"970.81","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":679.0187089999672},{"questionId":"q39","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":834.1685419999994},{"questionId":"q39","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":819.2685000000056},{"questionId":"q39","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":743.3259170000674},{"questionId":"q39","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":866.9567500000121},{"questionId":"q39",
"format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"processing","actual":"processing","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":914.6808339999989},{"questionId":"q40","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":900.5022079999326},{"questionId":"q40","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":683.2722500000382},{"questionId":"q40","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":5871.395708999946},{"questionId":"q40","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":939.6127499999711},{"questionId":"q40","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"891.82","actual":"891.82","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":632.5313339999411},{"questionId":"q41","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":766.8200000000652},{"questionId":"q41","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":699.7452079999493},{"questionId":"q41","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":584.8662079999922},{"questionId":"q41","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":927.346416000044},{"questionId"
:"q41","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"pending","actual":"pending","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":707.2890840000473},{"questionId":"q42","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":11530,"outputTokens":3,"latencyMs":649.6757920000236},{"questionId":"q42","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":7304,"outputTokens":3,"latencyMs":702.0949169999221},{"questionId":"q42","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":7622,"outputTokens":3,"latencyMs":1425.4147919999668},{"questionId":"q42","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":12949,"outputTokens":3,"latencyMs":962.8165000000736},{"questionId":"q42","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"257.3","actual":"257.3","isCorrect":true,"inputTokens":8967,"outputTokens":3,"latencyMs":663.4795000000158},{"questionId":"q43","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":11530,"outputTokens":2,"latencyMs":1409.5359999999637},{"questionId":"q43","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7304,"outputTokens":2,"latencyMs":571.7008749999804},{"questionId":"q43","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7622,"outputTokens":2,"latencyMs":654.3067499999888},{"questionId":"q43","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12949,"outputTokens":2,"latencyMs":744.0284999998985},{"q
uestionId":"q43","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8967,"outputTokens":2,"latencyMs":798.8013339999598},{"questionId":"q44","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":11531,"outputTokens":5,"latencyMs":826.5850839999039},{"questionId":"q44","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":7305,"outputTokens":5,"latencyMs":768.5535420000087},{"questionId":"q44","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":7623,"outputTokens":5,"latencyMs":752.9152499999618},{"questionId":"q44","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. Lafayette Schumm","isCorrect":true,"inputTokens":12950,"outputTokens":5,"latencyMs":788.1977920000209},{"questionId":"q44","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Dr. Lafayette Schumm","actual":"Dr. 
Lafayette Schumm","isCorrect":true,"inputTokens":8968,"outputTokens":5,"latencyMs":909.2735419999808},{"questionId":"q45","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":11531,"outputTokens":5,"latencyMs":768.4937919999938},{"questionId":"q45","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":7305,"outputTokens":5,"latencyMs":853.6360420000274},{"questionId":"q45","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":7623,"outputTokens":5,"latencyMs":803.939916000003},{"questionId":"q45","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":12950,"outputTokens":5,"latencyMs":990.8110419999575},{"questionId":"q45","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"nicholas38@hotmail.com","actual":"nicholas38@hotmail.com","isCorrect":true,"inputTokens":8968,"outputTokens":5,"latencyMs":1062.565166000044},{"questionId":"q46","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":11531,"outputTokens":6,"latencyMs":692.8294590000296},{"questionId":"q46","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":7305,"outputTokens":6,"latencyMs":628.9641660000198},{"questionId":"q46","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":7623,"outputTokens":6,"latencyMs":645.8005000000121},{"questionId":"q46","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-
25","isCorrect":true,"inputTokens":12950,"outputTokens":6,"latencyMs":778.7395409999881},{"questionId":"q46","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2026-02-25","actual":"2026-02-25","isCorrect":true,"inputTokens":8968,"outputTokens":6,"latencyMs":709.9560000000056},{"questionId":"q47","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":740.091791999992},{"questionId":"q47","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":513.7422500000102},{"questionId":"q47","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":670.5234169999603},{"questionId":"q47","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":884.692541000084},{"questionId":"q47","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":868.8687499999069},{"questionId":"q48","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":11531,"outputTokens":5,"latencyMs":837.4969999999739},{"questionId":"q48","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":7305,"outputTokens":5,"latencyMs":578.1517920000479},{"questionId":"q48","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. 
Bertha Hagenes","isCorrect":true,"inputTokens":7623,"outputTokens":5,"latencyMs":758.3587909999769},{"questionId":"q48","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":12950,"outputTokens":5,"latencyMs":718.5274590000045},{"questionId":"q48","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Ms. Bertha Hagenes","actual":"Ms. Bertha Hagenes","isCorrect":true,"inputTokens":8968,"outputTokens":5,"latencyMs":538.1767499999842},{"questionId":"q49","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":11531,"outputTokens":8,"latencyMs":858.6002909999806},{"questionId":"q49","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":7305,"outputTokens":8,"latencyMs":907.2394170000916},{"questionId":"q49","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":7623,"outputTokens":8,"latencyMs":778.3918749999721},{"questionId":"q49","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":12950,"outputTokens":8,"latencyMs":985.8520830000052},{"questionId":"q49","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"tomas_jacobi57@hotmail.com","actual":"tomas_jacobi57@hotmail.com","isCorrect":true,"inputTokens":8968,"outputTokens":8,"latencyMs":777.3625829999801},{"questionId":"q50","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":11531,"outputTokens":6,"latencyMs":966.6647499999963},{"questionId":"q50","format":"json-compact","model":
"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":7305,"outputTokens":6,"latencyMs":788.547542000073},{"questionId":"q50","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":7623,"outputTokens":6,"latencyMs":758.7405839998974},{"questionId":"q50","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":12950,"outputTokens":6,"latencyMs":809.8147920001065},{"questionId":"q50","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2025-12-04","actual":"2025-12-04","isCorrect":true,"inputTokens":8968,"outputTokens":6,"latencyMs":1031.0702499999898},{"questionId":"q51","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":948.9400000000605},{"questionId":"q51","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":535.2138330000453},{"questionId":"q51","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":714.1589580000145},{"questionId":"q51","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":783.6580829999875},{"questionId":"q51","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"8","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":661.3643750000047},{"questionId":"q52","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":643.7397079999791},{"questionId":"q52","format":"json-compact","model":"grok-4-1-fast-non-reasoning",
"expected":"10","actual":"12","isCorrect":false,"inputTokens":7301,"outputTokens":1,"latencyMs":536.3641669999342},{"questionId":"q52","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7619,"outputTokens":1,"latencyMs":535.9290829999372},{"questionId":"q52","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":12946,"outputTokens":1,"latencyMs":826.2162920000264},{"questionId":"q52","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":8964,"outputTokens":1,"latencyMs":496.73766600003},{"questionId":"q53","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":462.14420800004154},{"questionId":"q53","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7301,"outputTokens":1,"latencyMs":439.53295799996704},{"questionId":"q53","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":7619,"outputTokens":1,"latencyMs":1286.7901250000577},{"questionId":"q53","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":12946,"outputTokens":1,"latencyMs":1014.7207919999491},{"questionId":"q53","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":8964,"outputTokens":1,"latencyMs":1001.4921660000691},{"questionId":"q54","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":11528,"outputTokens":1,"latencyMs":773.265625},{"questionId":"q54","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7302,"outputTokens":1,"l
atencyMs":789.5051669999957},{"questionId":"q54","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":7620,"outputTokens":1,"latencyMs":880.1744160000235},{"questionId":"q54","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":12947,"outputTokens":1,"latencyMs":962.6629160000011},{"questionId":"q54","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":8965,"outputTokens":1,"latencyMs":559.9401249999646},{"questionId":"q55","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"352938.35","isCorrect":false,"inputTokens":11528,"outputTokens":4,"latencyMs":813.3962500001071},{"questionId":"q55","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"35235.79","isCorrect":false,"inputTokens":7302,"outputTokens":4,"latencyMs":6438.277499999967},{"questionId":"q55","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"35239.79","isCorrect":false,"inputTokens":7620,"outputTokens":4,"latencyMs":792.4362909999909},{"questionId":"q55","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"43341.79","isCorrect":false,"inputTokens":12947,"outputTokens":4,"latencyMs":1113.342209000024},{"questionId":"q55","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"38069.93","actual":"35208.89","isCorrect":false,"inputTokens":8965,"outputTokens":4,"latencyMs":660.7110419999808},{"questionId":"q56","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.07","isCorrect":false,"inputTokens":11526,"outputTokens":3,"latencyMs":711.9117499999702},{"questionId":"q56","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.35","isCorrect":false,"inputTokens":7300,"outputTokens":3,"laten
cyMs":729.7875840000343},{"questionId":"q56","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"554.39","isCorrect":false,"inputTokens":7618,"outputTokens":3,"latencyMs":682.1083750000689},{"questionId":"q56","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.35","isCorrect":false,"inputTokens":12945,"outputTokens":3,"latencyMs":876.8197500000242},{"questionId":"q56","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"761.40","actual":"806.35","isCorrect":false,"inputTokens":8963,"outputTokens":3,"latencyMs":590.2461250000633},{"questionId":"q57","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":761.8431669999845},{"questionId":"q57","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7301,"outputTokens":1,"latencyMs":894.544041999965},{"questionId":"q57","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7619,"outputTokens":1,"latencyMs":717.7675840000156},{"questionId":"q57","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":12946,"outputTokens":1,"latencyMs":893.6164590000408},{"questionId":"q57","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":8964,"outputTokens":1,"latencyMs":623.1933339999523},{"questionId":"q58","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":11526,"outputTokens":4,"latencyMs":616.3692909999518},{"questionId":"q58","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":7300,"outputTokens":4,"latencyMs":592.0623749999795},{"questionId":"q5
8","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":7618,"outputTokens":4,"latencyMs":571.844458000036},{"questionId":"q58","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":12945,"outputTokens":4,"latencyMs":825.7324580000713},{"questionId":"q58","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2282.44","actual":"2282.44","isCorrect":true,"inputTokens":8963,"outputTokens":4,"latencyMs":4773.520040999982},{"questionId":"q59","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"38","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":875.5253329999978},{"questionId":"q59","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"35","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":880.0571250000503},{"questionId":"q59","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"42","isCorrect":false,"inputTokens":7622,"outputTokens":1,"latencyMs":969.6056660000468},{"questionId":"q59","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"38","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":897.704458000022},{"questionId":"q59","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"46","actual":"35","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":848.4107910000021},{"questionId":"q60","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"32","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":787.8418749999255},{"questionId":"q60","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"27","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":674.10699999996},{"questionId":"q60","format":"toon","model":"grok-4-1-fast-non-reasoning
","expected":"38","actual":"32","isCorrect":false,"inputTokens":7622,"outputTokens":1,"latencyMs":740.3179999999702},{"questionId":"q60","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"31","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":943.3058749999618},{"questionId":"q60","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"38","actual":"29","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":746.451834000065},{"questionId":"q61","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"25","isCorrect":false,"inputTokens":11530,"outputTokens":1,"latencyMs":862.3196669999743},{"questionId":"q61","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"23","isCorrect":false,"inputTokens":7304,"outputTokens":1,"latencyMs":493.14495799993165},{"questionId":"q61","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"27","isCorrect":false,"inputTokens":7622,"outputTokens":1,"latencyMs":635.9011669999454},{"questionId":"q61","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"25","isCorrect":false,"inputTokens":12949,"outputTokens":1,"latencyMs":760.0674590000417},{"questionId":"q61","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"29","actual":"26","isCorrect":false,"inputTokens":8967,"outputTokens":1,"latencyMs":691.1831660000607},{"questionId":"q62","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"7","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":895.8139170000795},{"questionId":"q62","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":470.424500000081},{"questionId":"q62","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"9","isCorrect":false,"inputTokens":7626,"outputToken
s":1,"latencyMs":516.0680840000277},{"questionId":"q62","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"12","isCorrect":false,"inputTokens":12953,"outputTokens":1,"latencyMs":809.124499999918},{"questionId":"q62","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"10","actual":"10","isCorrect":true,"inputTokens":8971,"outputTokens":1,"latencyMs":890.047499999986},{"questionId":"q63","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"13","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":951.530999999959},{"questionId":"q63","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"17","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":913.9237920000451},{"questionId":"q63","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"17","isCorrect":false,"inputTokens":7626,"outputTokens":1,"latencyMs":634.6960420000833},{"questionId":"q63","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"12","isCorrect":false,"inputTokens":12953,"outputTokens":1,"latencyMs":948.3523329999298},{"questionId":"q63","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"18","isCorrect":false,"inputTokens":8971,"outputTokens":1,"latencyMs":811.1162079999922},{"questionId":"q64","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":11535,"outputTokens":1,"latencyMs":912.2439580000937},{"questionId":"q64","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":7309,"outputTokens":1,"latencyMs":718.7288330000592},{"questionId":"q64","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":7627,"outputTokens":1,"latencyMs":883.9092080000555},{"questionId":"q64","format":"xml","model":"grok-4-1-fast
-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":12954,"outputTokens":1,"latencyMs":661.7925409999443},{"questionId":"q64","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":8972,"outputTokens":1,"latencyMs":604.7804580000229},{"questionId":"q65","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":11535,"outputTokens":1,"latencyMs":737.0242090000538},{"questionId":"q65","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":7309,"outputTokens":1,"latencyMs":782.2560000000522},{"questionId":"q65","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":7627,"outputTokens":1,"latencyMs":749.6715839999961},{"questionId":"q65","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":12954,"outputTokens":1,"latencyMs":769.360250000027},{"questionId":"q65","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"7","isCorrect":false,"inputTokens":8972,"outputTokens":1,"latencyMs":835.999291999964},{"questionId":"q66","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":682.1851670000469},{"questionId":"q66","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":568.4164159999928},{"questionId":"q66","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":7626,"outputTokens":1,"latencyMs":4085.6644169999054},{"questionId":"q66","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":12953,"outputTokens":1,
"latencyMs":526.9112080000341},{"questionId":"q66","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"0","isCorrect":false,"inputTokens":8971,"outputTokens":1,"latencyMs":803.7134579999838},{"questionId":"q67","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":11534,"outputTokens":1,"latencyMs":481.30329099996015},{"questionId":"q67","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":7308,"outputTokens":1,"latencyMs":883.0185419999762},{"questionId":"q67","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":7626,"outputTokens":1,"latencyMs":555.7566250000382},{"questionId":"q67","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"12","isCorrect":false,"inputTokens":12953,"outputTokens":1,"latencyMs":832.8929159999825},{"questionId":"q67","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"11","isCorrect":false,"inputTokens":8971,"outputTokens":1,"latencyMs":691.280792000005},{"questionId":"q68","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"3","isCorrect":false,"inputTokens":11535,"outputTokens":1,"latencyMs":876.8689170000143},{"questionId":"q68","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":7309,"outputTokens":1,"latencyMs":528.7574579999782},{"questionId":"q68","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7627,"outputTokens":1,"latencyMs":535.7825419999426},{"questionId":"q68","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"8","isCorrect":false,"inputTokens":12954,"outputTokens":1,"latencyMs":980.4331659999443},{"questionId":"q68","format":"yaml","model":"grok-4-1-fast-non-
reasoning","expected":"4","actual":"2","isCorrect":false,"inputTokens":8972,"outputTokens":1,"latencyMs":620.7542089999188},{"questionId":"q69","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"18","isCorrect":false,"inputTokens":11536,"outputTokens":1,"latencyMs":753.1936249999562},{"questionId":"q69","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"12","isCorrect":false,"inputTokens":7310,"outputTokens":1,"latencyMs":873.3733329999959},{"questionId":"q69","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":7628,"outputTokens":1,"latencyMs":700.7058749999851},{"questionId":"q69","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":12955,"outputTokens":1,"latencyMs":931.9119159999536},{"questionId":"q69","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"12","isCorrect":false,"inputTokens":8973,"outputTokens":1,"latencyMs":842.4380420000525},{"questionId":"q70","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"12","isCorrect":false,"inputTokens":11536,"outputTokens":1,"latencyMs":867.3247500000289},{"questionId":"q70","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"12","isCorrect":false,"inputTokens":7310,"outputTokens":1,"latencyMs":684.500500000082},{"questionId":"q70","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"20","isCorrect":false,"inputTokens":7628,"outputTokens":1,"latencyMs":680.5775829999475},{"questionId":"q70","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"20","isCorrect":false,"inputTokens":12955,"outputTokens":1,"latencyMs":816.7335409999359},{"questionId":"q70","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"22","actual":"12","isCorrect":false,"inputTokens":8973,"ou
tputTokens":1,"latencyMs":864.5872499999823},{"questionId":"q71","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3877,"outputTokens":2,"latencyMs":667.1784170001047},{"questionId":"q71","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":2548,"outputTokens":2,"latencyMs":636.6927500000456},{"questionId":"q71","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":1807,"outputTokens":2,"latencyMs":946.3649999999907},{"questionId":"q71","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":1666,"outputTokens":2,"latencyMs":627.0107919999864},{"questionId":"q71","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":4523,"outputTokens":2,"latencyMs":595.8528330000117},{"questionId":"q71","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6944","actual":"6944","isCorrect":true,"inputTokens":3207,"outputTokens":2,"latencyMs":721.3300420000451},{"questionId":"q72","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3877,"outputTokens":4,"latencyMs":583.9176250000019},{"questionId":"q72","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":2548,"outputTokens":4,"latencyMs":757.8279590000166},{"questionId":"q72","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":1807,"outputTokens":4,"latencyMs":643.0962499999441},{"questionId":"q72","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":1666,"outputTokens":4,"latencyMs":536
.1682499999879},{"questionId":"q72","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":4523,"outputTokens":4,"latencyMs":581.5594580000034},{"questionId":"q72","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1865.57","actual":"1865.57","isCorrect":true,"inputTokens":3207,"outputTokens":4,"latencyMs":477.8069579999428},{"questionId":"q73","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3878,"outputTokens":3,"latencyMs":469.72750000003725},{"questionId":"q73","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":2549,"outputTokens":3,"latencyMs":392.8477919999277},{"questionId":"q73","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":1808,"outputTokens":3,"latencyMs":504.17579100001603},{"questionId":"q73","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":1667,"outputTokens":3,"latencyMs":646.0211249999702},{"questionId":"q73","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":4524,"outputTokens":3,"latencyMs":598.495833999943},{"questionId":"q73","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0.36","actual":"0.36","isCorrect":true,"inputTokens":3208,"outputTokens":3,"latencyMs":626.5938340000575},{"questionId":"q74","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":3878,"outputTokens":1,"latencyMs":866.4132919999538},{"questionId":"q74","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":2549,"outputTokens":1,"latencyMs":596.7857079999521},{"questionId":"q74","format":"to
on","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":1808,"outputTokens":1,"latencyMs":639.6120830000145},{"questionId":"q74","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":1667,"outputTokens":1,"latencyMs":478.824583999929},{"questionId":"q74","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":4524,"outputTokens":1,"latencyMs":513.1029999998864},{"questionId":"q74","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"37","isCorrect":true,"inputTokens":3208,"outputTokens":1,"latencyMs":624.2674169999082},{"questionId":"q75","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3877,"outputTokens":2,"latencyMs":561.5092919999734},{"questionId":"q75","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":2548,"outputTokens":2,"latencyMs":553.9505000000354},{"questionId":"q75","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":1807,"outputTokens":2,"latencyMs":480.482416999992},{"questionId":"q75","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":1666,"outputTokens":2,"latencyMs":445.3764170000795},{"questionId":"q75","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":4523,"outputTokens":2,"latencyMs":516.5285830000648},{"questionId":"q75","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5532","actual":"5532","isCorrect":true,"inputTokens":3207,"outputTokens":2,"latencyMs":520.0939159999834},{"questionId":"q76","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27
","isCorrect":true,"inputTokens":3877,"outputTokens":4,"latencyMs":438.40795799996704},{"questionId":"q76","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":2548,"outputTokens":4,"latencyMs":549.6162079999922},{"questionId":"q76","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":1807,"outputTokens":4,"latencyMs":475.16245800000615},{"questionId":"q76","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":1666,"outputTokens":4,"latencyMs":523.6847919999855},{"questionId":"q76","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":4523,"outputTokens":4,"latencyMs":529.6843329999829},{"questionId":"q76","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4760.27","actual":"4760.27","isCorrect":true,"inputTokens":3207,"outputTokens":4,"latencyMs":586.4784579999978},{"questionId":"q77","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3878,"outputTokens":3,"latencyMs":595.1180000000168},{"questionId":"q77","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":2549,"outputTokens":3,"latencyMs":474.8182079999242},{"questionId":"q77","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":1808,"outputTokens":3,"latencyMs":475.0827079999726},{"questionId":"q77","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":1667,"outputTokens":3,"latencyMs":380.63508300005924},{"questionId":"q77","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTok
ens":4524,"outputTokens":3,"latencyMs":492.3605830000015},{"questionId":"q77","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0.35","actual":"0.35","isCorrect":true,"inputTokens":3208,"outputTokens":3,"latencyMs":474.56541699997615},{"questionId":"q78","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":3878,"outputTokens":1,"latencyMs":637.0482920000795},{"questionId":"q78","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":2549,"outputTokens":1,"latencyMs":623.8157079999801},{"questionId":"q78","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":1808,"outputTokens":1,"latencyMs":485.66600000008475},{"questionId":"q78","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":1667,"outputTokens":1,"latencyMs":493.966083999956},{"questionId":"q78","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":4524,"outputTokens":1,"latencyMs":494.7225000000326},{"questionId":"q78","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"43","isCorrect":true,"inputTokens":3208,"outputTokens":1,"latencyMs":447.7400830000406},{"questionId":"q79","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3877,"outputTokens":2,"latencyMs":555.2192090000026},{"questionId":"q79","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":2548,"outputTokens":2,"latencyMs":498.54058400006033},{"questionId":"q79","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":1807,"outputTokens":2,"latencyMs":4969.214000000036},{"questionId":"q
79","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":1666,"outputTokens":2,"latencyMs":442.37591599998996},{"questionId":"q79","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":4523,"outputTokens":2,"latencyMs":481.6617499999702},{"questionId":"q79","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3827","actual":"3827","isCorrect":true,"inputTokens":3207,"outputTokens":2,"latencyMs":4933.525125000044},{"questionId":"q80","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":3874,"outputTokens":1,"latencyMs":367.5052920000162},{"questionId":"q80","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":2545,"outputTokens":1,"latencyMs":476.88395799999125},{"questionId":"q80","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1804,"outputTokens":1,"latencyMs":453.7974170000525},{"questionId":"q80","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1663,"outputTokens":1,"latencyMs":394.2155420000199},{"questionId":"q80","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"68","isCorrect":false,"inputTokens":4520,"outputTokens":1,"latencyMs":411.7383329999866},{"questionId":"q80","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":3204,"outputTokens":1,"latencyMs":720.8658329999307},{"questionId":"q81","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"436942","isCorrect":false,"inputTokens":3875,"outputTokens":2,"latencyMs":418.73649999999907},{"questionId":"q81","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":
"338580","actual":"{\"totalViews\":431790}","isCorrect":false,"inputTokens":2546,"outputTokens":7,"latencyMs":621.2502499999246},{"questionId":"q81","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"433895","isCorrect":false,"inputTokens":1805,"outputTokens":2,"latencyMs":376.69983399997},{"questionId":"q81","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"848687","isCorrect":false,"inputTokens":1664,"outputTokens":2,"latencyMs":526.1514169999864},{"questionId":"q81","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"417476","isCorrect":false,"inputTokens":4521,"outputTokens":2,"latencyMs":715.0293339999625},{"questionId":"q81","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"338580","actual":"352879","isCorrect":false,"inputTokens":3205,"outputTokens":2,"latencyMs":696.9394590000156},{"questionId":"q82","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1321","isCorrect":false,"inputTokens":3875,"outputTokens":2,"latencyMs":506.02349999989383},{"questionId":"q82","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1672","isCorrect":false,"inputTokens":2546,"outputTokens":2,"latencyMs":529.5858749998733},{"questionId":"q82","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1582","isCorrect":false,"inputTokens":1805,"outputTokens":2,"latencyMs":542.3188340000343},{"questionId":"q82","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1762","isCorrect":false,"inputTokens":1664,"outputTokens":2,"latencyMs":473.39445900009014},{"questionId":"q82","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1621","isCorrect":false,"inputTokens":4521,"outputTokens":2,"latencyMs":514.995541999815},{"questionId":"q82","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1666","actual":"1541",
"isCorrect":false,"inputTokens":3205,"outputTokens":2,"latencyMs":637.6235839999281},{"questionId":"q83","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"297171.51","isCorrect":false,"inputTokens":3873,"outputTokens":4,"latencyMs":601.5097920000553},{"questionId":"q83","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"```json\n286329.96\n```","isCorrect":false,"inputTokens":2544,"outputTokens":9,"latencyMs":596.0253330001142},{"questionId":"q83","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"304457.35","isCorrect":false,"inputTokens":1803,"outputTokens":4,"latencyMs":588.1962499998044},{"questionId":"q83","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"319248.47","isCorrect":false,"inputTokens":1662,"outputTokens":4,"latencyMs":723.7299580001272},{"questionId":"q83","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"336090.49","isCorrect":false,"inputTokens":4519,"outputTokens":4,"latencyMs":571.2627920000814},{"questionId":"q83","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"278050.98","actual":"291290.95","isCorrect":false,"inputTokens":3203,"outputTokens":4,"latencyMs":400.36724999989383},{"questionId":"q84","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":3871,"outputTokens":3,"latencyMs":412.0887499998789},{"questionId":"q84","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":2542,"outputTokens":3,"latencyMs":481.8872079998255},{"questionId":"q84","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":1801,"outputTokens":3,"latencyMs":509.14441700000316},{"questionId":"q84","format":"csv","model":"grok-4-1-fast-non-reasoning","exp
ected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":1660,"outputTokens":3,"latencyMs":383.484625000041},{"questionId":"q84","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":4517,"outputTokens":3,"latencyMs":943.1645829998888},{"questionId":"q84","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0.49","actual":"0.49","isCorrect":true,"inputTokens":3201,"outputTokens":3,"latencyMs":567.7023330000229},{"questionId":"q85","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":3875,"outputTokens":1,"latencyMs":570.1790839999449},{"questionId":"q85","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"28","isCorrect":false,"inputTokens":2546,"outputTokens":1,"latencyMs":611.8090830000583},{"questionId":"q85","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":1805,"outputTokens":1,"latencyMs":329.9869160000235},{"questionId":"q85","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"28","isCorrect":false,"inputTokens":1664,"outputTokens":1,"latencyMs":803.358959000092},{"questionId":"q85","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"28","isCorrect":false,"inputTokens":4521,"outputTokens":1,"latencyMs":480.0933330000844},{"questionId":"q85","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"25","isCorrect":true,"inputTokens":3205,"outputTokens":1,"latencyMs":513.3110409998335},{"questionId":"q86","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":3874,"outputTokens":1,"latencyMs":483.51479099993594},{"questionId":"q86","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":2545,"outputTokens":
1,"latencyMs":791.2523749999236},{"questionId":"q86","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"48","isCorrect":false,"inputTokens":1804,"outputTokens":1,"latencyMs":543.8400000000838},{"questionId":"q86","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"51","isCorrect":false,"inputTokens":1663,"outputTokens":1,"latencyMs":645.0875000001397},{"questionId":"q86","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"62","isCorrect":false,"inputTokens":4520,"outputTokens":1,"latencyMs":488.80837500002235},{"questionId":"q86","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"25","isCorrect":false,"inputTokens":3204,"outputTokens":1,"latencyMs":506.6174999999348},{"questionId":"q87","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":512.5699579999782},{"questionId":"q87","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"22","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":771.8474590000696},{"questionId":"q87","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"18","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":571.1704170000739},{"questionId":"q87","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"20","isCorrect":false,"inputTokens":3500,"outputTokens":1,"latencyMs":779.3597500000615},{"questionId":"q87","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"25","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":498.4696670002304},{"questionId":"q87","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"22","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":466.0879590001423},{"questionId":"q88","format":"json-pretty","model":"grok-4-1-fast
-non-reasoning","expected":"11","actual":"7","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":440.93083299999125},{"questionId":"q88","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":419.83762499992736},{"questionId":"q88","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"7","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":374.7700419998728},{"questionId":"q88","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"8","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":523.8123749999795},{"questionId":"q88","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":384.8144170001615},{"questionId":"q88","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"8","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":449.75270899990574},{"questionId":"q89","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":474.3642500001006},{"questionId":"q89","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":438.2861250001006},{"questionId":"q89","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"18","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":461.3356250000652},{"questionId":"q89","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"11","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":418.0638330001384},{"questionId":"q89","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":4527,"outputTo
kens":1,"latencyMs":355.1533749999944},{"questionId":"q89","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"11","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":367.96429100004025},{"questionId":"q90","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":582.3625829999801},{"questionId":"q90","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":674.5549170000013},{"questionId":"q90","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"22","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":474.828582999995},{"questionId":"q90","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"15","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":526.7267080000602},{"questionId":"q90","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":578.2530419998802},{"questionId":"q90","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"12","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":523.2485409998335},{"questionId":"q91","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"11","isCorrect":false,"inputTokens":3881,"outputTokens":1,"latencyMs":490.45974999992177},{"questionId":"q91","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":2552,"outputTokens":1,"latencyMs":503.06287500006147},{"questionId":"q91","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":1811,"outputTokens":1,"latencyMs":513.8332919999957},{"questionId":"q91","format":"csv","model
":"grok-4-1-fast-non-reasoning","expected":"23","actual":"11","isCorrect":false,"inputTokens":1670,"outputTokens":1,"latencyMs":505.761208999902},{"questionId":"q91","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"12","isCorrect":false,"inputTokens":4527,"outputTokens":1,"latencyMs":470.404499999946},{"questionId":"q91","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"11","isCorrect":false,"inputTokens":3211,"outputTokens":1,"latencyMs":419.1475840001367},{"questionId":"q92","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"23","isCorrect":false,"inputTokens":3880,"outputTokens":1,"latencyMs":537.3507499999832},{"questionId":"q92","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"23","isCorrect":false,"inputTokens":2551,"outputTokens":1,"latencyMs":419.5671249998268},{"questionId":"q92","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"16","isCorrect":false,"inputTokens":1810,"outputTokens":1,"latencyMs":530.794624999864},{"questionId":"q92","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"23","isCorrect":false,"inputTokens":1669,"outputTokens":1,"latencyMs":650.0870000000577},{"questionId":"q92","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":4526,"outputTokens":1,"latencyMs":588.5105830000248},{"questionId":"q92","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"19","isCorrect":false,"inputTokens":3210,"outputTokens":1,"latencyMs":588.5343329999596},{"questionId":"q93","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"9","isCorrect":true,"inputTokens":3880,"outputTokens":1,"latencyMs":448.8965419998858},{"questionId":"q93","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"12","isCorrect":false,"inputToke
ns":2551,"outputTokens":1,"latencyMs":427.7964580000844},{"questionId":"q93","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"12","isCorrect":false,"inputTokens":1810,"outputTokens":1,"latencyMs":449.91379200015217},{"questionId":"q93","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"22","isCorrect":false,"inputTokens":1669,"outputTokens":1,"latencyMs":486.40762499999255},{"questionId":"q93","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"12","isCorrect":false,"inputTokens":4526,"outputTokens":1,"latencyMs":385.6574169998057},{"questionId":"q93","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"9","actual":"8","isCorrect":false,"inputTokens":3210,"outputTokens":1,"latencyMs":446.4321250000503},{"questionId":"q94","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"25","isCorrect":false,"inputTokens":3883,"outputTokens":1,"latencyMs":601.0140420000535},{"questionId":"q94","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"28","isCorrect":false,"inputTokens":2554,"outputTokens":1,"latencyMs":626.378292000154},{"questionId":"q94","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"28","isCorrect":false,"inputTokens":1813,"outputTokens":1,"latencyMs":593.7920419999864},{"questionId":"q94","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"35","isCorrect":false,"inputTokens":1672,"outputTokens":1,"latencyMs":604.6700839998666},{"questionId":"q94","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"31","isCorrect":true,"inputTokens":4529,"outputTokens":1,"latencyMs":648.5342079999391},{"questionId":"q94","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"31","actual":"26","isCorrect":false,"inputTokens":3213,"outputTokens":1,"latencyMs":462.07916700001806},{"questionId":"q95","format":"json-pretty","mo
del":"grok-4-1-fast-non-reasoning","expected":"28","actual":"23","isCorrect":false,"inputTokens":3883,"outputTokens":1,"latencyMs":586.6430000001565},{"questionId":"q95","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"26","isCorrect":false,"inputTokens":2554,"outputTokens":1,"latencyMs":779.3139580001589},{"questionId":"q95","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"26","isCorrect":false,"inputTokens":1813,"outputTokens":1,"latencyMs":712.7615409998689},{"questionId":"q95","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":1672,"outputTokens":1,"latencyMs":597.8637919998728},{"questionId":"q95","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"35","isCorrect":false,"inputTokens":4529,"outputTokens":1,"latencyMs":856.8264589998871},{"questionId":"q95","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"28","actual":"25","isCorrect":false,"inputTokens":3213,"outputTokens":1,"latencyMs":507.8472919999622},{"questionId":"q96","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":15289,"outputTokens":2,"latencyMs":1028.6837090000045},{"questionId":"q96","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":11593,"outputTokens":2,"latencyMs":839.0477919999976},{"questionId":"q96","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8916,"outputTokens":2,"latencyMs":1018.7267920000013},{"questionId":"q96","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":8666,"outputTokens":2,"latencyMs":864.162416000152},{"questionId":"q96","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"430886","actu
al":"430886","isCorrect":true,"inputTokens":17201,"outputTokens":2,"latencyMs":1127.1430420000106},{"questionId":"q96","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"430886","actual":"430886","isCorrect":true,"inputTokens":13287,"outputTokens":2,"latencyMs":875.9483329998329},{"questionId":"q97","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":15290,"outputTokens":2,"latencyMs":884.7861669999547},{"questionId":"q97","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":11594,"outputTokens":2,"latencyMs":863.4405000000261},{"questionId":"q97","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8917,"outputTokens":2,"latencyMs":713.9379590000026},{"questionId":"q97","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":8667,"outputTokens":2,"latencyMs":954.2697080001235},{"questionId":"q97","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":17202,"outputTokens":2,"latencyMs":1526.1647910000756},{"questionId":"q97","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"52904","actual":"52904","isCorrect":true,"inputTokens":13288,"outputTokens":2,"latencyMs":651.6998330000788},{"questionId":"q98","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":15286,"outputTokens":2,"latencyMs":1002.4406659998931},{"questionId":"q98","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":11590,"outputTokens":2,"latencyMs":778.0931249998976},{"questionId":"q98","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":
true,"inputTokens":8913,"outputTokens":2,"latencyMs":698.0237500001676},{"questionId":"q98","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":8663,"outputTokens":2,"latencyMs":2732.6750419999007},{"questionId":"q98","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":17198,"outputTokens":2,"latencyMs":2715.6885829998646},{"questionId":"q98","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5786","actual":"5786","isCorrect":true,"inputTokens":13284,"outputTokens":2,"latencyMs":3166.9347080001608},{"questionId":"q99","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":15291,"outputTokens":1,"latencyMs":969.6473749999423},{"questionId":"q99","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":11595,"outputTokens":1,"latencyMs":669.4215410000179},{"questionId":"q99","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":8918,"outputTokens":1,"latencyMs":904.8111250000075},{"questionId":"q99","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":8668,"outputTokens":1,"latencyMs":907.8212919998914},{"questionId":"q99","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":17203,"outputTokens":1,"latencyMs":1169.048333000159},{"questionId":"q99","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"master","actual":"master","isCorrect":true,"inputTokens":13289,"outputTokens":1,"latencyMs":1029.4778330000117},{"questionId":"q100","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":1528
6,"outputTokens":2,"latencyMs":965.6498750001192},{"questionId":"q100","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":11590,"outputTokens":2,"latencyMs":955.9572499999776},{"questionId":"q100","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8913,"outputTokens":2,"latencyMs":1146.311541999923},{"questionId":"q100","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":8663,"outputTokens":2,"latencyMs":718.9294590000063},{"questionId":"q100","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":17198,"outputTokens":2,"latencyMs":1058.8808750000317},{"questionId":"q100","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"170327","actual":"170327","isCorrect":true,"inputTokens":13284,"outputTokens":2,"latencyMs":883.9617089999374},{"questionId":"q101","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":15290,"outputTokens":2,"latencyMs":1124.9118330001365},{"questionId":"q101","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":11594,"outputTokens":2,"latencyMs":801.140625},{"questionId":"q101","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8917,"outputTokens":2,"latencyMs":672.8650829999242},{"questionId":"q101","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":8667,"outputTokens":2,"latencyMs":636.2527499999851},{"questionId":"q101","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":17202,"outputTokens":2,"
latencyMs":920.3125},{"questionId":"q101","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"48578","actual":"48578","isCorrect":true,"inputTokens":13288,"outputTokens":2,"latencyMs":687.6969169999938},{"questionId":"q102","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":15290,"outputTokens":1,"latencyMs":644.921166999964},{"questionId":"q102","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":11594,"outputTokens":1,"latencyMs":772.4852089998312},{"questionId":"q102","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":8917,"outputTokens":1,"latencyMs":691.0827920001466},{"questionId":"q102","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":8667,"outputTokens":1,"latencyMs":680.5707080001011},{"questionId":"q102","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":17202,"outputTokens":1,"latencyMs":1144.0804580000695},{"questionId":"q102","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"678","actual":"678","isCorrect":true,"inputTokens":13288,"outputTokens":1,"latencyMs":851.5518330000341},{"questionId":"q103","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":15289,"outputTokens":1,"latencyMs":905.217165999813},{"questionId":"q103","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":11593,"outputTokens":1,"latencyMs":783.5753750000149},{"questionId":"q103","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":8916,"outputTokens":1,"latencyMs":688.7867499999702},{"questionId":"q103","for
mat":"csv","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":8666,"outputTokens":1,"latencyMs":1171.5374169999268},{"questionId":"q103","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":17201,"outputTokens":1,"latencyMs":1257.7813329999335},{"questionId":"q103","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"main","actual":"main","isCorrect":true,"inputTokens":13287,"outputTokens":1,"latencyMs":956.1958329998888},{"questionId":"q104","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":15292,"outputTokens":2,"latencyMs":795.1947500000242},{"questionId":"q104","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":11596,"outputTokens":2,"latencyMs":977.4269169999752},{"questionId":"q104","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8919,"outputTokens":2,"latencyMs":630.7267080000602},{"questionId":"q104","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":8669,"outputTokens":2,"latencyMs":675.2367499999236},{"questionId":"q104","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":17204,"outputTokens":2,"latencyMs":876.6145830000751},{"questionId":"q104","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"115543","actual":"115543","isCorrect":true,"inputTokens":13290,"outputTokens":2,"latencyMs":878.96875},{"questionId":"q105","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":15289,"outputTokens":2,"latencyMs":918.0222499999218},{"questionId":"q105","format":"json-compact","mode
l":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":11593,"outputTokens":2,"latencyMs":818.6200840000529},{"questionId":"q105","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8916,"outputTokens":2,"latencyMs":999.2968339999206},{"questionId":"q105","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":8666,"outputTokens":2,"latencyMs":842.6387499999255},{"questionId":"q105","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":17201,"outputTokens":2,"latencyMs":1071.0478339998517},{"questionId":"q105","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"36054","actual":"36054","isCorrect":true,"inputTokens":13287,"outputTokens":2,"latencyMs":650.575416999869},{"questionId":"q106","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":15293,"outputTokens":2,"latencyMs":1022.3034590000752},{"questionId":"q106","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":11597,"outputTokens":2,"latencyMs":744.5276669999585},{"questionId":"q106","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8920,"outputTokens":2,"latencyMs":893.2044999999925},{"questionId":"q106","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":8670,"outputTokens":2,"latencyMs":667.956875000149},{"questionId":"q106","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2607","actual":"2607","isCorrect":true,"inputTokens":17205,"outputTokens":2,"latencyMs":1136.977458999958},{"questionId":"q106","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected"
:"2607","actual":"2607","isCorrect":true,"inputTokens":13291,"outputTokens":2,"latencyMs":635.3852920001373},{"questionId":"q107","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":15285,"outputTokens":1,"latencyMs":1026.0453329999},{"questionId":"q107","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":11589,"outputTokens":1,"latencyMs":860.7715409998782},{"questionId":"q107","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8912,"outputTokens":1,"latencyMs":779.9598750001751},{"questionId":"q107","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8662,"outputTokens":1,"latencyMs":936.8535829999018},{"questionId":"q107","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"65","isCorrect":false,"inputTokens":17197,"outputTokens":1,"latencyMs":728.9177079999354},{"questionId":"q107","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":13283,"outputTokens":1,"latencyMs":954.2083749999292},{"questionId":"q108","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"23964281","isCorrect":false,"inputTokens":15288,"outputTokens":3,"latencyMs":1148.6309589999728},{"questionId":"q108","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"15939335","isCorrect":false,"inputTokens":11592,"outputTokens":3,"latencyMs":661.1757080000825},{"questionId":"q108","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"11000000","isCorrect":false,"inputTokens":8915,"outputTokens":3,"latencyMs":1072.593416000018},{"questionId":"q108","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"
17321675","isCorrect":false,"inputTokens":8665,"outputTokens":3,"latencyMs":811.687249999959},{"questionId":"q108","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"14782617","isCorrect":false,"inputTokens":17200,"outputTokens":3,"latencyMs":1047.7613750000019},{"questionId":"q108","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"15413563","actual":"15077794","isCorrect":false,"inputTokens":13286,"outputTokens":3,"latencyMs":781.4639580000658},{"questionId":"q109","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1821788","isCorrect":false,"inputTokens":15288,"outputTokens":3,"latencyMs":878.4607920001727},{"questionId":"q109","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"2251781","isCorrect":false,"inputTokens":11592,"outputTokens":3,"latencyMs":748.1280000000261},{"questionId":"q109","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1841289","isCorrect":false,"inputTokens":8915,"outputTokens":3,"latencyMs":989.4657089998946},{"questionId":"q109","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"2751478","isCorrect":false,"inputTokens":8665,"outputTokens":3,"latencyMs":805.2802919999231},{"questionId":"q109","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1735050","isCorrect":false,"inputTokens":17200,"outputTokens":3,"latencyMs":530.580083000008},{"questionId":"q109","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2528243","actual":"1721781","isCorrect":false,"inputTokens":13286,"outputTokens":3,"latencyMs":1035.6608329999726},{"questionId":"q110","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"147531","isCorrect":false,"inputTokens":15287,"outputTokens":2,"latencyMs":749.2882499999832},{"questionId":"q110","format":"json-compact","model":"grok-4-1-fast-non-reaso
ning","expected":"154136","actual":"143707","isCorrect":false,"inputTokens":11591,"outputTokens":2,"latencyMs":918.7278330000117},{"questionId":"q110","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"143279","isCorrect":false,"inputTokens":8914,"outputTokens":2,"latencyMs":742.1529169999994},{"questionId":"q110","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"143937","isCorrect":false,"inputTokens":8664,"outputTokens":2,"latencyMs":852.8257919999305},{"questionId":"q110","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"128484","isCorrect":false,"inputTokens":17199,"outputTokens":2,"latencyMs":1170.8482499998063},{"questionId":"q110","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"154136","actual":"132881","isCorrect":false,"inputTokens":13285,"outputTokens":2,"latencyMs":612.3462499999441},{"questionId":"q111","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":15289,"outputTokens":1,"latencyMs":953.1429159999825},{"questionId":"q111","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"35","isCorrect":false,"inputTokens":11593,"outputTokens":1,"latencyMs":1029.956540999934},{"questionId":"q111","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"35","isCorrect":false,"inputTokens":8916,"outputTokens":1,"latencyMs":741.9714999999851},{"questionId":"q111","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"35","isCorrect":false,"inputTokens":8666,"outputTokens":1,"latencyMs":864.9059580001049},{"questionId":"q111","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCorrect":false,"inputTokens":17201,"outputTokens":1,"latencyMs":1208.616083999863},{"questionId":"q111","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"41","actual":"42","isCor
rect":false,"inputTokens":13287,"outputTokens":1,"latencyMs":1042.6380839999765},{"questionId":"q112","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":15289,"outputTokens":1,"latencyMs":1173.637875000015},{"questionId":"q112","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":11593,"outputTokens":1,"latencyMs":841.763666999992},{"questionId":"q112","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"39","isCorrect":false,"inputTokens":8916,"outputTokens":1,"latencyMs":1392.9161670000758},{"questionId":"q112","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":8666,"outputTokens":1,"latencyMs":714.6367079999764},{"questionId":"q112","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":17201,"outputTokens":1,"latencyMs":815.1910830000415},{"questionId":"q112","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"53","actual":"42","isCorrect":false,"inputTokens":13287,"outputTokens":1,"latencyMs":597.3310420000926},{"questionId":"q113","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"78","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":871.604708999861},{"questionId":"q113","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"66","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":807.617333999835},{"questionId":"q113","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"38","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":638.7451250001322},{"questionId":"q113","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"66","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":708.159
4589999877},{"questionId":"q113","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"62","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":855.563000000082},{"questionId":"q113","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"77","actual":"71","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":794.3505830001086},{"questionId":"q114","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"42","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":933.0522910000291},{"questionId":"q114","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"35","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":506.23399999993853},{"questionId":"q114","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"39","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":494.1964999998454},{"questionId":"q114","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"26","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":563.3929169999901},{"questionId":"q114","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"25","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":1057.8160830000415},{"questionId":"q114","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"37","actual":"39","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":895.5522079998627},{"questionId":"q115","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"12","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":5503.627499999944},{"questionId":"q115","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"25","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":889.953167000087},{"questionId":"q115","format":"toon","model":"grok-4-
1-fast-non-reasoning","expected":"16","actual":"38","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":814.922040999867},{"questionId":"q115","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"25","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":831.7978329998441},{"questionId":"q115","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"12","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":8343.876374999993},{"questionId":"q115","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"16","isCorrect":true,"inputTokens":13286,"outputTokens":1,"latencyMs":1020.1683750001248},{"questionId":"q116","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"66","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":984.3649999999907},{"questionId":"q116","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"65","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":919.2169170000125},{"questionId":"q116","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"84","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":857.4657499999739},{"questionId":"q116","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"58","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":841.0771250000689},{"questionId":"q116","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"39","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":889.4989579999819},{"questionId":"q116","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"49","actual":"51","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":779.8050410000142},{"questionId":"q117","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"35","isCorrect":false,"inputT
okens":15288,"outputTokens":1,"latencyMs":1121.593792000087},{"questionId":"q117","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"38","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":766.6404159998056},{"questionId":"q117","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"39","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":885.6065830001608},{"questionId":"q117","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"35","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":781.8842919999734},{"questionId":"q117","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"42","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":1529.369834000012},{"questionId":"q117","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"23","actual":"25","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":862.2314169998281},{"questionId":"q118","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"1","isCorrect":false,"inputTokens":15288,"outputTokens":1,"latencyMs":731.7133749998175},{"questionId":"q118","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"7","isCorrect":false,"inputTokens":11592,"outputTokens":1,"latencyMs":755.2664159999695},{"questionId":"q118","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"0","isCorrect":false,"inputTokens":8915,"outputTokens":1,"latencyMs":650.2094999998808},{"questionId":"q118","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"0","isCorrect":false,"inputTokens":8665,"outputTokens":1,"latencyMs":634.1624999998603},{"questionId":"q118","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"1","isCorrect":false,"inputTokens":17200,"outputTokens":1,"latencyMs":999.6341250000987},{"questionId":"q118","fo
rmat":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"1","isCorrect":false,"inputTokens":13286,"outputTokens":1,"latencyMs":678.9282090000343},{"questionId":"q119","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"67","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":637.6143749998882},{"questionId":"q119","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"42","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":822.7012920000125},{"questionId":"q119","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"12","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":915.8724579999689},{"questionId":"q119","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"42","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":901.0087499998044},{"questionId":"q119","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"42","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":986.8243750000838},{"questionId":"q119","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"57","actual":"25","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":971.8272919999436},{"questionId":"q120","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"42","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":955.3667919998989},{"questionId":"q120","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"25","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":790.203125},{"questionId":"q120","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"8","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":995.7208749998827},{"questionId":"q120","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":
"21","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":871.769125000108},{"questionId":"q120","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"35","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":1067.404041999951},{"questionId":"q120","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"43","actual":"25","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":836.6861250000075},{"questionId":"q121","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":1021.1512080000248},{"questionId":"q121","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":2547.47916600015},{"questionId":"q121","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"5","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":606.0274169999175},{"questionId":"q121","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"14","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":970.8832089998759},{"questionId":"q121","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":751.1921669999138},{"questionId":"q121","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"25","actual":"12","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":750.9648329999764},{"questionId":"q122","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":659.0223749999423},{"questionId":"q122","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":
975.9859169998672},{"questionId":"q122","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":538.5595419998281},{"questionId":"q122","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":818.3229999998584},{"questionId":"q122","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"8","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":1122.52804200002},{"questionId":"q122","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"0","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":627.9833750000689},{"questionId":"q123","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"7","isCorrect":false,"inputTokens":15295,"outputTokens":1,"latencyMs":1331.4281659999397},{"questionId":"q123","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"12","isCorrect":false,"inputTokens":11599,"outputTokens":1,"latencyMs":686.9591670001391},{"questionId":"q123","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":8922,"outputTokens":1,"latencyMs":720.088041999843},{"questionId":"q123","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":8672,"outputTokens":1,"latencyMs":778.5020409999415},{"questionId":"q123","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":17207,"outputTokens":1,"latencyMs":1112.706082999939},{"questionId":"q123","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"5","isCorrect":false,"inputTokens":13293,"outputTokens":1,"latencyMs":799.0505000001285},{"questionId":"q124","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","e
xpected":"error","actual":"error","isCorrect":true,"inputTokens":7075,"outputTokens":1,"latencyMs":669.6382910001557},{"questionId":"q124","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":4987,"outputTokens":1,"latencyMs":676.7220410001464},{"questionId":"q124","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6076,"outputTokens":1,"latencyMs":633.0288750000764},{"questionId":"q124","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":7985,"outputTokens":1,"latencyMs":726.093957999954},{"questionId":"q124","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6102,"outputTokens":1,"latencyMs":722.1203749999404},{"questionId":"q125","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":7075,"outputTokens":3,"latencyMs":663.9378329999745},{"questionId":"q125","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":4987,"outputTokens":3,"latencyMs":668.3629580000415},{"questionId":"q125","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":6076,"outputTokens":3,"latencyMs":695.7735000001267},{"questionId":"q125","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":7985,"outputTokens":3,"latencyMs":791.358666999964},{"questionId":"q125","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"/api/payments","actual":"/api/payments","isCorrect":true,"inputTokens":6102,"outputTokens":3,"latencyMs":547.9040830000304},{"questionId":"q126","format
":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":7076,"outputTokens":1,"latencyMs":720.8922079999465},{"questionId":"q126","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":4988,"outputTokens":1,"latencyMs":1768.109417000087},{"questionId":"q126","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":6077,"outputTokens":1,"latencyMs":434.0052920000162},{"questionId":"q126","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":7986,"outputTokens":1,"latencyMs":841.0514159998856},{"questionId":"q126","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"297","actual":"297","isCorrect":true,"inputTokens":6103,"outputTokens":1,"latencyMs":671.7435409999453},{"questionId":"q127","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":7076,"outputTokens":2,"latencyMs":679.3918339998927},{"questionId":"q127","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":4988,"outputTokens":2,"latencyMs":531.0219580000266},{"questionId":"q127","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":6077,"outputTokens":2,"latencyMs":502.0852500000037},{"questionId":"q127","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":7986,"outputTokens":2,"latencyMs":547.9402090001386},{"questionId":"q127","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1000","actual":"1000","isCorrect":true,"inputTokens":6103,"outputTokens":2,"latencyMs":816.4800829999149},{"questionId":"q128","format":"json-pretty","model":"grok-4-1-fast-non-reasoning
","expected":"error","actual":"error","isCorrect":true,"inputTokens":7075,"outputTokens":1,"latencyMs":1052.8748339999001},{"questionId":"q128","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":4987,"outputTokens":1,"latencyMs":430.6976250000298},{"questionId":"q128","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6076,"outputTokens":1,"latencyMs":596.5801659999415},{"questionId":"q128","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":7985,"outputTokens":1,"latencyMs":670.6383330000099},{"questionId":"q128","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"error","actual":"error","isCorrect":true,"inputTokens":6102,"outputTokens":1,"latencyMs":666.0189590000082},{"questionId":"q129","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7075,"outputTokens":2,"latencyMs":975.8670830000192},{"questionId":"q129","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"{\"logs\":[{\"timestamp\":\"2026-03-01T16:56:46.557Z\",\"level\":\"info\",\"endpoint\":\"/api/auth\",\"statusCode\":205,\"responseTime\":765,\"userId\":2867}]} \n**Answer:** 
/api/auth","isCorrect":false,"inputTokens":4987,"outputTokens":53,"latencyMs":918.1929160000291},{"questionId":"q129","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6076,"outputTokens":2,"latencyMs":484.9342089998536},{"questionId":"q129","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7985,"outputTokens":2,"latencyMs":5054.260624999879},{"questionId":"q129","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6102,"outputTokens":2,"latencyMs":4841.998874999816},{"questionId":"q130","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":7076,"outputTokens":1,"latencyMs":596.7672910001129},{"questionId":"q130","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":4988,"outputTokens":1,"latencyMs":4763.605125000002},{"questionId":"q130","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":6077,"outputTokens":1,"latencyMs":756.2959169999231},{"questionId":"q130","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":7986,"outputTokens":1,"latencyMs":452.62154199997894},{"questionId":"q130","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"298","actual":"298","isCorrect":true,"inputTokens":6103,"outputTokens":1,"latencyMs":494.21950000012293},{"questionId":"q131","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":7076,"outputTokens":1,"latencyMs":590.6149999999907},{"questionId":"q131","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect"
:true,"inputTokens":4988,"outputTokens":1,"latencyMs":544.4521670001559},{"questionId":"q131","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":6077,"outputTokens":1,"latencyMs":496.3487500001211},{"questionId":"q131","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":7986,"outputTokens":1,"latencyMs":717.1118340000976},{"questionId":"q131","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"398","actual":"398","isCorrect":true,"inputTokens":6103,"outputTokens":1,"latencyMs":480.12924999999814},{"questionId":"q132","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7075,"outputTokens":1,"latencyMs":693.7166249998845},{"questionId":"q132","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":4987,"outputTokens":1,"latencyMs":537.2367080000695},{"questionId":"q132","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6076,"outputTokens":1,"latencyMs":744.6592079999391},{"questionId":"q132","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7985,"outputTokens":1,"latencyMs":652.4775829999708},{"questionId":"q132","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6102,"outputTokens":1,"latencyMs":560.8553750000428},{"questionId":"q133","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7075,"outputTokens":2,"latencyMs":589.8924159999005},{"questionId":"q133","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"{\"logs\":[{\"timestamp\":\"2026-02-26T16:40:28.5
12Z\",\"level\":\"error\",\"endpoint\":\"/api/payments\",\"statusCode\":442,\"responseTime\":2708,\"userId\":4665,\"error\":{\"message\":\"Invalid authentication token\",\"stack\":\"Error: Admitto tui et aperio denuo minima aeneus arto.\\n at desparatus\\n at curvo\",\"retryable\":true}},{\"timestamp\":\"2026-03-01T22:38:51.411Z\",\"level\":\"info\",\"endpoint\":\"/api/orders\",\"statusCode\":215,\"responseTime\":2058,\"userId\":8534},{\"timestamp\":\"2026-03-02T14:27:54.933","isCorrect":false,"inputTokens":4987,"outputTokens":153,"latencyMs":1858.7134589999914},{"questionId":"q133","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6076,"outputTokens":2,"latencyMs":620.287666000193},{"questionId":"q133","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":7985,"outputTokens":2,"latencyMs":765.4392080001999},{"questionId":"q133","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"/api/auth","actual":"/api/auth","isCorrect":true,"inputTokens":6102,"outputTokens":2,"latencyMs":614.7382080000825},{"questionId":"q134","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"80","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":641.4104170000646},{"questionId":"q134","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"{\"count\":100}","isCorrect":false,"inputTokens":4971,"outputTokens":5,"latencyMs":561.1789159998298},{"questionId":"q134","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"75","isCorrect":true,"inputTokens":6060,"outputTokens":1,"latencyMs":3554.437834000215},{"questionId":"q134","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"50","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":534.5713329999708},{"questionId":"q134","format":"yaml","
model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"72","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":482.87349999998696},{"questionId":"q135","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2388","isCorrect":false,"inputTokens":7060,"outputTokens":2,"latencyMs":2085.6222910000943},{"questionId":"q135","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2591","isCorrect":false,"inputTokens":4972,"outputTokens":2,"latencyMs":583.098166000098},{"questionId":"q135","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2585","isCorrect":false,"inputTokens":6061,"outputTokens":2,"latencyMs":497.60691700014286},{"questionId":"q135","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2507","isCorrect":false,"inputTokens":7970,"outputTokens":2,"latencyMs":644.9506250000559},{"questionId":"q135","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2665.00","actual":"2459","isCorrect":false,"inputTokens":6087,"outputTokens":2,"latencyMs":573.1919589999598},{"questionId":"q136","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":697.5648329998367},{"questionId":"q136","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":4971,"outputTokens":1,"latencyMs":463.88112500007264},{"questionId":"q136","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"28","isCorrect":false,"inputTokens":6060,"outputTokens":1,"latencyMs":2941.9194579999894},{"questionId":"q136","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":703.0076249998529},{"questionId":"q136","format":"yaml","model":"grok-4-1-fast-non-reasoni
ng","expected":"26","actual":"25","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":696.7065420001745},{"questionId":"q137","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"26","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":658.0850420000497},{"questionId":"q137","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"24","isCorrect":false,"inputTokens":4971,"outputTokens":1,"latencyMs":492.91225000005215},{"questionId":"q137","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"25","isCorrect":false,"inputTokens":6060,"outputTokens":1,"latencyMs":514.7448330000043},{"questionId":"q137","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"26","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":393.0493749999441},{"questionId":"q137","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"30","actual":"24","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":423.2260000000242},{"questionId":"q138","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"16","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":526.8290830000769},{"questionId":"q138","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"15","isCorrect":false,"inputTokens":4971,"outputTokens":1,"latencyMs":452.80616699997336},{"questionId":"q138","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"16","isCorrect":false,"inputTokens":6060,"outputTokens":1,"latencyMs":747.925125000067},{"questionId":"q138","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"19","isCorrect":true,"inputTokens":7969,"outputTokens":1,"latencyMs":616.3643330000341},{"questionId":"q138","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"19","actual":"17","isCorrect":false,"inputTokens":6086,"
outputTokens":1,"latencyMs":545.6614999999292},{"questionId":"q139","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"17","isCorrect":false,"inputTokens":7062,"outputTokens":1,"latencyMs":594.9790000000503},{"questionId":"q139","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"15","isCorrect":false,"inputTokens":4974,"outputTokens":1,"latencyMs":633.8448329998646},{"questionId":"q139","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"15","isCorrect":false,"inputTokens":6063,"outputTokens":1,"latencyMs":467.36437499988824},{"questionId":"q139","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"16","isCorrect":true,"inputTokens":7972,"outputTokens":1,"latencyMs":749.2177499998361},{"questionId":"q139","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"16","actual":"18","isCorrect":false,"inputTokens":6089,"outputTokens":1,"latencyMs":588.8195410000626},{"questionId":"q140","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":7061,"outputTokens":1,"latencyMs":955.6003749999218},{"questionId":"q140","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":4973,"outputTokens":1,"latencyMs":524.8153750000056},{"questionId":"q140","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":6062,"outputTokens":1,"latencyMs":5448.149041000055},{"questionId":"q140","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"12","isCorrect":false,"inputTokens":7971,"outputTokens":1,"latencyMs":656.2655419998337},{"questionId":"q140","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"13","actual":"15","isCorrect":false,"inputTokens":6088,"outputTokens":1,"latencyMs":537.8960829998832},{"questionId":"q141","for
mat":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"35","isCorrect":false,"inputTokens":7065,"outputTokens":1,"latencyMs":437.2469580001198},{"questionId":"q141","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"35","isCorrect":false,"inputTokens":4977,"outputTokens":1,"latencyMs":567.4906249998603},{"questionId":"q141","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"37","isCorrect":false,"inputTokens":6066,"outputTokens":1,"latencyMs":568.9782090000808},{"questionId":"q141","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"34","isCorrect":false,"inputTokens":7975,"outputTokens":1,"latencyMs":640.1640840000473},{"questionId":"q141","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"33","actual":"35","isCorrect":false,"inputTokens":6092,"outputTokens":1,"latencyMs":731.982374999905},{"questionId":"q142","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"28","isCorrect":false,"inputTokens":7064,"outputTokens":1,"latencyMs":656.7047500000335},{"questionId":"q142","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"38","isCorrect":false,"inputTokens":4976,"outputTokens":1,"latencyMs":480.58245799993165},{"questionId":"q142","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"37","isCorrect":false,"inputTokens":6065,"outputTokens":1,"latencyMs":539.6479159998707},{"questionId":"q142","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"38","isCorrect":false,"inputTokens":7974,"outputTokens":1,"latencyMs":655.2422499998938},{"questionId":"q142","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"42","actual":"42","isCorrect":true,"inputTokens":6091,"outputTokens":1,"latencyMs":641.8482500000391},{"questionId":"q143","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"24",
"actual":"20","isCorrect":false,"inputTokens":7060,"outputTokens":1,"latencyMs":507.58849999983795},{"questionId":"q143","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"18","isCorrect":false,"inputTokens":4972,"outputTokens":1,"latencyMs":587.9285420000087},{"questionId":"q143","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"26","isCorrect":false,"inputTokens":6061,"outputTokens":1,"latencyMs":543.0668340001721},{"questionId":"q143","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"25","isCorrect":false,"inputTokens":7970,"outputTokens":1,"latencyMs":633.4865840000566},{"questionId":"q143","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"24","actual":"22","isCorrect":false,"inputTokens":6087,"outputTokens":1,"latencyMs":547.9707090000156},{"questionId":"q144","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":7066,"outputTokens":1,"latencyMs":807.6963329999708},{"questionId":"q144","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"27","isCorrect":false,"inputTokens":4978,"outputTokens":1,"latencyMs":603.1292499999981},{"questionId":"q144","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"26","isCorrect":true,"inputTokens":6067,"outputTokens":1,"latencyMs":570.2862919999752},{"questionId":"q144","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"26","isCorrect":true,"inputTokens":7976,"outputTokens":1,"latencyMs":810.4777089999989},{"questionId":"q144","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"26","actual":"25","isCorrect":false,"inputTokens":6093,"outputTokens":1,"latencyMs":584.8633749999572},{"questionId":"q145","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":7066,"outputTokens":1,"latency
Ms":771.7984579999465},{"questionId":"q145","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"12","isCorrect":false,"inputTokens":4978,"outputTokens":1,"latencyMs":611.7062500000466},{"questionId":"q145","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6067,"outputTokens":1,"latencyMs":562.7346249998081},{"questionId":"q145","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"12","isCorrect":false,"inputTokens":7976,"outputTokens":1,"latencyMs":1165.6651250000577},{"questionId":"q145","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"11","isCorrect":false,"inputTokens":6093,"outputTokens":1,"latencyMs":458.80704100010917},{"questionId":"q146","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":7069,"outputTokens":1,"latencyMs":863.8479579999112},{"questionId":"q146","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":4981,"outputTokens":1,"latencyMs":570.4369580000639},{"questionId":"q146","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":6070,"outputTokens":1,"latencyMs":519.018917000154},{"questionId":"q146","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"5","isCorrect":false,"inputTokens":7979,"outputTokens":1,"latencyMs":949.3144999998622},{"questionId":"q146","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":6096,"outputTokens":1,"latencyMs":402.4407089999877},{"questionId":"q147","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":7068,"outputTokens":1,"latencyMs":622.7003339999355},{"questionId":"q147","format":"json-compact","model":"grok-4-1-fas
t-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":4980,"outputTokens":1,"latencyMs":441.8485409999266},{"questionId":"q147","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"7","isCorrect":false,"inputTokens":6069,"outputTokens":1,"latencyMs":483.62008300004527},{"questionId":"q147","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"5","isCorrect":false,"inputTokens":7978,"outputTokens":1,"latencyMs":699.7590830000117},{"questionId":"q147","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":6095,"outputTokens":1,"latencyMs":602.388666999992},{"questionId":"q148","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":7068,"outputTokens":1,"latencyMs":10241.612874999875},{"questionId":"q148","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":4980,"outputTokens":1,"latencyMs":475.24783300003037},{"questionId":"q148","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":6069,"outputTokens":1,"latencyMs":575.6938749998808},{"questionId":"q148","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7978,"outputTokens":1,"latencyMs":780.9076249999925},{"questionId":"q148","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"5","isCorrect":false,"inputTokens":6095,"outputTokens":1,"latencyMs":566.3519999999553},{"questionId":"q149","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"4","isCorrect":false,"inputTokens":7067,"outputTokens":1,"latencyMs":780.5568750000093},{"questionId":"q149","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"5","isCorrect":false,"inputTokens":4979,"outputToke
ns":1,"latencyMs":451.3896250000689},{"questionId":"q149","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"5","isCorrect":false,"inputTokens":6068,"outputTokens":1,"latencyMs":740.5959590000566},{"questionId":"q149","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"4","isCorrect":false,"inputTokens":7977,"outputTokens":1,"latencyMs":839.9617080001626},{"questionId":"q149","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"4","isCorrect":false,"inputTokens":6094,"outputTokens":1,"latencyMs":545.8948749999981},{"questionId":"q150","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"5","isCorrect":false,"inputTokens":7066,"outputTokens":1,"latencyMs":617.8501670002006},{"questionId":"q150","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":4978,"outputTokens":1,"latencyMs":499.71529100020416},{"questionId":"q150","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6067,"outputTokens":1,"latencyMs":436.76954200002365},{"questionId":"q150","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"6","isCorrect":false,"inputTokens":7976,"outputTokens":1,"latencyMs":665.0118329999968},{"questionId":"q150","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6093,"outputTokens":1,"latencyMs":630.4103749999776},{"questionId":"q151","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":568.3108330001123},{"questionId":"q151","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":664.702874999959},{"questionId":"q151"
,"format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":887.1532499999739},{"questionId":"q151","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":1236.0112499999814},{"questionId":"q151","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"development","actual":"development","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":532.5732499998994},{"questionId":"q152","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1165,"outputTokens":7,"latencyMs":1971.4524580000434},{"questionId":"q152","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":789,"outputTokens":7,"latencyMs":813.3573330000509},{"questionId":"q152","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":896,"outputTokens":7,"latencyMs":924.5502499998547},{"questionId":"q152","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":1237,"outputTokens":7,"latencyMs":1006.0048750001006},{"questionId":"q152","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"neighboring-gastropod.net","actual":"neighboring-gastropod.net","isCorrect":true,"inputTokens":916,"outputTokens":7,"latencyMs":430.3585830000229},{"questionId":"q153","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1165,"outputTokens":2,"latencyMs":541.6815420000348},{"questionId"
:"q153","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":789,"outputTokens":2,"latencyMs":460.0598750000354},{"questionId":"q153","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":896,"outputTokens":2,"latencyMs":484.6317910000216},{"questionId":"q153","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":1237,"outputTokens":2,"latencyMs":465.4757910000626},{"questionId":"q153","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5432","actual":"5432","isCorrect":true,"inputTokens":916,"outputTokens":2,"latencyMs":591.9300409997813},{"questionId":"q154","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":435.2754999999888},{"questionId":"q154","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":536.249749999959},{"questionId":"q154","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":12238.85999999987},{"questionId":"q154","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":662.8013750000391},{"questionId":"q154","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"18","actual":"18","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":594.5850830001291},{"questionId":"q155","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1165,"outputTokens":2,"latencyMs":609.3842909999657},{"questionId":"q155","format":"json-compact","model":"grok-4-1-fast-non-reasonin
g","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":789,"outputTokens":2,"latencyMs":526.6292089999188},{"questionId":"q155","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":896,"outputTokens":2,"latencyMs":550.4001660000067},{"questionId":"q155","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":1237,"outputTokens":2,"latencyMs":539.1558329998516},{"questionId":"q155","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"86400","actual":"86400","isCorrect":true,"inputTokens":916,"outputTokens":2,"latencyMs":433.43420799984597},{"questionId":"q156","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":506.74600000004284},{"questionId":"q156","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":492.7102080001496},{"questionId":"q156","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":433.3894169998821},{"questionId":"q156","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":398.6489579998888},{"questionId":"q156","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":420.9260420000646},{"questionId":"q157","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1167,"outputTokens":2,"latencyMs":503.7374169998802},{"questionId":"q157","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputToke
ns":791,"outputTokens":2,"latencyMs":648.766082999995},{"questionId":"q157","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":898,"outputTokens":2,"latencyMs":711.5557909999043},{"questionId":"q157","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":1239,"outputTokens":2,"latencyMs":383.2424170000013},{"questionId":"q157","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"30000","actual":"30000","isCorrect":true,"inputTokens":918,"outputTokens":2,"latencyMs":564.1333329998888},{"questionId":"q158","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1165,"outputTokens":1,"latencyMs":515.1211670001503},{"questionId":"q158","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":789,"outputTokens":1,"latencyMs":444.1989999997895},{"questionId":"q158","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":896,"outputTokens":1,"latencyMs":553.0614579999819},{"questionId":"q158","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":1237,"outputTokens":1,"latencyMs":516.2151250001043},{"questionId":"q158","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"varchar","actual":"varchar","isCorrect":true,"inputTokens":916,"outputTokens":1,"latencyMs":548.5459579997696},{"questionId":"q159","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1166,"outputTokens":2,"latencyMs":484.6058339998126},{"questionId":"q159","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":790,"outp
utTokens":2,"latencyMs":612.9599999999627},{"questionId":"q159","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":897,"outputTokens":2,"latencyMs":1002.7459169998765},{"questionId":"q159","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":1238,"outputTokens":2,"latencyMs":514.824166000355},{"questionId":"q159","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3600","actual":"3600","isCorrect":true,"inputTokens":917,"outputTokens":2,"latencyMs":573.5519579998218},{"questionId":"q160","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1167,"outputTokens":5,"latencyMs":729.5760000003502},{"questionId":"q160","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":791,"outputTokens":5,"latencyMs":22246.316542000044},{"questionId":"q160","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":898,"outputTokens":5,"latencyMs":484.80991700012237},{"questionId":"q160","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":1239,"outputTokens":5,"latencyMs":683.5540000000037},{"questionId":"q160","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"9.11.2","actual":"9.11.2","isCorrect":true,"inputTokens":918,"outputTokens":5,"latencyMs":537.343334000092},{"questionId":"q161","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":321.8642909997143},{"questionId":"q161","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":711.72916700039
06},{"questionId":"q161","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1956,"outputTokens":1,"latencyMs":585.2053749999031},{"questionId":"q161","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":443.67554099997506},{"questionId":"q161","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":499.66949999984354},{"questionId":"q162","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1167,"outputTokens":1,"latencyMs":650.0754170003347},{"questionId":"q162","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":559.8465829999186},{"questionId":"q162","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":404.6767500001006},{"questionId":"q162","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":2638,"outputTokens":1,"latencyMs":474.6345830000937},{"questionId":"q162","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":918,"outputTokens":1,"latencyMs":785.9961669999175},{"questionId":"q163","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":506.4886670000851},{"questionId":"q163","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":499.8605829998851},{"questionId":"q163","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2
","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":548.1877080001868},{"questionId":"q163","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1238,"outputTokens":1,"latencyMs":573.2623340003192},{"questionId":"q163","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":2085.15475000022},{"questionId":"q164","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":2063.1506249997765},{"questionId":"q164","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":2063.2115000002086},{"questionId":"q164","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":484.71375000011176},{"questionId":"q164","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":1238,"outputTokens":1,"latencyMs":412.90587500017136},{"questionId":"q164","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2","actual":"2","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":592.0760830002837},{"questionId":"q165","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":465.3804170000367},{"questionId":"q165","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":490.51391700003296},{"questionId":"q165","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":555.85291699972},{"questionId":"q165",
"format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":1238,"outputTokens":1,"latencyMs":416.2364590000361},{"questionId":"q165","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":626.9462079997174},{"questionId":"q166","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1170,"outputTokens":1,"latencyMs":483.3775410000235},{"questionId":"q166","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":794,"outputTokens":1,"latencyMs":525.3997499998659},{"questionId":"q166","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":901,"outputTokens":1,"latencyMs":460.0272920001298},{"questionId":"q166","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1242,"outputTokens":1,"latencyMs":468.0580000001937},{"questionId":"q166","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":921,"outputTokens":1,"latencyMs":434.2507499996573},{"questionId":"q167","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":1166,"outputTokens":1,"latencyMs":498.7844169996679},{"questionId":"q167","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":790,"outputTokens":1,"latencyMs":1106.7191670001484},{"questionId":"q167","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":897,"outputTokens":1,"latencyMs":398.2881669998169},{"questionId":"q167","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTok
ens":1238,"outputTokens":1,"latencyMs":423.91337499953806},{"questionId":"q167","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"0","isCorrect":true,"inputTokens":917,"outputTokens":1,"latencyMs":399.51820799987763},{"questionId":"q168","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":2496,"outputTokens":1,"latencyMs":530.0409999997355},{"questionId":"q168","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":792,"outputTokens":1,"latencyMs":372.8215419999324},{"questionId":"q168","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":899,"outputTokens":1,"latencyMs":559.5414160001092},{"questionId":"q168","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":1240,"outputTokens":1,"latencyMs":516.9351249998435},{"questionId":"q168","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":919,"outputTokens":1,"latencyMs":625.4416249999776},{"questionId":"q169","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"9","isCorrect":false,"inputTokens":1170,"outputTokens":1,"latencyMs":958.8957500001416},{"questionId":"q169","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"9","isCorrect":false,"inputTokens":794,"outputTokens":1,"latencyMs":609.8635840001516},{"questionId":"q169","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"8","isCorrect":true,"inputTokens":901,"outputTokens":1,"latencyMs":1038.2139170002192},{"questionId":"q169","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"8","actual":"8","isCorrect":true,"inputTokens":1242,"outputTokens":1,"latencyMs":849.5646669999696},{"questionId":"q169","format":"yaml","model":
"grok-4-1-fast-non-reasoning","expected":"8","actual":"11","isCorrect":false,"inputTokens":921,"outputTokens":1,"latencyMs":3410.467042000033},{"questionId":"q170","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":1169,"outputTokens":1,"latencyMs":594.7289589997381},{"questionId":"q170","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"6","isCorrect":false,"inputTokens":793,"outputTokens":1,"latencyMs":551.421459000092},{"questionId":"q170","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"6","isCorrect":false,"inputTokens":900,"outputTokens":1,"latencyMs":573.0434999996796},{"questionId":"q170","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"6","isCorrect":false,"inputTokens":1241,"outputTokens":1,"latencyMs":658.1823330000043},{"questionId":"q170","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"5","actual":"5","isCorrect":true,"inputTokens":920,"outputTokens":1,"latencyMs":408.4260830003768},{"questionId":"q171","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"4","isCorrect":false,"inputTokens":1171,"outputTokens":1,"latencyMs":497.02908399980515},{"questionId":"q171","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":795,"outputTokens":1,"latencyMs":491.7958749998361},{"questionId":"q171","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"3","isCorrect":true,"inputTokens":902,"outputTokens":1,"latencyMs":590.5531250000931},{"questionId":"q171","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"4","isCorrect":false,"inputTokens":1243,"outputTokens":1,"latencyMs":452.9944170000963},{"questionId":"q171","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"3","actual":"5","isCorrect":false,"inputTokens":922,"output
Tokens":1,"latencyMs":866.8199579999782},{"questionId":"q172","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1171,"outputTokens":1,"latencyMs":607.6427080002613},{"questionId":"q172","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":795,"outputTokens":1,"latencyMs":519.5298330001533},{"questionId":"q172","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":902,"outputTokens":1,"latencyMs":399.87587500037625},{"questionId":"q172","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1243,"outputTokens":1,"latencyMs":440.52479099994525},{"questionId":"q172","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":922,"outputTokens":1,"latencyMs":559.67833300028},{"questionId":"q173","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":1172,"outputTokens":1,"latencyMs":564.7204999998212},{"questionId":"q173","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":796,"outputTokens":1,"latencyMs":384.5131250000559},{"questionId":"q173","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":903,"outputTokens":1,"latencyMs":510.0762499999255},{"questionId":"q173","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1244,"outputTokens":1,"latencyMs":471.7332919999026},{"questionId":"q173","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":923,"outputTokens":1,"latencyMs":584.4937080000527},{"questionId":"q174","format":"json-pretty","model":"grok-4-
1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1169,"outputTokens":1,"latencyMs":669.2428330001421},{"questionId":"q174","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":793,"outputTokens":1,"latencyMs":494.3819160000421},{"questionId":"q174","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":900,"outputTokens":1,"latencyMs":773.781165999826},{"questionId":"q174","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"2","isCorrect":false,"inputTokens":1241,"outputTokens":1,"latencyMs":585.256166999694},{"questionId":"q174","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":920,"outputTokens":1,"latencyMs":707.0857500000857},{"questionId":"q175","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":1173,"outputTokens":1,"latencyMs":434.921624999959},{"questionId":"q175","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"2","isCorrect":false,"inputTokens":797,"outputTokens":1,"latencyMs":814.5306669999845},{"questionId":"q175","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":1968,"outputTokens":1,"latencyMs":414.5523340003565},{"questionId":"q175","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":1245,"outputTokens":1,"latencyMs":571.2765410002321},{"questionId":"q175","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"0","actual":"1","isCorrect":false,"inputTokens":924,"outputTokens":1,"latencyMs":344.7449159999378},{"questionId":"q176","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1167,"outputTokens":1,"
latencyMs":513.3467919998802},{"questionId":"q176","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":791,"outputTokens":1,"latencyMs":533.905749999918},{"questionId":"q176","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":898,"outputTokens":1,"latencyMs":370.74941699998453},{"questionId":"q176","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1239,"outputTokens":1,"latencyMs":631.5751670002937},{"questionId":"q176","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1996,"outputTokens":1,"latencyMs":930.6895409999415},{"questionId":"q177","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":1175,"outputTokens":1,"latencyMs":399.03012500004843},{"questionId":"q177","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":799,"outputTokens":1,"latencyMs":365.84254100034013},{"questionId":"q177","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":906,"outputTokens":1,"latencyMs":582.7095000003465},{"questionId":"q177","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1247,"outputTokens":1,"latencyMs":418.50779200019315},{"questionId":"q177","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":926,"outputTokens":1,"latencyMs":336.9835830000229},{"questionId":"q178","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1170,"outputTokens":1,"latencyMs":532.1033749999478},{"questionId":"q178","format":"json-compact","model":"grok-4-1-fast-no
n-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":794,"outputTokens":1,"latencyMs":438.3645840003155},{"questionId":"q178","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":901,"outputTokens":1,"latencyMs":577.2512499997392},{"questionId":"q178","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1242,"outputTokens":1,"latencyMs":579.6784999999218},{"questionId":"q178","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":921,"outputTokens":1,"latencyMs":811.5298750000075},{"questionId":"q179","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1169,"outputTokens":1,"latencyMs":556.395291000139},{"questionId":"q179","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":793,"outputTokens":1,"latencyMs":579.4757079998963},{"questionId":"q179","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"1","isCorrect":true,"inputTokens":900,"outputTokens":1,"latencyMs":561.9488340001553},{"questionId":"q179","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":1241,"outputTokens":1,"latencyMs":552.0802909997292},{"questionId":"q179","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"1","actual":"0","isCorrect":false,"inputTokens":920,"outputTokens":1,"latencyMs":556.203125},{"questionId":"q180","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":6529,"outputTokens":1,"latencyMs":1252.5789579995908},{"questionId":"q180","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":4120,"outputTokens":1,"late
ncyMs":664.0655419998802},{"questionId":"q180","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2699,"outputTokens":1,"latencyMs":564.4555420000106},{"questionId":"q180","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":2536,"outputTokens":1,"latencyMs":546.6606669998728},{"questionId":"q180","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":7468,"outputTokens":1,"latencyMs":785.5787080000155},{"questionId":"q180","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":5168,"outputTokens":1,"latencyMs":802.9945829999633},{"questionId":"q181","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":6534,"outputTokens":12,"latencyMs":770.2546669999138},{"questionId":"q181","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":4125,"outputTokens":12,"latencyMs":646.5715000000782},{"questionId":"q181","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2704,"outputTokens":12,"latencyMs":738.1328750001267},{"questionId":"q181","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":2541,"outputTokens":12,"latencyMs":453.31841700011864},{"questionId":"q181",
"format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":7473,"outputTokens":12,"latencyMs":600.691165999975},{"questionId":"q181","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,department,salary,yearsExperience,active","actual":"id,name,email,department,salary,yearsExperience,active","isCorrect":true,"inputTokens":5173,"outputTokens":12,"latencyMs":528.4648329997435},{"questionId":"q182","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":6532,"outputTokens":1,"latencyMs":747.806707999669},{"questionId":"q182","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":4123,"outputTokens":1,"latencyMs":579.8203329998069},{"questionId":"q182","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":2702,"outputTokens":1,"latencyMs":648.0913329999894},{"questionId":"q182","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":5238,"outputTokens":1,"latencyMs":753.5913749998435},{"questionId":"q182","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":7471,"outputTokens":1,"latencyMs":555.7612499999814},{"questionId":"q182","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"email","actual":"email","isCorrect":true,"inputTokens":5171,"outputTokens":1,"latencyMs":694.1673329998739},{"questionId":"q183","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":6533,"outputTokens":1,"latencyMs":615.7989579997957},{"questionId":"q183","format":"json-compact","model":"gro
k-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":4124,"outputTokens":1,"latencyMs":636.5812910003588},{"questionId":"q183","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2703,"outputTokens":1,"latencyMs":477.2688339999877},{"questionId":"q183","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":2540,"outputTokens":1,"latencyMs":539.9712910000235},{"questionId":"q183","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":7472,"outputTokens":1,"latencyMs":761.8279169998132},{"questionId":"q183","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"HR","actual":"HR","isCorrect":true,"inputTokens":5172,"outputTokens":1,"latencyMs":465.2080830000341},{"questionId":"q184","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":6533,"outputTokens":4,"latencyMs":1139.6310410001315},{"questionId":"q184","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":4124,"outputTokens":4,"latencyMs":613.9928749999963},{"questionId":"q184","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":2703,"outputTokens":4,"latencyMs":488.5675419997424},{"questionId":"q184","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":2540,"outputTokens":4,"latencyMs":604.6790000000037},{"questionId":"q184","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares 
Skiles","isCorrect":true,"inputTokens":7472,"outputTokens":4,"latencyMs":608.0647919997573},{"questionId":"q184","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"Tavares Skiles","actual":"Tavares Skiles","isCorrect":true,"inputTokens":5172,"outputTokens":4,"latencyMs":664.8144999998622},{"questionId":"q185","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":6530,"outputTokens":1,"latencyMs":12182.406749999616},{"questionId":"q185","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":4121,"outputTokens":1,"latencyMs":493.5817499998957},{"questionId":"q185","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"6","isCorrect":false,"inputTokens":2700,"outputTokens":1,"latencyMs":586.3510839999653},{"questionId":"q185","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":2537,"outputTokens":1,"latencyMs":549.4824999999255},{"questionId":"q185","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":7469,"outputTokens":1,"latencyMs":706.1140000000596},{"questionId":"q185","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"7","actual":"7","isCorrect":true,"inputTokens":5169,"outputTokens":1,"latencyMs":509.1063749999739},{"questionId":"q186","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":11527,"outputTokens":1,"latencyMs":889.8342909999192},{"questionId":"q186","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7301,"outputTokens":1,"latencyMs":836.0652499999851},{"questionId":"q186","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":7619,"outputTokens":1,"laten
cyMs":764.9607909996994},{"questionId":"q186","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":12946,"outputTokens":1,"latencyMs":843.1492909998633},{"questionId":"q186","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"50","actual":"50","isCorrect":true,"inputTokens":8964,"outputTokens":1,"latencyMs":867.6933340001851},{"questionId":"q187","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":11534,"outputTokens":15,"latencyMs":869.5551669998094},{"questionId":"q187","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":7308,"outputTokens":15,"latencyMs":618.4852499999106},{"questionId":"q187","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":7626,"outputTokens":15,"latencyMs":876.8569999998435},{"questionId":"q187","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":12953,"outputTokens":15,"latencyMs":1181.0373749998398},{"questionId":"q187","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"orderId,customer,items,subtotal,tax,total,status,orderDate","actual":"orderId,customer,items,subtotal,tax,total,status,orderDate","isCorrect":true,"inputTokens":8971,"outputTokens":15,"latencyMs":767.7519590002485},{"questionId":"q188","format":"json-pretty","model":"grok-4-1-fast-non
-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":11530,"outputTokens":1,"latencyMs":818.1288749999367},{"questionId":"q188","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7304,"outputTokens":1,"latencyMs":910.3320000004023},{"questionId":"q188","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":7622,"outputTokens":1,"latencyMs":584.8427499998361},{"questionId":"q188","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":12949,"outputTokens":1,"latencyMs":774.0600000000559},{"questionId":"q188","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"4","actual":"4","isCorrect":true,"inputTokens":8967,"outputTokens":1,"latencyMs":861.5179579998367},{"questionId":"q189","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":11535,"outputTokens":6,"latencyMs":803.0108340000734},{"questionId":"q189","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":7309,"outputTokens":6,"latencyMs":700.7432919996791},{"questionId":"q189","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":7627,"outputTokens":6,"latencyMs":598.2462919997051},{"questionId":"q189","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":12954,"outputTokens":6,"latencyMs":726.3003329997882},{"questionId":"q189","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"sku,name,quantity,price","actual":"sku,name,quantity,price","isCorrect":true,"inputTokens":8
972,"outputTokens":6,"latencyMs":865.8394590001553},{"questionId":"q190","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":11531,"outputTokens":2,"latencyMs":854.4052920001559},{"questionId":"q190","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7305,"outputTokens":2,"latencyMs":612.8385410001501},{"questionId":"q190","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":7623,"outputTokens":2,"latencyMs":533.2371249999851},{"questionId":"q190","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":12950,"outputTokens":2,"latencyMs":958.8608749997802},{"questionId":"q190","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"cancelled","actual":"cancelled","isCorrect":true,"inputTokens":8968,"outputTokens":2,"latencyMs":928.7744160001166},{"questionId":"q191","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":11536,"outputTokens":5,"latencyMs":699.7848749998957},{"questionId":"q191","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":7310,"outputTokens":5,"latencyMs":767.8369579999708},{"questionId":"q191","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":7628,"outputTokens":5,"latencyMs":809.0390840000473},{"questionId":"q191","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":12955,"outputTokens":5,"latencyMs":722.1277910000645},{"questio
nId":"q191","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,email,phone","actual":"id,name,email,phone","isCorrect":true,"inputTokens":8973,"outputTokens":5,"latencyMs":742.7240829998627},{"questionId":"q192","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"62","isCorrect":false,"inputTokens":3873,"outputTokens":1,"latencyMs":580.0553330001421},{"questionId":"q192","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"62","isCorrect":false,"inputTokens":2544,"outputTokens":1,"latencyMs":611.8277079998516},{"questionId":"q192","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1803,"outputTokens":1,"latencyMs":582.2866250001825},{"questionId":"q192","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"60","isCorrect":true,"inputTokens":1662,"outputTokens":1,"latencyMs":609.2173339999281},{"questionId":"q192","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"67","isCorrect":false,"inputTokens":4519,"outputTokens":1,"latencyMs":661.6859169998206},{"questionId":"q192","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"60","actual":"62","isCorrect":false,"inputTokens":3203,"outputTokens":1,"latencyMs":589.9222920001484},{"questionId":"q193","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3877,"outputTokens":14,"latencyMs":522.1874170000665},{"questionId":"q193","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":2548,"outputTokens":14,"latencyMs":549.2259579999372},{"questionId":"q193","format":"toon","model":"grok-4-1-f
ast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1807,"outputTokens":14,"latencyMs":560.921875},{"questionId":"q193","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":1666,"outputTokens":14,"latencyMs":681.8551659998484},{"questionId":"q193","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":4523,"outputTokens":14,"latencyMs":610.0809579999186},{"questionId":"q193","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"date,views,clicks,conversions,revenue,bounceRate","actual":"date,views,clicks,conversions,revenue,bounceRate","isCorrect":true,"inputTokens":3207,"outputTokens":14,"latencyMs":941.1139580002055},{"questionId":"q194","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"conversions","isCorrect":false,"inputTokens":3876,"outputTokens":2,"latencyMs":595.4821250000969},{"questionId":"q194","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":2547,"outputTokens":1,"latencyMs":410.87079200008884},{"questionId":"q194","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":1806,"outputTokens":1,"latencyMs":527.0672500003129},{"questionId":"q194","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3490,"outputTokens":1,"latencyMs":1611.3186250003055},{"questionId":"q194","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"conversions","i
sCorrect":false,"inputTokens":4522,"outputTokens":2,"latencyMs":541.6719589997083},{"questionId":"q194","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"revenue","actual":"revenue","isCorrect":true,"inputTokens":3206,"outputTokens":1,"latencyMs":443.4391669998877},{"questionId":"q195","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3877,"outputTokens":6,"latencyMs":646.6973330001347},{"questionId":"q195","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":2548,"outputTokens":6,"latencyMs":460.30249999975786},{"questionId":"q195","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1807,"outputTokens":6,"latencyMs":462.1170000000857},{"questionId":"q195","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":1666,"outputTokens":6,"latencyMs":681.4101669997908},{"questionId":"q195","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":4523,"outputTokens":6,"latencyMs":554.3486249996349},{"questionId":"q195","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"2025-03-01","actual":"2025-03-01","isCorrect":true,"inputTokens":3207,"outputTokens":6,"latencyMs":659.1484580002725},{"questionId":"q196","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":3873,"outputTokens":1,"latencyMs":371.490291999653},{"questionId":"q196","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":2544,"outputTokens":1,"latencyMs":539.5985829997808},{"questionId":"q196","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":
"6","actual":"6","isCorrect":true,"inputTokens":1803,"outputTokens":1,"latencyMs":515.8493749997579},{"questionId":"q196","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":1662,"outputTokens":1,"latencyMs":477.6968330000527},{"questionId":"q196","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":4519,"outputTokens":1,"latencyMs":632.060792000033},{"questionId":"q196","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"6","actual":"6","isCorrect":true,"inputTokens":3203,"outputTokens":1,"latencyMs":475.396583000198},{"questionId":"q197","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":15285,"outputTokens":1,"latencyMs":1091.5453749997541},{"questionId":"q197","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":11589,"outputTokens":1,"latencyMs":772.7851670002565},{"questionId":"q197","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8912,"outputTokens":1,"latencyMs":879.1698340000585},{"questionId":"q197","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":8662,"outputTokens":1,"latencyMs":802.6152500002645},{"questionId":"q197","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"68","isCorrect":false,"inputTokens":17197,"outputTokens":1,"latencyMs":1066.6569999996573},{"questionId":"q197","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"100","actual":"100","isCorrect":true,"inputTokens":13283,"outputTokens":1,"latencyMs":815.2937500001863},{"questionId":"q198","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pus
hedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":15290,"outputTokens":24,"latencyMs":993.7714999997988},{"questionId":"q198","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":11594,"outputTokens":24,"latencyMs":801.9431670000777},{"questionId":"q198","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8917,"outputTokens":24,"latencyMs":784.6215000003576},{"questionId":"q198","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":8667,"outputTokens":24,"latencyMs":862.8154170000926},{"questionId":"q198","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":17202,"outputTokens":24,"latencyMs":1196.5535840000957},{"questionId":"q198","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt","actual":"id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch","isCorrect":false,"inputTokens":13288,"outputTokens":24,"latencyMs":
790.2083340003155},{"questionId":"q199","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":15290,"outputTokens":1,"latencyMs":820.4306660001166},{"questionId":"q199","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":11594,"outputTokens":1,"latencyMs":1137.4689589999616},{"questionId":"q199","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"pushedAt","isCorrect":false,"inputTokens":8917,"outputTokens":3,"latencyMs":434.1880419999361},{"questionId":"q199","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"defaultBranch","isCorrect":false,"inputTokens":8667,"outputTokens":2,"latencyMs":723.8139579999261},{"questionId":"q199","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":17202,"outputTokens":1,"latencyMs":1043.3001669999212},{"questionId":"q199","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"forks","actual":"stars","isCorrect":false,"inputTokens":13288,"outputTokens":1,"latencyMs":663.120625000447},{"questionId":"q200","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":15289,"outputTokens":2,"latencyMs":864.5532499998808},{"questionId":"q200","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":11593,"outputTokens":2,"latencyMs":821.558999999892},{"questionId":"q200","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":8916,"outputTokens":2,"latencyMs":840.106124999933},{"questionId":"q200","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"
inputTokens":8666,"outputTokens":2,"latencyMs":885.9072079998441},{"questionId":"q200","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":17201,"outputTokens":2,"latencyMs":1052.6682919999585},{"questionId":"q200","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"tailwindcss","actual":"tailwindcss","isCorrect":true,"inputTokens":13287,"outputTokens":2,"latencyMs":924.2444159998558},{"questionId":"q201","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":15286,"outputTokens":1,"latencyMs":844.3746249997057},{"questionId":"q201","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"12","isCorrect":false,"inputTokens":11590,"outputTokens":1,"latencyMs":948.0078329998069},{"questionId":"q201","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":8913,"outputTokens":1,"latencyMs":642.2395419999957},{"questionId":"q201","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":8663,"outputTokens":1,"latencyMs":635.705959000159},{"questionId":"q201","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"10","isCorrect":false,"inputTokens":17198,"outputTokens":1,"latencyMs":4565.840375000145},{"questionId":"q201","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"11","actual":"11","isCorrect":true,"inputTokens":13284,"outputTokens":1,"latencyMs":741.9597499999218},{"questionId":"q202","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"80","isCorrect":false,"inputTokens":7059,"outputTokens":1,"latencyMs":776.4865409997292},{"questionId":"q202","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"{\"count\":100}","isCorrect":false,"inputTokens":4971,"outpu
tTokens":5,"latencyMs":509.64670799998567},{"questionId":"q202","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"75","isCorrect":true,"inputTokens":6060,"outputTokens":1,"latencyMs":748.3888749997132},{"questionId":"q202","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"65","isCorrect":false,"inputTokens":7969,"outputTokens":1,"latencyMs":633.1895829997957},{"questionId":"q202","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"75","actual":"67","isCorrect":false,"inputTokens":6086,"outputTokens":1,"latencyMs":579.6138330004178},{"questionId":"q203","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7068,"outputTokens":13,"latencyMs":600.4942499999888},{"questionId":"q203","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":4980,"outputTokens":13,"latencyMs":655.0449999999255},{"questionId":"q203","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6069,"outputTokens":13,"latencyMs":636.632458999753},{"questionId":"q203","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,error","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":7978,"outputTokens":13,"latencyMs":663.1943330001086},{"questionId":"q203","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"timestamp,level,endpoint,statusCode,responseTime,userId,e
rror","actual":"timestamp,level,endpoint,statusCode,responseTime,userId,error","isCorrect":true,"inputTokens":6095,"outputTokens":13,"latencyMs":835.4048330001533},{"questionId":"q204","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7063,"outputTokens":1,"latencyMs":483.99374999990687},{"questionId":"q204","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":4975,"outputTokens":1,"latencyMs":437.3198329997249},{"questionId":"q204","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6064,"outputTokens":1,"latencyMs":1106.938957999926},{"questionId":"q204","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":7973,"outputTokens":1,"latencyMs":584.410000000149},{"questionId":"q204","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"info","actual":"info","isCorrect":true,"inputTokens":6090,"outputTokens":1,"latencyMs":390.6225839997642},{"questionId":"q205","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1525,"outputTokens":1,"latencyMs":772.7192919999361},{"questionId":"q205","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1036,"outputTokens":1,"latencyMs":672.0561250001192},{"questionId":"q205","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":788,"outputTokens":1,"latencyMs":674.134958000388},{"questionId":"q205","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"NO","isCorrect":false,"inputTokens":737,"outputTokens":1,"latencyMs":759.3277080003172},{"questionId":"q205","format":"xml","model":"grok-4-1-fast-non-reasoning","expecte
d":"YES","actual":"NO","isCorrect":false,"inputTokens":1708,"outputTokens":1,"latencyMs":616.3170829997398},{"questionId":"q205","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"YES","actual":"YES","isCorrect":true,"inputTokens":1243,"outputTokens":1,"latencyMs":761.9437910001725},{"questionId":"q206","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1337,"outputTokens":1,"latencyMs":440.14870799984783},{"questionId":"q206","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":920,"outputTokens":1,"latencyMs":861.7682080003433},{"questionId":"q206","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":716,"outputTokens":1,"latencyMs":613.3492499999702},{"questionId":"q206","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":668,"outputTokens":1,"latencyMs":620.8054160000756},{"questionId":"q206","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":3144,"outputTokens":1,"latencyMs":2278.847166999709},{"questionId":"q206","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1095,"outputTokens":1,"latencyMs":534.0277920002118},{"questionId":"q207","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1710,"outputTokens":1,"latencyMs":522.3714589998126},{"questionId":"q207","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1149,"outputTokens":1,"latencyMs":523.8722079996951},{"questionId":"q207","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":857,"outputTokens":
1,"latencyMs":1199.117249999661},{"questionId":"q207","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":803,"outputTokens":1,"latencyMs":552.5905840001069},{"questionId":"q207","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1921,"outputTokens":1,"latencyMs":349.7741249999963},{"questionId":"q207","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1387,"outputTokens":1,"latencyMs":648.5670839999802},{"questionId":"q208","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1517,"outputTokens":1,"latencyMs":471.8094999999739},{"questionId":"q208","format":"json-compact","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1031,"outputTokens":1,"latencyMs":569.4215420000255},{"questionId":"q208","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1261,"outputTokens":1,"latencyMs":645.5075420001522},{"questionId":"q208","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":734,"outputTokens":1,"latencyMs":608.0681250002235},{"questionId":"q208","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1699,"outputTokens":1,"latencyMs":655.7050419999287},{"questionId":"q208","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1236,"outputTokens":1,"latencyMs":626.2335000000894},{"questionId":"q209","format":"json-pretty","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1477,"outputTokens":1,"latencyMs":647.2873749998398},{"questionId":"q209","format":"json-compact","model":"grok-4-
1-fast-non-reasoning","expected":"NO","actual":"YES","isCorrect":false,"inputTokens":1000,"outputTokens":1,"latencyMs":616.4202079996467},{"questionId":"q209","format":"toon","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1227,"outputTokens":1,"latencyMs":674.6432080003433},{"questionId":"q209","format":"csv","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":595,"outputTokens":1,"latencyMs":797.3570420001633},{"questionId":"q209","format":"xml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1656,"outputTokens":1,"latencyMs":605.5889579998329},{"questionId":"q209","format":"yaml","model":"grok-4-1-fast-non-reasoning","expected":"NO","actual":"NO","isCorrect":true,"inputTokens":1202,"outputTokens":1,"latencyMs":780.1308329999447}] ================================================ FILE: benchmarks/results/retrieval-accuracy.md ================================================ Benchmarks test LLM comprehension across different input formats using 209 data retrieval questions on 4 models.
<details> <summary>Show Dataset Catalog</summary> #### Dataset Catalog | Dataset | Rows | Structure | CSV Support | Eligibility | | ------- | ---- | --------- | ----------- | ----------- | | Uniform employee records | 100 | uniform | ✓ | 100% | | E-commerce orders with nested structures | 50 | nested | ✗ | 33% | | Time-series analytics data | 60 | uniform | ✓ | 100% | | Top 100 GitHub repositories | 100 | uniform | ✓ | 100% | | Semi-uniform event logs | 75 | semi-uniform | ✗ | 50% | | Deeply nested configuration | 11 | deep | ✗ | 0% | | Valid complete dataset (control) | 20 | uniform | ✓ | 100% | | Array truncated: 3 rows removed from end | 17 | uniform | ✓ | 100% | | Extra rows added beyond declared length | 23 | uniform | ✓ | 100% | | Inconsistent field count (missing salary in row 10) | 20 | uniform | ✓ | 100% | | Missing required fields (no email in multiple rows) | 20 | uniform | ✓ | 100% | **Structure classes:** - **uniform**: All objects have identical fields with primitive values - **semi-uniform**: Mix of uniform and non-uniform structures - **nested**: Objects with nested structures (nested objects or arrays) - **deep**: Highly nested with minimal tabular eligibility **CSV Support:** ✓ (supported), ✗ (not supported – would require lossy flattening) **Eligibility:** Percentage of arrays that qualify for TOON's tabular format (uniform objects with primitive values) </details>
#### Efficiency Ranking (Accuracy per 1K Tokens) Each format ranked by efficiency (accuracy percentage per 1,000 tokens): ``` TOON ████████████████████ 27.7 acc%/1K tok │ 76.4% acc │ 2,759 tokens JSON compact █████████████████░░░ 23.7 acc%/1K tok │ 73.7% acc │ 3,104 tokens YAML ██████████████░░░░░░ 19.9 acc%/1K tok │ 74.5% acc │ 3,749 tokens JSON ████████████░░░░░░░░ 16.4 acc%/1K tok │ 75.0% acc │ 4,587 tokens XML ██████████░░░░░░░░░░ 13.8 acc%/1K tok │ 72.1% acc │ 5,221 tokens ``` *Efficiency score = (Accuracy % ÷ Tokens) × 1,000. Higher is better.* > [!TIP] > TOON achieves **76.4%** accuracy (vs JSON's 75.0%) while using **39.9% fewer tokens**. **Note on CSV:** Excluded from ranking as it only supports 109 of 209 questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle. #### Per-Model Accuracy Accuracy across 4 LLMs on 209 data retrieval questions: ``` claude-haiku-4-5-20251001 → TOON ████████████░░░░░░░░ 59.8% (125/209) JSON ███████████░░░░░░░░░ 57.4% (120/209) YAML ███████████░░░░░░░░░ 56.0% (117/209) XML ███████████░░░░░░░░░ 55.5% (116/209) JSON compact ███████████░░░░░░░░░ 55.0% (115/209) CSV ██████████░░░░░░░░░░ 50.5% (55/109) gemini-3-flash-preview XML ████████████████████ 98.1% (205/209) JSON ███████████████████░ 97.1% (203/209) YAML ███████████████████░ 97.1% (203/209) → TOON ███████████████████░ 96.7% (202/209) JSON compact ███████████████████░ 96.7% (202/209) CSV ███████████████████░ 96.3% (105/109) gpt-5-nano → TOON ██████████████████░░ 90.9% (190/209) JSON compact ██████████████████░░ 90.9% (190/209) JSON ██████████████████░░ 89.0% (186/209) CSV ██████████████████░░ 89.0% (97/109) YAML █████████████████░░░ 87.1% (182/209) XML ████████████████░░░░ 80.9% (169/209) grok-4-1-fast-non-reasoning → TOON ████████████░░░░░░░░ 58.4% (122/209) YAML ████████████░░░░░░░░ 57.9% (121/209) JSON ███████████░░░░░░░░░ 56.5% (118/209) XML ███████████░░░░░░░░░ 54.1% 
(113/209) JSON compact ██████████░░░░░░░░░░ 52.2% (109/209) CSV ██████████░░░░░░░░░░ 51.4% (56/109) ``` > [!TIP] > TOON achieves **76.4% accuracy** (vs JSON's 75.0%) while using **39.9% fewer tokens** on these datasets.
Performance by dataset, model, and question type #### Performance by Question Type | Question Type | TOON | JSON | YAML | JSON compact | XML | CSV | | ------------- | ---- | ---- | ---- | ---- | ---- | ---- | | Field Retrieval | 99.6% | 99.3% | 98.5% | 98.5% | 98.9% | 100.0% | | Aggregation | 61.9% | 61.9% | 59.9% | 58.3% | 54.4% | 50.9% | | Filtering | 56.8% | 53.1% | 56.3% | 55.2% | 51.6% | 50.9% | | Structure Awareness | 89.0% | 87.0% | 84.0% | 84.0% | 81.0% | 85.9% | | Structural Validation | 70.0% | 60.0% | 60.0% | 55.0% | 85.0% | 80.0% | #### Performance by Dataset ##### Uniform employee records | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 73.2% | 2,334 | 120/164 | | `toon` | 73.2% | 2,498 | 120/164 | | `json-compact` | 73.8% | 3,924 | 121/164 | | `yaml` | 73.8% | 4,959 | 121/164 | | `json-pretty` | 73.8% | 6,331 | 121/164 | | `xml` | 74.4% | 7,296 | 122/164 | ##### E-commerce orders with nested structures | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 82.3% | 7,458 | 135/164 | | `json-compact` | 78.7% | 7,110 | 129/164 | | `yaml` | 79.9% | 8,755 | 131/164 | | `json-pretty` | 79.3% | 11,234 | 130/164 | | `xml` | 77.4% | 12,649 | 127/164 | ##### Time-series analytics data | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 75.0% | 1,411 | 90/120 | | `toon` | 78.3% | 1,553 | 94/120 | | `json-compact` | 74.2% | 2,354 | 89/120 | | `yaml` | 75.8% | 2,954 | 91/120 | | `json-pretty` | 75.0% | 3,681 | 90/120 | | `xml` | 72.5% | 4,389 | 87/120 | ##### Top 100 GitHub repositories | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 65.9% | 8,527 | 87/132 | | `toon` | 66.7% | 8,779 | 88/132 | | `yaml` | 65.2% | 13,141 | 86/132 | | `json-compact` | 59.8% | 11,464 | 79/132 | | `json-pretty` | 63.6% | 15,157 | 84/132 | | `xml` | 56.1% | 17,105 | 74/132 | 
##### Semi-uniform event logs | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 68.3% | 4,839 | 82/120 | | `toon` | 65.0% | 5,819 | 78/120 | | `json-pretty` | 69.2% | 6,817 | 83/120 | | `yaml` | 61.7% | 5,847 | 74/120 | | `xml` | 58.3% | 7,729 | 70/120 | ##### Deeply nested configuration | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `json-compact` | 90.5% | 568 | 105/116 | | `toon` | 94.8% | 655 | 110/116 | | `yaml` | 93.1% | 675 | 108/116 | | `json-pretty` | 92.2% | 924 | 107/116 | | `xml` | 91.4% | 1,013 | 106/116 | ##### Valid complete dataset (control) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `toon` | 100.0% | 535 | 4/4 | | `json-compact` | 100.0% | 787 | 4/4 | | `yaml` | 100.0% | 992 | 4/4 | | `json-pretty` | 100.0% | 1,274 | 4/4 | | `xml` | 25.0% | 1,462 | 1/4 | | `csv` | 0.0% | 483 | 0/4 | ##### Array truncated: 3 rows removed from end | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 413 | 4/4 | | `xml` | 100.0% | 1,243 | 4/4 | | `toon` | 0.0% | 462 | 0/4 | | `json-pretty` | 0.0% | 1,085 | 0/4 | | `yaml` | 0.0% | 843 | 0/4 | | `json-compact` | 0.0% | 670 | 0/4 | ##### Extra rows added beyond declared length | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 550 | 4/4 | | `toon` | 75.0% | 605 | 3/4 | | `json-compact` | 75.0% | 901 | 3/4 | | `xml` | 100.0% | 1,678 | 4/4 | | `yaml` | 75.0% | 1,138 | 3/4 | | `json-pretty` | 50.0% | 1,460 | 2/4 | ##### Inconsistent field count (missing salary in row 10) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 480 | 4/4 | | `json-compact` | 100.0% | 782 | 4/4 | | `yaml` | 100.0% | 985 | 4/4 | | `toon` | 100.0% | 1,008 | 4/4 | | `json-pretty` | 100.0% | 1,266 | 4/4 | | 
`xml` | 100.0% | 1,453 | 4/4 | ##### Missing required fields (no email in multiple rows) | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | | `csv` | 100.0% | 340 | 4/4 | | `xml` | 100.0% | 1,409 | 4/4 | | `toon` | 75.0% | 974 | 3/4 | | `json-pretty` | 50.0% | 1,225 | 2/4 | | `yaml` | 25.0% | 951 | 1/4 | | `json-compact` | 0.0% | 750 | 0/4 | #### Performance by Model ##### claude-haiku-4-5-20251001 | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 59.8% | 125/209 | | `json-pretty` | 57.4% | 120/209 | | `yaml` | 56.0% | 117/209 | | `xml` | 55.5% | 116/209 | | `json-compact` | 55.0% | 115/209 | | `csv` | 50.5% | 55/109 | ##### gemini-3-flash-preview | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `xml` | 98.1% | 205/209 | | `json-pretty` | 97.1% | 203/209 | | `yaml` | 97.1% | 203/209 | | `toon` | 96.7% | 202/209 | | `json-compact` | 96.7% | 202/209 | | `csv` | 96.3% | 105/109 | ##### gpt-5-nano | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 90.9% | 190/209 | | `json-compact` | 90.9% | 190/209 | | `json-pretty` | 89.0% | 186/209 | | `csv` | 89.0% | 97/109 | | `yaml` | 87.1% | 182/209 | | `xml` | 80.9% | 169/209 | ##### grok-4-1-fast-non-reasoning | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | | `toon` | 58.4% | 122/209 | | `yaml` | 57.9% | 121/209 | | `json-pretty` | 56.5% | 118/209 | | `xml` | 54.1% | 113/209 | | `json-compact` | 52.2% | 109/209 | | `csv` | 51.4% | 56/109 |
#### What's Being Measured This benchmark tests **LLM comprehension and data retrieval accuracy** across different input formats. Each LLM receives formatted data and must answer questions about it. This does **not** test the model's ability to generate TOON output – only to read and understand it. #### Datasets Tested Eleven datasets designed to test different structural patterns and validation capabilities: **Primary datasets:** 1. **Tabular** (100 employee records): Uniform objects with identical fields – optimal for TOON's tabular format. 2. **Nested** (50 e-commerce orders): Complex structures with nested customer objects and item arrays. 3. **Analytics** (60 days of metrics): Time-series data with dates and numeric values. 4. **GitHub** (100 repositories): Real-world data from top GitHub repos by stars. 5. **Event Logs** (75 logs): Semi-uniform data with ~50% flat logs and ~50% with nested error objects. 6. **Nested Config** (1 configuration): Deeply nested configuration with minimal tabular eligibility. **Structural validation datasets:** 7. **Control**: Valid complete dataset (baseline for validation) 8. **Truncated**: Array with 3 rows removed from end (tests `[N]` length detection) 9. **Extra rows**: Array with 3 additional rows beyond declared length 10. **Width mismatch**: Inconsistent field count (missing salary in row 10) 11. **Missing fields**: Systematic field omissions (no email in multiple rows) #### Question Types 209 questions are generated dynamically across five categories: - **Field retrieval (33%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths) - Example: "What is Alice's salary?" → `75000` - Example: "How many items are in order ORD-0042?" → `3` - Example: "What is the customer name for order ORD-0042?" 
→ `John Doe` - **Aggregation (30%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons) - Example: "How many employees work in Engineering?" → `17` - Example: "What is the total revenue across all orders?" → `45123.50` - Example: "How many employees have salary > 80000?" → `23` - **Filtering (23%)**: Multi-condition queries requiring compound logic (AND constraints across fields) - Example: "How many employees in Sales have salary > 80000?" → `5` - Example: "How many active employees have more than 10 years of experience?" → `8` - **Structure awareness (12%)**: Tests format-native structural affordances (TOON's `[N]` count and `{fields}`, CSV's header row) - Example: "How many employees are in the dataset?" → `100` - Example: "List the field names for employees" → `id, name, email, department, salary, yearsExperience, active` - Example: "What is the department of the last employee?" → `Sales` - **Structural validation (2%)**: Tests ability to detect incomplete, truncated, or corrupted data using structural metadata - Example: "Is this data complete and valid?" → `YES` (control dataset) or `NO` (corrupted datasets) - Tests TOON's `[N]` length validation and `{fields}` consistency checking - Demonstrates CSV's lack of structural validation capabilities #### Evaluation Process 1. **Format conversion**: Each dataset is converted to all 6 formats (TOON, JSON, YAML, JSON compact, XML, CSV). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate deterministically**: Answers are validated using type-aware comparison (e.g., `50000` = `$50,000`, `Engineering` = `engineering`, `2025-01-01` = `January 1, 2025`) without requiring an LLM judge. 
#### Models & Configuration - **Models tested**: `claude-haiku-4-5-20251001`, `gemini-3-flash-preview`, `gpt-5-nano`, `grok-4-1-fast-non-reasoning` - **Token counting**: Using `gpt-tokenizer` with `o200k_base` encoding (GPT-5 tokenizer) - **Temperature**: Not set (models use their defaults) - **Total evaluations**: 209 questions × 6 formats × 4 models = 5,016 LLM calls ================================================ FILE: benchmarks/results/token-efficiency.md ================================================ #### Mixed-Structure Track Datasets with nested or semi-uniform structures. CSV excluded as it cannot properly represent these structures. ``` 🛒 E-commerce orders with nested structures ┊ Tabular: 33% │ TOON █████████████░░░░░░░ 73,126 tokens ├─ vs JSON (−33.3%) 109,599 tokens ├─ vs JSON compact (+5.3%) 69,459 tokens ├─ vs YAML (−14.4%) 85,415 tokens └─ vs XML (−40.7%) 123,344 tokens 🧾 Semi-uniform event logs ┊ Tabular: 50% │ TOON █████████████████░░░ 154,084 tokens ├─ vs JSON (−15.0%) 181,201 tokens ├─ vs JSON compact (+19.9%) 128,529 tokens ├─ vs YAML (−0.8%) 155,397 tokens └─ vs XML (−25.2%) 205,859 tokens 🧩 Deeply nested configuration ┊ Tabular: 0% │ TOON ██████████████░░░░░░ 620 tokens ├─ vs JSON (−31.9%) 911 tokens ├─ vs JSON compact (+11.1%) 558 tokens ├─ vs YAML (−6.3%) 662 tokens └─ vs XML (−38.2%) 1,003 tokens ──────────────────────────────────── Total ──────────────────────────────────── TOON ████████████████░░░░ 227,830 tokens ├─ vs JSON (−21.9%) 291,711 tokens ├─ vs JSON compact (+14.7%) 198,546 tokens ├─ vs YAML (−5.7%) 241,474 tokens └─ vs XML (−31.0%) 330,206 tokens ``` #### Flat-Only Track Datasets with flat tabular structures where CSV is applicable. 
``` 👥 Uniform employee records ┊ Tabular: 100% │ CSV ███████████████████░ 47,102 tokens TOON ████████████████████ 49,919 tokens (+6.0% vs CSV) ├─ vs JSON (−60.7%) 127,063 tokens ├─ vs JSON compact (−36.9%) 79,059 tokens ├─ vs YAML (−50.1%) 100,011 tokens └─ vs XML (−65.9%) 146,579 tokens 📈 Time-series analytics data ┊ Tabular: 100% │ CSV ██████████████████░░ 8,383 tokens TOON ████████████████████ 9,115 tokens (+8.7% vs CSV) ├─ vs JSON (−59.0%) 22,245 tokens ├─ vs JSON compact (−35.9%) 14,211 tokens ├─ vs YAML (−49.0%) 17,858 tokens └─ vs XML (−65.8%) 26,616 tokens ⭐ Top 100 GitHub repositories ┊ Tabular: 100% │ CSV ███████████████████░ 8,512 tokens TOON ████████████████████ 8,744 tokens (+2.7% vs CSV) ├─ vs JSON (−42.3%) 15,144 tokens ├─ vs JSON compact (−23.7%) 11,454 tokens ├─ vs YAML (−33.4%) 13,128 tokens └─ vs XML (−48.9%) 17,095 tokens ──────────────────────────────────── Total ──────────────────────────────────── CSV ███████████████████░ 63,997 tokens TOON ████████████████████ 67,778 tokens (+5.9% vs CSV) ├─ vs JSON (−58.8%) 164,452 tokens ├─ vs JSON compact (−35.3%) 104,724 tokens ├─ vs YAML (−48.3%) 130,997 tokens └─ vs XML (−64.4%) 190,290 tokens ```
Show detailed examples #### 📈 Time-series analytics data **Savings:** 13,130 tokens (59.0% reduction vs JSON) **JSON** (22,245 tokens): ```json { "metrics": [ { "date": "2025-01-01", "views": 6138, "clicks": 174, "conversions": 12, "revenue": 2712.49, "bounceRate": 0.35 }, { "date": "2025-01-02", "views": 4616, "clicks": 274, "conversions": 34, "revenue": 9156.29, "bounceRate": 0.56 }, { "date": "2025-01-03", "views": 4460, "clicks": 143, "conversions": 8, "revenue": 1317.98, "bounceRate": 0.59 }, { "date": "2025-01-04", "views": 4740, "clicks": 125, "conversions": 13, "revenue": 2934.77, "bounceRate": 0.37 }, { "date": "2025-01-05", "views": 6428, "clicks": 369, "conversions": 19, "revenue": 1317.24, "bounceRate": 0.3 } ] } ``` **TOON** (9,115 tokens): ``` metrics[5]{date,views,clicks,conversions,revenue,bounceRate}: 2025-01-01,6138,174,12,2712.49,0.35 2025-01-02,4616,274,34,9156.29,0.56 2025-01-03,4460,143,8,1317.98,0.59 2025-01-04,4740,125,13,2934.77,0.37 2025-01-05,6428,369,19,1317.24,0.3 ``` --- #### ⭐ Top 100 GitHub repositories **Savings:** 6,400 tokens (42.3% reduction vs JSON) **JSON** (15,144 tokens): ```json { "repositories": [ { "id": 28457823, "name": "freeCodeCamp", "repo": "freeCodeCamp/freeCodeCamp", "description": "freeCodeCamp.org's open-source codebase and curriculum. 
Learn math, programming,…", "createdAt": "2014-12-24T17:49:19Z", "updatedAt": "2025-10-28T11:58:08Z", "pushedAt": "2025-10-28T10:17:16Z", "stars": 430886, "watchers": 8583, "forks": 42146, "defaultBranch": "main" }, { "id": 132750724, "name": "build-your-own-x", "repo": "codecrafters-io/build-your-own-x", "description": "Master programming by recreating your favorite technologies from scratch.", "createdAt": "2018-05-09T12:03:18Z", "updatedAt": "2025-10-28T12:37:11Z", "pushedAt": "2025-10-10T18:45:01Z", "stars": 430877, "watchers": 6332, "forks": 40453, "defaultBranch": "master" }, { "id": 21737465, "name": "awesome", "repo": "sindresorhus/awesome", "description": "😎 Awesome lists about all kinds of interesting topics", "createdAt": "2014-07-11T13:42:37Z", "updatedAt": "2025-10-28T12:40:21Z", "pushedAt": "2025-10-27T17:57:31Z", "stars": 410052, "watchers": 8017, "forks": 32029, "defaultBranch": "main" } ] } ``` **TOON** (8,744 tokens): ``` repositories[3]{id,name,repo,description,createdAt,updatedAt,pushedAt,stars,watchers,forks,defaultBranch}: 28457823,freeCodeCamp,freeCodeCamp/freeCodeCamp,"freeCodeCamp.org's open-source codebase and curriculum. Learn math, programming,…","2014-12-24T17:49:19Z","2025-10-28T11:58:08Z","2025-10-28T10:17:16Z",430886,8583,42146,main 132750724,build-your-own-x,codecrafters-io/build-your-own-x,Master programming by recreating your favorite technologies from scratch.,"2018-05-09T12:03:18Z","2025-10-28T12:37:11Z","2025-10-10T18:45:01Z",430877,6332,40453,master 21737465,awesome,sindresorhus/awesome,😎 Awesome lists about all kinds of interesting topics,"2014-07-11T13:42:37Z","2025-10-28T12:40:21Z","2025-10-27T17:57:31Z",410052,8017,32029,main ```
================================================ FILE: benchmarks/scripts/accuracy-benchmark.ts ================================================ import type { Question } from '../src/types.ts' import * as fsp from 'node:fs/promises' import * as path from 'node:path' import process from 'node:process' import * as prompts from '@clack/prompts' import PQueue from 'p-queue' import { BENCHMARKS_DIR, DEFAULT_CONCURRENCY, DRY_RUN, DRY_RUN_LIMITS, MODEL_RPM_LIMITS, ROOT_DIR } from '../src/constants.ts' import { ACCURACY_DATASETS } from '../src/datasets.ts' import { evaluateQuestion, models } from '../src/evaluate.ts' import { formatters, supportsCSV } from '../src/formatters.ts' import { generateQuestions } from '../src/questions/index.ts' import { calculateFormatResults, calculateTokenCounts, generateAccuracyReport } from '../src/report.ts' import { getAllModelResults, hasModelResults, saveModelResults } from '../src/storage.ts' import { ensureDir } from '../src/utils.ts' // Constants const PROGRESS_UPDATE_INTERVAL = 10 const RATE_LIMIT_INTERVAL_MS = 60_000 prompts.intro('Retrieval Accuracy Benchmark') /** * Generate evaluation tasks for a model */ function generateEvaluationTasks(questions: Question[]): { question: Question, formatName: string }[] { const tasks: { question: Question, formatName: string }[] = [] for (const question of questions) { for (const [formatName] of Object.entries(formatters)) { // Skip CSV for datasets that don't support it const dataset = ACCURACY_DATASETS.find(d => d.name === question.dataset) if (formatName === 'csv' && dataset && !supportsCSV(dataset)) continue tasks.push({ question, formatName }) } } return tasks } /** * Check which models already have saved results */ async function checkExistingResults(activeModels: typeof models) { const existingModelResults: Record = {} for (const model of activeModels) { const existingResult = await hasModelResults(model.modelId) if (existingResult) existingModelResults[model.modelId] = existingResult } 
return existingModelResults } /** * Create a progress updater function */ function createProgressUpdater(spinner: ReturnType, total: number) { let completed = 0 return () => { completed++ if (completed % PROGRESS_UPDATE_INTERVAL === 0 || completed === total) { const percent = ((completed / total) * 100).toFixed(1) spinner.message(`Progress: ${completed}/${total} (${percent}%)`) } } } /** * Create a rate-limited queue for model evaluation */ function createEvaluationQueue(modelId: string) { const rpmLimit = MODEL_RPM_LIMITS[modelId] return new PQueue({ concurrency: DEFAULT_CONCURRENCY, intervalCap: rpmLimit ?? Infinity, interval: rpmLimit ? RATE_LIMIT_INTERVAL_MS : 0, }) } // Prompt user to select which models to benchmark const modelChoices = models.map(({ modelId }) => ({ value: modelId, label: modelId, })) const selectedModels = await prompts.multiselect({ message: 'Select models to benchmark (Space to select, Enter to confirm)', options: modelChoices, required: true, }) if (prompts.isCancel(selectedModels)) { prompts.cancel('Benchmark cancelled') process.exit(0) } const activeModels = models.filter(m => selectedModels.includes(m.modelId)) prompts.log.info(`Selected ${activeModels.length} model(s): ${activeModels.map(m => m.modelId).join(', ')}`) // Check which models already have results const existingModelResults = await checkExistingResults(activeModels) if (Object.keys(existingModelResults).length > 0) { prompts.log.info(`Found existing results for ${Object.keys(existingModelResults).length} model(s)`) } if (DRY_RUN) { prompts.log.info('Limiting questions and models for dry run') } let questions = generateQuestions() // Apply dry run limits if enabled if (DRY_RUN && DRY_RUN_LIMITS.maxQuestions) { questions = questions.slice(0, DRY_RUN_LIMITS.maxQuestions) } prompts.log.info(`Evaluating ${questions.length} questions`) prompts.log.info(`Testing ${Object.keys(formatters).length} formats`) // Evaluate each model separately and save results incrementally for 
(const model of activeModels) { const modelId = model.modelId // Skip if results already exist if (existingModelResults[modelId]) { prompts.log.info(`Skipping ${modelId} (results already exist)`) continue } prompts.log.step(`Running benchmark for ${modelId}`) // Generate evaluation tasks for this model const tasks = generateEvaluationTasks(questions) const total = tasks.length const rpmLimit = MODEL_RPM_LIMITS[modelId] const queue = createEvaluationQueue(modelId) const evalSpinner = prompts.spinner() evalSpinner.start(`Running ${total} evaluations (concurrency: ${DEFAULT_CONCURRENCY}, RPM limit: ${rpmLimit ?? 'unlimited'})`) const updateProgress = createProgressUpdater(evalSpinner, total) // Queue all tasks const modelResultPromises = tasks.map(task => queue.add(async () => { // Format data on-demand const dataset = ACCURACY_DATASETS.find(d => d.name === task.question.dataset)! const formatter = formatters[task.formatName]! const formattedData = formatter(dataset.data) const result = await evaluateQuestion({ question: task.question, formatName: task.formatName, formattedData, model, }) // Progress update after task completes updateProgress() return result }), ) // Wait for all tasks to complete const modelResults = await Promise.all(modelResultPromises) evalSpinner.stop(`Evaluation complete for ${modelId}`) // Save results immediately for this model await saveModelResults(modelId, modelResults) prompts.log.success(`Saved results for ${modelId}`) } // Generate/regenerate markdown report from all available model results const reportSpinner = prompts.spinner() reportSpinner.start('Generating report from all model results') // Load all available model results (including any that were skipped) const allModelResults = await getAllModelResults() const allResults = Object.values(allModelResults).flat() if (allResults.length === 0) { prompts.log.warn('No results available to generate report') process.exit(0) } const tokenCounts = calculateTokenCounts(formatters) const 
formatResults = calculateFormatResults(allResults, tokenCounts) const accuracyReport = generateAccuracyReport(allResults, formatResults, tokenCounts) const resultsDir = path.join(BENCHMARKS_DIR, 'results') await ensureDir(resultsDir) const outputFilePath = path.join(resultsDir, 'retrieval-accuracy.md') await fsp.writeFile(outputFilePath, accuracyReport) reportSpinner.stop('Report generation complete!') prompts.log.info(`Report saved to: \`${path.relative(ROOT_DIR, outputFilePath)}\``) ================================================ FILE: benchmarks/scripts/fetch-github-repos.ts ================================================ import * as fsp from 'node:fs/promises' import * as path from 'node:path' import process from 'node:process' import * as prompts from '@clack/prompts' import { ofetch } from 'ofetch' import pMap from 'p-map' import { BENCHMARKS_DIR } from '../src/constants.ts' import { ensureDir } from '../src/utils.ts' prompts.intro('GitHub Repositories Fetcher') try { // Fetch top 100 repos from GitHub const repoList = await searchTop100Repos() const repos = await fetchRepoDetails(repoList) if (repos.length === 0) { prompts.log.error('No repositories fetched. 
/**
 * Ask the GitHub search API for the 100 most-starred repositories.
 *
 * @returns The `full_name` of every item in the response, in response order
 */
async function searchTop100Repos(): Promise<string[]> {
  const spinner = prompts.spinner()
  spinner.start('Fetching top 100 starred repositories')

  const searchQuery = {
    q: 'stars:>1',
    sort: 'stars',
    order: 'desc',
    per_page: 100,
  }
  const requestHeaders = {
    'Accept': 'application/vnd.github+json',
    'X-GitHub-Api-Version': '2022-11-28',
  }

  const response = await ofetch<{ items: { full_name: string }[] }>(
    'https://api.github.com/search/repositories',
    { query: searchQuery, headers: requestHeaders },
  )

  spinner.stop('Fetched top 100 repositories')
  return response.items.map(({ full_name }) => full_name)
}

/**
 * Fetch the detail record of each repository from ungh.cc, five requests at a time.
 *
 * @remarks
 * Any failed request rejects the whole call; there is no per-repository recovery.
 *
 * @param repoList - Repository paths appended to `https://ungh.cc/repos/`
 * @returns The `repo` property of each response, in the order of `repoList`
 */
async function fetchRepoDetails(repoList: string[]): Promise<Record<string, any>[]> {
  const spinner = prompts.spinner()
  spinner.start(`Fetching ${repoList.length} GitHub repositories`)

  const fetchOne = async (repoPath: string, index: number) => {
    spinner.message(`[${index + 1}/${repoList.length}] Fetching ${repoPath}`)
    const payload = await ofetch(`https://ungh.cc/repos/${repoPath}`)
    return payload.repo
  }

  const repos = await pMap(repoList, fetchOne, { concurrency: 5 })

  spinner.stop(`Successfully fetched ${repos.length}/${repoList.length} repositories`)
  return repos
}

/**
 * Write the repositories as pretty-printed JSON (plus a trailing newline) to
 * `data/github-repos.json` under `BENCHMARKS_DIR`, creating the directory if needed.
 *
 * @param repos - Records to serialize
 */
async function saveRepos(repos: Record<string, any>[]): Promise<void> {
  const dataDir = path.join(BENCHMARKS_DIR, 'data')
  const targetFile = path.join(dataDir, 'github-repos.json')

  await ensureDir(dataDir)

  const serialized = JSON.stringify(repos, undefined, 2)
  await fsp.writeFile(targetFile, `${serialized}\n`, 'utf-8')

  prompts.log.info(`Result saved to \`${path.relative(BENCHMARKS_DIR, targetFile)}\``)
}
/**
 * Calculate total tokens and savings for a set of datasets
 *
 * @remarks
 * - A dataset that lacks a requested comparison format contributes 0 tokens to that format's total.
 * - When a format's total is 0 (no datasets, or the format is absent everywhere),
 *   `savingsPercent` is reported as 0 instead of `NaN`/`-Infinity`, so the
 *   rendered report never shows "NaN%".
 *
 * @param datasets - Per-dataset benchmark results; each must contain a `toon` entry
 * @param formatNames - Formats to total and compare against TOON
 * @returns The TOON total plus, per requested format, its total and percentage saved by TOON
 * @throws Error if a dataset has no `toon` metrics
 */
function calculateTotalMetrics(datasets: BenchmarkResult[], formatNames: readonly string[]) {
  const totalToonTokens = datasets.reduce((sum, result) => {
    const toon = result.formats.find(f => f.name === 'toon')
    if (!toon)
      throw new Error(`Missing "toon" metrics for dataset "${result.dataset?.name}"`)
    return sum + toon.tokens
  }, 0)

  const totals = formatNames.map((formatName) => {
    const totalTokens = datasets.reduce((sum, result) => {
      const format = result.formats.find(f => f.name === formatName)
      return sum + (format?.tokens ?? 0)
    }, 0)

    const savings = totalTokens - totalToonTokens
    // Guard the division: a zero denominator would otherwise produce NaN or -Infinity
    const savingsPercent = totalTokens > 0 ? (savings / totalTokens) * 100 : 0

    return { name: formatName, tokens: totalTokens, savingsPercent }
  })

  return { totalToonTokens, totals }
}
const isLast = i === totals.length - 1 lines.push(` ${formatComparisonLine({ name: format.name, tokens: format.tokens, savings: 0, // Not used in this context savingsPercent: format.savingsPercent, }, isLast)}`) } return lines.join('\n') } /** * Generate bar chart for a dataset */ function generateDatasetChart(result: BenchmarkResult): string { const { dataset, formats } = result const toon = formats.find(f => f.name === 'toon')! const jsonPretty = formats.find(f => f.name === 'json-pretty')! const emoji = DATASET_ICONS[dataset.name] || DEFAULT_DATASET_ICON const eligibility = dataset.metadata.tabularEligibility const name = dataset.description const percentage = Math.min(100, 100 - jsonPretty.savingsPercent) const bar = createProgressBar(percentage, 100, PROGRESS_BAR_WIDTH) const toonStr = toon.tokens.toLocaleString('en-US') const line1 = `${emoji} ${name} ┊ Tabular: ${eligibility}%` const line2 = ` │` const line3 = ` TOON ${bar} ${toonStr.padStart(TOKEN_PADDING)} tokens` const comparisonLines = COMPARISON_FORMAT_ORDER.map((formatName, index, array) => { const format = formats.find(f => f.name === formatName) if (!format) return undefined return ` ${formatComparisonLine(format, index === array.length - 1)}` }).filter(Boolean) return [line1, line2, line3, ...comparisonLines].join('\n') } const results: BenchmarkResult[] = [] // Calculate token counts for all datasets for (const dataset of TOKEN_EFFICIENCY_DATASETS) { const formatMetrics: FormatMetrics[] = [] const tokensByFormat: Record = {} // Calculate tokens for each format for (const [formatName, formatter] of Object.entries(formatters)) { // Skip CSV for datasets that don't support it if (formatName === 'csv' && !supportsCSV(dataset)) continue const formattedData = formatter(dataset.data) const tokens = tokenize(formattedData) tokensByFormat[formatName] = tokens } // Calculate savings vs TOON const toonTokens = tokensByFormat.toon! 
for (const [formatName, tokens] of Object.entries(tokensByFormat)) { const savings = tokens - toonTokens formatMetrics.push({ name: formatName, tokens, savings, savingsPercent: formatName === 'toon' ? 0 : (savings / tokens) * 100, }) } results.push({ dataset, formats: formatMetrics, }) } // Separate datasets by CSV support const mixedStructureDatasets = results.filter(r => !supportsCSV(r.dataset)) const flatOnlyDatasets = results.filter(r => supportsCSV(r.dataset)) // Mixed-Structure Track (no CSV) const mixedCharts = mixedStructureDatasets .map(result => generateDatasetChart(result)) .join('\n\n') // Flat-Only Track (with CSV) const flatCharts = flatOnlyDatasets .map((result) => { const csv = result.formats.find(f => f.name === 'csv') const toon = result.formats.find(f => f.name === 'toon')! if (!csv) return generateDatasetChart(result) // Special handling to show CSV first with TOON overhead const { dataset } = result const emoji = DATASET_ICONS[dataset.name] || DEFAULT_DATASET_ICON const eligibility = dataset.metadata.tabularEligibility const name = dataset.description // CSV line const csvPercentage = Math.min(100, (csv.tokens / toon.tokens) * 100) const csvBar = createProgressBar(csvPercentage, 100, PROGRESS_BAR_WIDTH) const csvStr = csv.tokens.toLocaleString('en-US') const line1 = `${emoji} ${name} ┊ Tabular: ${eligibility}%` const line2 = ` │` const line3 = ` CSV ${csvBar} ${csvStr.padStart(TOKEN_PADDING)} tokens` const toonOverhead = toon.tokens - csv.tokens const toonOverheadPercent = (toonOverhead / csv.tokens) * 100 const toonBar = createProgressBar(100, 100, PROGRESS_BAR_WIDTH) const toonStr = toon.tokens.toLocaleString('en-US') const toonVsCSV = toonOverheadPercent >= 0 ? 
`(+${toonOverheadPercent.toFixed(1)}% vs CSV)` : `(${toonOverheadPercent.toFixed(1)}% vs CSV)` const toonLine = ` TOON ${toonBar} ${toonStr.padStart(TOKEN_PADDING)} tokens ${toonVsCSV}` // Other format comparisons (vs TOON) const comparisonLines = COMPARISON_FORMAT_ORDER.map((formatName, index, array) => { const format = result.formats.find(f => f.name === formatName) if (!format) return undefined return ` ${formatComparisonLine(format, index === array.length - 1)}` }).filter(Boolean) return [line1, line2, line3, toonLine, ...comparisonLines].join('\n') }) .join('\n\n') // Calculate totals for mixed structure const { totalToonTokens: totalToonTokensMixed, totals: mixedTotals } = calculateTotalMetrics(mixedStructureDatasets, COMPARISON_FORMAT_ORDER) const mixedTotalLines = generateTotalLines(totalToonTokensMixed, mixedTotals) // Calculate totals for flat-only const { totalToonTokens: totalToonTokensFlat, totals: flatTotals } = calculateTotalMetrics(flatOnlyDatasets, COMPARISON_FORMAT_ORDER) const totalCSVTokensFlat = flatOnlyDatasets.reduce((sum, r) => { const csv = r.formats.find(f => f.name === 'csv') return sum + (csv?.tokens || 0) }, 0) const flatTotalLines = generateTotalLines(totalToonTokensFlat, flatTotals, { name: 'csv', tokens: totalCSVTokensFlat }) const barChartSection = ` #### Mixed-Structure Track Datasets with nested or semi-uniform structures. CSV excluded as it cannot properly represent these structures. \`\`\` ${mixedCharts} ${mixedTotalLines} \`\`\` #### Flat-Only Track Datasets with flat tabular structures where CSV is applicable. 
// Generate detailed examples (optional: show a few examples)
//
// Each selected dataset is rendered as a markdown section showing a truncated
// sample in JSON and TOON. The token figures quoted in the section come from
// the full dataset metrics in `result.formats`, not from the truncated sample.
const detailedExamples = results
  .filter(r => (DETAILED_EXAMPLE_DATASETS as readonly string[]).includes(r.dataset.name))
  .map((result, i, filtered) => {
    let displayData = result.dataset.data

    // Truncate for display
    if (result.dataset.name === 'github') {
      displayData = {
        repositories: displayData.repositories.slice(0, GITHUB_REPO_LIMIT).map((repo: Record<string, any>) => {
          const { description } = repo
          // Leave null/absent descriptions untouched: concatenating them with ''
          // would turn them into the literal string "undefined".
          if (typeof description !== 'string')
            return { ...repo }

          const shortened = description.length > GITHUB_DESC_LIMIT
            ? `${description.slice(0, GITHUB_DESC_LIMIT)}…`
            : description
          return { ...repo, description: shortened }
        }),
      }
    }
    else if (result.dataset.name === 'analytics') {
      displayData = { metrics: displayData.metrics.slice(0, ANALYTICS_METRICS_LIMIT) }
    }

    const emoji = DATASET_ICONS[result.dataset.name] || DEFAULT_DATASET_ICON
    const json = result.formats.find(f => f.name === 'json-pretty')!
    const toon = result.formats.find(f => f.name === 'toon')!
    // A horizontal rule separates sections; the last section gets none
    const separator = i < filtered.length - 1 ? '---' : ''

    return `
#### ${emoji} ${result.dataset.description}

**Savings:** ${json.savings.toLocaleString('en-US')} tokens (${json.savingsPercent.toFixed(1)}% reduction vs JSON)

**JSON** (${json.tokens.toLocaleString('en-US')} tokens):

\`\`\`json
${JSON.stringify(displayData, undefined, 2)}
\`\`\`

**TOON** (${toon.tokens.toLocaleString('en-US')} tokens):

\`\`\`
${encode(displayData)}
\`\`\`

${separator}
`.trim()
  })
  .join('\n\n')
Show detailed examples ${detailedExamples}
`.trimStart() prompts.log.message(barChartSection) const resultsDir = path.join(BENCHMARKS_DIR, 'results') await ensureDir(resultsDir) const outputFilePath = path.join(resultsDir, 'token-efficiency.md') await fsp.writeFile(outputFilePath, markdown, 'utf-8') prompts.log.success(`Report saved to \`${path.relative(ROOT_DIR, outputFilePath)}\``) ================================================ FILE: benchmarks/src/constants.ts ================================================ import process from 'node:process' import * as url from 'node:url' export const ROOT_DIR: string = url.fileURLToPath(new URL('../../', import.meta.url)) export const BENCHMARKS_DIR: string = url.fileURLToPath(new URL('../', import.meta.url)) /** * Default concurrency for parallel evaluations to prevent bursting */ export const DEFAULT_CONCURRENCY = 10 /** * Enable dry run mode for quick testing with limited AI requests * * @remarks * Set via environment variable: `DRY_RUN=true`. */ export const DRY_RUN: boolean = process.env.DRY_RUN === 'true' /** * Limits applied during dry run mode */ export const DRY_RUN_LIMITS = { /** Maximum number of questions to evaluate */ maxQuestions: 10, } /** * Model-specific RPM (requests per minute) limits to handle API quotas * * @remarks * Set `undefined` for models without specific limits. 
*/ /// keep-sorted export const MODEL_RPM_LIMITS: Record = { 'claude-haiku-4-5-20251001': 50, 'gemini-3-flash-preview': 25, 'gpt-5-nano': 50, 'grok-4-1-fast-non-reasoning': 25, } /** * Display names for data format types */ export const FORMATTER_DISPLAY_NAMES: Record = { 'json-pretty': 'JSON', 'json-compact': 'JSON compact', 'toon': 'TOON', 'csv': 'CSV', 'xml': 'XML', 'yaml': 'YAML', } as const /** * Question type identifiers */ export const QUESTION_TYPES = [ 'field-retrieval', 'retrieval', 'aggregation', 'filtering', 'structure-awareness', 'structural-validation', ] as const /** * Display names for question types */ export const QUESTION_TYPE_LABELS = { 'field-retrieval': 'Field Retrieval', 'retrieval': 'Retrieval', 'aggregation': 'Aggregation', 'filtering': 'Filtering', 'structure-awareness': 'Structure Awareness', 'structural-validation': 'Structural Validation', } as const /** * Dataset identifiers */ export const DATASET_NAMES = [ 'tabular', 'nested', 'analytics', 'github', 'event-logs', 'nested-config', 'large-uniform', 'structural-validation-control', 'structural-validation-truncated', 'structural-validation-extra-rows', 'structural-validation-width-mismatch', 'structural-validation-missing-fields', ] as const /** * Structure class identifiers */ export const STRUCTURE_CLASSES = [ 'uniform', 'semi-uniform', 'nested', 'deep', ] as const /** * Threshold values for filtering and aggregation questions */ export const QUESTION_THRESHOLDS = { tabular: { salaryRanges: [60000, 80000, 100000], experienceYears: [5, 10, 15, 20], departmentSalaryThreshold: 80000, departmentExperienceThreshold: 10, }, nested: { highValueOrders: [200, 400, 600], statusValueThreshold: 300, itemCountThreshold: 3, totalThresholdsForItems: [300, 500], }, analytics: { views: [6000], conversions: [20], viewsForFiltering: [6000, 7000], conversionsForFiltering: 15, revenueThresholds: [1000, 1500, 2000], viewsThresholdForRevenue: 6000, clicksForFiltering: [250, 400], 
conversionsForClickFiltering: 15, revenueForBounceRate: [1000, 1500], bounceRateThreshold: 0.5, }, github: { stars: [100000, 150000, 200000], forks: [20000, 35000], watchers: [8000], starForkCombinations: [ { stars: 75000, forks: 15000 }, { stars: 100000, forks: 20000 }, { stars: 150000, forks: 30000 }, { stars: 200000, forks: 45000 }, ], starWatcherCombinations: [ { stars: 100000, watchers: 7000 }, { stars: 150000, watchers: 9000 }, ], }, } as const /** * Question generation configuration */ export const QUESTION_LIMITS = { tabular: { fieldRetrieval: 12, aggregationDepartments: 3, filteringMultiConditionDepartments: 5, filteringExperience: 3, filteringDepartmentExp: 3, filteringDepartmentActive: 2, }, nested: { fieldRetrievalOrders: 8, fieldRetrievalCustomers: 8, aggregationStatuses: 3, filteringStatusAndValue: 4, filteringStatusAndItems: 3, }, analytics: { fieldRetrievalDates: 9, }, github: { fieldRetrievalRepos: 11, aggregationBranches: 2, filteringStarsAndForks: 3, }, eventLogs: { fieldRetrieval: 10, aggregationEndpoints: 2, filteringLevelAndStatus: 3, filteringEndpointAndStatus: 3, filteringEndpointRetryable: 2, }, nestedConfig: { fieldRetrieval: 10, filteringComplex: 5, }, } as const ================================================ FILE: benchmarks/src/datasets.ts ================================================ import type { Dataset } from './types.ts' import { faker } from '@faker-js/faker' import githubRepos from '../data/github-repos.json' with { type: 'json' } // Seed for reproducibility faker.seed(12345) /** * Employee record structure for tabular dataset */ export interface Employee { id: number name: string email: string department: string salary: number yearsExperience: number active: boolean } /** * E-commerce order structure for nested dataset */ export interface Order { orderId: string customer: { id: number name: string email: string phone: string } items: { sku: string name: string quantity: number price: number }[] subtotal: number tax: number 
/**
 * Generate analytics time-series data
 *
 * @remarks
 * All calendar arithmetic is done in UTC. A date-only `startDate` is parsed as
 * UTC midnight and the emitted `date` comes from `toISOString()` (also UTC), so
 * stepping days and reading the weekday with local-time accessors would make
 * the output depend on the machine's time zone: the weekend multiplier would
 * land on the wrong calendar day in zones behind UTC, and a DST change could
 * make two consecutive entries share the same date.
 *
 * The faker calls per day are made in a fixed order (views, clicks,
 * conversions, order value, bounce rate) so seeded output stays stable.
 *
 * @param days - Number of consecutive daily records to produce
 * @param startDate - First day of the series (ISO date string)
 * @returns An object whose `metrics` array holds one record per day
 */
export function generateAnalyticsData(days: number, startDate = '2025-01-01'): { metrics: AnalyticsMetric[] } {
  const firstDay = new Date(startDate)

  return {
    metrics: Array.from({ length: days }, (_, i) => {
      const currentDate = new Date(firstDay)
      currentDate.setUTCDate(currentDate.getUTCDate() + i)

      // Simulate realistic web traffic with some variation
      const baseViews = 5000
      const dayOfWeek = currentDate.getUTCDay()
      const weekendMultiplier = dayOfWeek === 0 || dayOfWeek === 6 ? 0.7 : 1.0
      const views = Math.round(baseViews * weekendMultiplier + faker.number.int({ min: -1000, max: 3000 }))
      const clicks = Math.round(views * faker.number.float({ min: 0.02, max: 0.08 }))
      const conversions = Math.round(clicks * faker.number.float({ min: 0.05, max: 0.15 }))
      const avgOrderValue = faker.number.float({ min: 49.99, max: 299.99 })
      const revenue = Number((conversions * avgOrderValue).toFixed(2))

      return {
        date: currentDate.toISOString().split('T')[0]!,
        views,
        clicks,
        conversions,
        revenue,
        bounceRate: faker.number.float({ min: 0.3, max: 0.7, fractionDigits: 2 }),
      }
    }),
  }
}
/**
 * Build `count` synthetic e-commerce orders with a nested customer and line items.
 *
 * @remarks
 * Customer ids cycle through 1–20, statuses cycle through `ORDER_STATUSES`, and
 * item names cycle through `PRODUCT_NAMES`. Tax is 8% of the subtotal. The
 * sequence of faker calls is significant: with a seeded faker it determines the
 * generated values, so statements must not be reordered.
 *
 * @param count - Number of orders to generate
 * @returns An object whose `orders` array holds the generated orders
 */
function generateOrders(count: number): { orders: Order[] } {
  // One line item; faker is consulted for price, then quantity, then SKU
  const buildItem = (position: number) => {
    const price = faker.number.float({
      min: 9.99,
      max: 199.99,
      fractionDigits: 2,
    })
    const quantity = faker.number.int({ min: 1, max: 5 })
    const sku = `SKU-${faker.string.alphanumeric({ length: 6 }).toUpperCase()}`
    return {
      sku,
      name: PRODUCT_NAMES[position % PRODUCT_NAMES.length]!,
      quantity,
      price,
    }
  }

  const buildOrder = (orderIndex: number) => {
    // 1-4 items per order
    const itemCount = faker.number.int({ min: 1, max: 4 })
    const items = Array.from({ length: itemCount }, (_, j) => buildItem(j))

    let rawSubtotal = 0
    for (const item of items)
      rawSubtotal = rawSubtotal + (item.price * item.quantity)

    const subtotal = Number(rawSubtotal.toFixed(2))
    const tax = Number((subtotal * 0.08).toFixed(2)) // 8% tax rate
    const total = Number((subtotal + tax).toFixed(2))

    const customer = {
      id: (orderIndex % 20) + 1, // Rotate through 20 customers
      name: faker.person.fullName(),
      email: faker.internet.email().toLowerCase(),
      phone: faker.phone.number(),
    }
    const orderDate = faker.date.recent({ days: 90 }).toISOString().split('T')[0]

    return {
      orderId: `ORD-${String(orderIndex + 1).padStart(4, '0')}`,
      customer,
      items,
      subtotal,
      tax,
      total,
      status: ORDER_STATUSES[orderIndex % ORDER_STATUSES.length]!,
      orderDate,
    }
  }

  return {
    orders: Array.from({ length: count }, (_, i) => buildOrder(i)),
  }
}
*/ const nestedDataset: Dataset = { name: 'nested', description: 'E-commerce orders with nested structures', data: generateOrders(50), metadata: { supportsCSV: false, structureClass: 'nested', tabularEligibility: 33, // Top-level orders array has nested objects (not tabular), but nested items arrays are tabular }, } /** * Analytics dataset: Time-series metrics * * @remarks * Tests TOON's handling of numeric data and date fields. */ const analyticsDataset: Dataset = { name: 'analytics', description: 'Time-series analytics data', data: generateAnalyticsData(60), metadata: { supportsCSV: true, structureClass: 'uniform', tabularEligibility: 100, // Uniform time-series records with consistent primitive fields }, } /** * Real-world dataset: Top 100 starred GitHub repositories * * @remarks * Tests TOON's tabular format with real data. */ const githubDataset: Dataset = { name: 'github', description: 'Top 100 GitHub repositories', data: { repositories: githubRepos, }, metadata: { supportsCSV: true, structureClass: 'uniform', tabularEligibility: 100, // Repository array contains uniform objects with primitive values }, } /** * Generate a single e-commerce order with nested structure * * @remarks * Used for token efficiency benchmarks. 
/**
 * Produce one synthetic e-commerce order with a nested customer and 2–5 line items.
 *
 * @remarks
 * `subtotal`, `tax`, and `total` are drawn independently from faker; they are
 * not derived from the generated items. Values are drawn in the same order as
 * the properties appear in the result, which matters when faker is seeded.
 *
 * @returns The generated order
 */
export function generateOrderData(): Order {
  const orderId = faker.string.alphanumeric({ length: 12, casing: 'upper' })

  const customer = {
    id: faker.number.int({ min: 1000, max: 9999 }),
    name: faker.person.fullName(),
    email: faker.internet.email(),
    phone: faker.phone.number(),
  }

  const itemCount = faker.number.int({ min: 2, max: 5 })
  const items = Array.from({ length: itemCount }, () => {
    const sku = faker.string.alphanumeric({ length: 8, casing: 'upper' })
    const name = faker.commerce.productName()
    const quantity = faker.number.int({ min: 1, max: 5 })
    const price = Number(faker.commerce.price({ min: 10, max: 200 }))
    return { sku, name, quantity, price }
  })

  const subtotal = Number(faker.commerce.price({ min: 100, max: 500 }))
  const tax = Number(faker.commerce.price({ min: 10, max: 50 }))
  const total = Number(faker.commerce.price({ min: 110, max: 550 }))
  const status = faker.helpers.arrayElement(['pending', 'processing', 'shipped', 'delivered'])
  const createdAt = faker.date.recent({ days: 7 }).toISOString()

  return { orderId, customer, items, subtotal, tax, total, status, createdAt }
}
faker.number.int({ min: 400, max: 599 }) : faker.number.int({ min: 200, max: 299 }), responseTime: faker.number.int({ min: 10, max: 5000 }), userId: faker.number.int({ min: 1000, max: 9999 }), } if (hasError) { log.error = { message: faker.helpers.arrayElement([ 'Database connection timeout', 'Invalid authentication token', 'Resource not found', 'Internal server error', 'Rate limit exceeded', ]), stack: `Error: ${faker.lorem.sentence()}\n at ${faker.lorem.word()}\n at ${faker.lorem.word()}`, retryable: faker.datatype.boolean(0.6), } } return log }), } } /** * Generate deeply nested configuration * * @remarks * Creates a complex nested structure with minimal tabular eligibility (~0%). */ export function generateNestedConfig(): NestedConfig { return { environment: faker.helpers.arrayElement(['production', 'staging', 'development']), version: faker.system.semver(), database: { host: faker.internet.domainName(), port: 5432, name: faker.database.type(), pool: { min: 2, max: faker.number.int({ min: 10, max: 50 }), idleTimeout: 30000, }, replicas: Array.from({ length: 3 }, (_, i) => ({ host: `replica-${i + 1}.${faker.internet.domainName()}`, port: 5432, priority: i + 1, })), }, features: { darkMode: { enabled: faker.datatype.boolean(), rollout: faker.number.int({ min: 0, max: 100 }), variants: [ { name: 'default', weight: 70, config: { theme: 'dark', animations: true }, }, { name: 'minimal', weight: 30, config: { theme: 'dark', animations: false }, }, ], }, analytics: { enabled: faker.datatype.boolean(), rollout: faker.number.int({ min: 0, max: 100 }), variants: [ { name: 'full', weight: 100, config: { tracking: 'all', sampling: 1.0 }, }, ], }, }, authentication: { providers: [ { name: 'oauth2', clientId: faker.string.uuid(), scopes: ['read', 'write', 'admin'], config: { authUrl: faker.internet.url(), tokenUrl: faker.internet.url(), }, }, { name: 'saml', clientId: faker.string.uuid(), scopes: ['read'], config: { entryPoint: faker.internet.url(), cert: 
/**
 * Build a uniform array of `count` synthetic products.
 *
 * @remarks
 * SKUs are sequential (`SKU-000001`, `SKU-000002`, …) and categories cycle
 * through a fixed list of six. Name, price, quantity, and last-updated date are
 * drawn from faker in that order for every product.
 *
 * @param count - Number of products to generate
 * @returns An object whose `products` array holds the generated rows
 */
export function generateProducts(count: number): { products: Product[] } {
  const categories = ['Electronics', 'Clothing', 'Home & Garden', 'Sports', 'Books', 'Toys'] as const

  const buildProduct = (rowIndex: number): Product => {
    const sku = `SKU-${String(rowIndex + 1).padStart(6, '0')}`
    const name = faker.commerce.productName()
    const category = categories[rowIndex % categories.length]!
    const price = Number(faker.commerce.price({ min: 5, max: 500 }))
    const qty = faker.number.int({ min: 0, max: 1000 })
    const lastUpdated = faker.date.recent({ days: 30 }).toISOString().split('T')[0]!
    return { sku, name, category, price, qty, lastUpdated }
  }

  const products = Array.from({ length: count }, (_, i) => buildProduct(i))
  return { products }
}
/**
 * Build the five structural-validation fixtures from a 20-employee base set.
 *
 * @remarks
 * The result order is: control, truncated, extra rows, width mismatch, missing
 * fields. The control fixture is labelled `type: 'truncated'` (the type union
 * has no dedicated control member) and is the only one with `isValid: true`.
 * Both employee sets are generated up front, base set first, so the generator
 * is consumed in the same order as when the extra rows were built inline.
 *
 * @returns The fixtures in the order listed above
 */
function generateStructuralValidationFixtures(): StructuralValidationFixture[] {
  const { employees } = generateEmployees(20)
  const surplusEmployees = generateEmployees(3).employees

  // Shallow copy of an employee with a single property left out
  const dropField = (employee: Employee, field: keyof Employee) => {
    const { [field]: _omitted, ...remaining } = employee
    return remaining
  }

  // Valid baseline
  const control: StructuralValidationFixture = {
    type: 'truncated',
    description: 'Valid complete dataset (control)',
    data: { employees },
    isValid: true,
  }

  // Truncated array (missing last 3 rows)
  const truncated: StructuralValidationFixture = {
    type: 'truncated',
    description: 'Array truncated: 3 rows removed from end',
    data: { employees: employees.slice(0, -3) },
    isValid: false, // [N] won't match actual row count in TOON
  }

  // Extra rows (3 more than original)
  const extraRows: StructuralValidationFixture = {
    type: 'extra-rows',
    description: 'Extra rows added beyond declared length',
    data: { employees: [...employees, ...surplusEmployees] },
    isValid: false, // [N] won't match actual row count in TOON
  }

  // Width mismatch (inconsistent field count): row 10 loses its salary
  const widthMismatch: StructuralValidationFixture = {
    type: 'width-mismatch',
    description: 'Inconsistent field count (missing salary in row 10)',
    data: {
      employees: employees.map((emp, i) => (i === 9 ? dropField(emp, 'salary') : emp)),
    },
    isValid: false, // Not all objects have same fields (tabular requirement)
  }

  // Missing required fields: every 5th row loses its email
  const missingFields: StructuralValidationFixture = {
    type: 'missing-fields',
    description: 'Missing required fields (no email in multiple rows)',
    data: {
      employees: employees.map((emp, i) => (i % 5 === 0 ? dropField(emp, 'email') : emp)),
    },
    isValid: false, // Not all objects have same fields (tabular requirement)
  }

  return [control, truncated, extraRows, widthMismatch, missingFields]
}
/**
 * Event logs dataset: Semi-uniform structure
 *
 * @remarks
 * Tests TOON with semi-uniform data (~50% flat, ~50% with nested errors).
 * Built from 75 generated log entries and excluded from CSV comparisons.
 */
const eventLogsDataset: Dataset = {
  name: 'event-logs',
  description: 'Semi-uniform event logs',
  data: generateEventLogs(75),
  metadata: {
    supportsCSV: false,
    structureClass: 'semi-uniform',
    tabularEligibility: 50, // Top-level logs array is tabular, but ~50% have nested optional error objects
  },
}

/**
 * Nested config dataset: Deeply nested structure
 *
 * @remarks
 * Tests TOON's worst-case scenario with deeply nested configuration.
 * Holds a single generated config object and is excluded from CSV comparisons.
 */
const nestedConfigDataset: Dataset = {
  name: 'nested-config',
  description: 'Deeply nested configuration',
  data: generateNestedConfig(),
  metadata: {
    supportsCSV: false,
    structureClass: 'deep',
    tabularEligibility: 0, // Deeply nested configuration with no tabular arrays
  },
}
/**
 * Structural validation datasets: one dataset per generated fixture
 *
 * @remarks
 * These datasets test TOON's structural validation advantages via [N] length declarations
 * and {fields} headers. CSV is included to demonstrate its lack of structural metadata.
 * Names are assigned by fixture position, so the name list must stay in the
 * same order as the fixtures returned by `generateStructuralValidationFixtures`.
 */
const structuralValidationDatasets: Dataset[] = (() => {
  const namesByPosition = [
    'structural-validation-control',
    'structural-validation-truncated',
    'structural-validation-extra-rows',
    'structural-validation-width-mismatch',
    'structural-validation-missing-fields',
  ] as const

  return generateStructuralValidationFixtures().map(({ description, data }, position): Dataset => ({
    name: namesByPosition[position]!,
    description,
    data,
    metadata: {
      supportsCSV: true, // Include CSV to show it can't validate structure
      structureClass: 'uniform',
      tabularEligibility: 100,
    },
  }))
})()
/**
 * Models used for evaluation
 *
 * @remarks
 * One model from each imported provider package: Anthropic, Google, OpenAI and xAI.
 */
export const models: LanguageModelV3[] = [
  anthropic('claude-haiku-4-5-20251001'),
  google('gemini-3-flash-preview'),
  openai('gpt-5-nano'),
  xai('grok-4-1-fast-non-reasoning'),
]
/**
 * Evaluate a single question with a specific format and model
 *
 * @remarks
 * Builds a prompt from the format primer, the fenced data and the question,
 * sends it through `generateText`, and scores the trimmed reply with
 * `compareAnswers`. A format name missing from `PRIMERS` or `FENCE` falls back
 * to an empty primer or an untagged code fence respectively.
 *
 * NOTE(review): the prompt template's original line breaks are not visible in
 * this view, so the literal below is left exactly as found. The type argument
 * of the `Promise` return type is also missing here; confirm against the file.
 *
 * @returns The question id, format, model id, expected and actual answers,
 * the correctness flag, token usage as reported by the model call, and latency
 * in milliseconds
 */
export async function evaluateQuestion(
  {
    question,
    formatName,
    formattedData,
    model,
  }: {
    question: Question
    formatName: string
    formattedData: string
    model: LanguageModelV3
  },
): Promise {
  const primer = PRIMERS[formatName] ?? ''
  const fence = FENCE[formatName] ?? ''

  const prompt = ` ${primer} Given the following data in ${formatName} format: \`\`\`${fence} ${formattedData} \`\`\` Question: ${question.prompt} Answer format requirements: - Provide only the value itself, no explanation - For numbers: output digits only (no commas, currency symbols, or units) - For dates/field names: use the exact string from the data - For lists: output comma-separated values with no spaces Answer: `.trim()

  const startTime = performance.now()
  const { text, usage } = await generateText({ model, prompt })
  const actual = text.trim()
  // Measured after the trim, so latency covers the model call plus that trim
  const latencyMs = performance.now() - startTime

  // Questions without an explicit answer type are compared as plain strings
  const comparisonResult = compareAnswers(
    actual,
    question.groundTruth,
    question.answerType ?? 'string',
    question.normalizationOptions,
  )
  const isCorrect = comparisonResult.match

  return {
    questionId: question.id,
    format: formatName,
    model: model.modelId,
    expected: question.groundTruth,
    actual,
    isCorrect,
    inputTokens: usage.inputTokens,
    outputTokens: usage.outputTokens,
    latencyMs,
  }
}
/**
 * Convert data to CSV format
 *
 * @remarks
 * CSV only fits flat tabular data, so this formatter is deliberately narrow:
 * - A non-empty root-level array becomes a single table with a header row
 * - A root-level object contributes one `# key` heading plus one table for
 *   each property that holds a non-empty array; all other properties are dropped
 * - Anything else (primitives, `null`, empty arrays) yields an empty string
 *
 * Nested arrays/objects inside rows are not flattened here, so CSV output for
 * nested datasets is not representative of the source data.
 */
function toCSV(data: unknown): string {
  // Root-level array
  if (Array.isArray(data))
    return data.length > 0 ? stringifyCSV(data, { header: true }).trim() : ''

  if (typeof data !== 'object' || data === null)
    return ''

  // Top-level object: one titled section per non-empty array property
  return Object.entries(data)
    .flatMap(([key, value]) =>
      Array.isArray(value) && value.length > 0
        ? [`# ${key}`, stringifyCSV(value, { header: true })]
        : [],
    )
    .join('\n')
    .trim()
}
/**
 * Trim a string and drop a quote character from either end
 *
 * @remarks
 * The leading and trailing quote are removed independently; they are not
 * required to form a matching pair.
 */
function stripWrappingQuotes(text: string): string {
  const trimmed = text.trim()
  return trimmed.replace(WRAPPING_QUOTES_PATTERN, '')
}
/**
 * Pull the first number-like token out of a string and parse it as a float
 *
 * @remarks
 * Handles: "3.14", "1,234.56", "$5,678.90", "42%", "1.5e-3", "Price: $99.99"
 *
 * A trailing `%` divides the value by 100 when `allowPercent` is set; otherwise
 * the sign is dropped and the value is kept as written. When `decimalPlaces`
 * is set the result is rounded to that many places.
 */
function normalizeNumber(text: string, options: Required<NormalizationOptions>): NormalizedResult {
  // First number-like token (scientific notation included)
  const found = (options.allowCurrency ? NUMBER_PATTERN_WITH_CURRENCY : NUMBER_PATTERN).exec(text)
  if (found === null)
    return { success: false, error: `No number found in: "${text}"` }

  const token = found[0]

  // Currency symbols, commas, spaces and the percent sign are formatting only
  const rawValue = Number.parseFloat(token.replace(NUMBER_CLEANUP_CHARS, ''))
  if (Number.isNaN(rawValue))
    return { success: false, error: `Failed to parse number: "${token}"` }

  const isPercentage = options.allowPercent && token.endsWith('%')
  const scaledValue = isPercentage ? rawValue / PERCENTAGE_DIVISOR : rawValue

  if (options.decimalPlaces === undefined)
    return { success: true, value: scaledValue }

  const factor = DECIMAL_BASE ** options.decimalPlaces
  return { success: true, value: Math.round(scaledValue * factor) / factor }
}
// Whole-reply code fence: opening fence, optional info string (language tag)
// terminated by a line break, payload, closing fence. The info string only
// counts when it sits on the fence line itself.
const FENCED_REPLY_PATTERN = /^```(?:[ \t]*[^\s`]*[ \t]*\r?\n)?([\s\S]*?)```$/
// One quote character at the start and/or end of the text
const STRING_WRAPPING_QUOTES_PATTERN = /^["']|["']$/g

/**
 * Normalize a string (trim, optionally case-insensitive)
 *
 * @remarks
 * Handles wrapping quotes and code fences.
 *
 * A reply that is entirely wrapped in a code fence is reduced to the fence's
 * payload. The language tag is only removed when it directly follows the
 * opening fence, so a multi-line payload keeps its first line. Quotes around
 * the payload are stripped as well, e.g. a fenced `"main"` yields `main`.
 *
 * @param text - Raw answer text
 * @param options - Resolved normalization options; only `caseSensitive` is read
 * @returns Always a successful result carrying the normalized string
 */
function normalizeString(text: string, options: Required<NormalizationOptions>): NormalizedResult {
  // Strip wrapping quotes
  let candidate = text.trim().replace(STRING_WRAPPING_QUOTES_PATTERN, '')

  // Strip code fences (```...```)
  const fenced = FENCED_REPLY_PATTERN.exec(candidate)
  if (fenced) {
    const payload = (fenced[1] ?? '').trim()
    // A fence such as "```main\n```" has nothing after its first line, so
    // that line is the answer rather than a language tag
    const unfenced = payload.length > 0 ? payload : candidate.slice(3, -3).trim()
    // Quotes inside the fence are only reachable once the fence is gone
    candidate = unfenced.replace(STRING_WRAPPING_QUOTES_PATTERN, '')
  }

  candidate = candidate.trim()

  const value = options.caseSensitive ? candidate : candidate.toLowerCase()
  return { success: true, value }
}
/**
 * Compare actual and expected answers with deterministic, type-aware normalization
 *
 * @remarks
 * Both answers are normalized with the same resolved options before they are
 * compared. If either side cannot be normalized the result is a mismatch and
 * `details` names the side that failed (the actual answer is reported first).
 *
 * @returns `match` plus, for mismatches, a human-readable `details` string
 */
export function compareAnswers(
  actual: string,
  expected: string,
  kind: AnswerType,
  options: Partial<NormalizationOptions> = {},
): { match: boolean, details?: string } {
  const settings: Required<NormalizationOptions> = { ...DEFAULT_OPTIONS, ...options }

  const normalizedActual = normalizeAnswer(actual, kind, settings)
  const normalizedExpected = normalizeAnswer(expected, kind, settings)

  if (!normalizedActual.success)
    return { match: false, details: `Failed to normalize actual answer: ${normalizedActual.error}` }

  if (!normalizedExpected.success)
    return { match: false, details: `Failed to normalize expected answer: ${normalizedExpected.error}` }

  if (compareValues(normalizedActual.value, normalizedExpected.value, kind, settings))
    return { match: true, details: undefined }

  return {
    match: false,
    details: `Mismatch: actual="${normalizedActual.value}" vs expected="${normalizedExpected.value}"`,
  }
}
metrics.length const totalViews = metrics.reduce((sum, m) => sum + m.views, 0) const totalConversions = metrics.reduce((sum, m) => sum + m.conversions, 0) const totalRevenue = metrics.reduce((sum, m) => sum + m.revenue, 0) const avgBounceRate = metrics.reduce((sum, m) => sum + m.bounceRate, 0) / metrics.length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many days of data are in the dataset?') .groundTruth(String(totalDays)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total number of views across all dates?') .groundTruth(String(totalViews)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total number of conversions across all dates?') .groundTruth(String(totalConversions)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total revenue across all dates?') .groundTruth(String(totalRevenue.toFixed(2))) .type('aggregation') .dataset('analytics') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), new QuestionBuilder() .id(getId()) .prompt('What is the average bounce rate?') .groundTruth(String(avgBounceRate.toFixed(2))) .type('aggregation') .dataset('analytics') .answerType('number') .normalize({ decimalPlaces: 2 }) .build(), ) // Aggregation: high views/conversions for (const threshold of QUESTION_THRESHOLDS.analytics.views) { const count = metrics.filter(m => m.views > threshold).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had more than ${threshold} views?`) .groundTruth(String(count)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), ) } for (const threshold of QUESTION_THRESHOLDS.analytics.conversions) { const count = metrics.filter(m => m.conversions > threshold).length questions.push( new QuestionBuilder() .id(getId()) 
.prompt(`How many days had more than ${threshold} conversions?`) .groundTruth(String(count)) .type('aggregation') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: multi-condition (views AND revenue) for (const threshold of QUESTION_THRESHOLDS.analytics.viewsForFiltering) { const count = metrics.filter( m => m.views > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForFiltering, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had more than ${threshold} views and more than ${QUESTION_THRESHOLDS.analytics.conversionsForFiltering} conversions?`) .groundTruth(String(count)) .type('filtering') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: revenue thresholds for (const threshold of QUESTION_THRESHOLDS.analytics.revenueThresholds) { const count = metrics.filter( m => m.revenue > threshold && m.views > QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had revenue greater than ${threshold} with views above ${QUESTION_THRESHOLDS.analytics.viewsThresholdForRevenue}?`) .groundTruth(String(count)) .type('filtering') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: clicks and conversions for (const threshold of QUESTION_THRESHOLDS.analytics.clicksForFiltering) { const count = metrics.filter( m => m.clicks > threshold && m.conversions > QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many days had more than ${threshold} clicks and more than ${QUESTION_THRESHOLDS.analytics.conversionsForClickFiltering} conversions?`) .groundTruth(String(count)) .type('filtering') .dataset('analytics') .answerType('integer') .build(), ) } // Filtering: revenue and bounce rate for (const threshold of QUESTION_THRESHOLDS.analytics.revenueForBounceRate) { const count = metrics.filter( m => m.revenue > 
/**
 * Generate event log questions
 *
 * @remarks
 * Produces, in order: field-retrieval questions keyed by log timestamp,
 * aggregation questions (totals, per level, per endpoint, status ranges,
 * retryable errors) and multi-condition filtering questions. Ids are drawn
 * from `getId` in the same order the questions are emitted.
 *
 * @param logs - Event log entries the ground truths are computed from
 * @param getId - Supplies the next question id
 */
export function generateEventLogsQuestions(logs: EventLog[], getId: () => string): Question[] {
  const limits = QUESTION_LIMITS.eventLogs
  const questions: Question[] = []

  // Single-field lookup question for one log entry
  const fieldQuestion = (
    nextId: () => string,
    prompt: string,
    truth: string,
    answerType: 'string' | 'integer',
  ): Question =>
    new QuestionBuilder()
      .id(nextId())
      .prompt(prompt)
      .groundTruth(truth)
      .type('field-retrieval')
      .dataset('event-logs')
      .answerType(answerType)
      .build()

  // Question whose ground truth is an integer count
  const integerQuestion = (type: 'aggregation' | 'filtering', prompt: string, value: number): Question =>
    new QuestionBuilder()
      .id(getId())
      .prompt(prompt)
      .groundTruth(String(value))
      .type(type)
      .dataset('event-logs')
      .answerType('integer')
      .build()

  const countWhere = (matches: (log: EventLog) => boolean): number => logs.filter(matches).length

  // Field retrieval: log metadata
  const logFieldGenerators: Array<(log: EventLog, getId: () => string) => Question> = [
    (log, nextId) => fieldQuestion(nextId, `What is the level of the log at ${log.timestamp}?`, log.level, 'string'),
    (log, nextId) => fieldQuestion(nextId, `What is the endpoint for the log at ${log.timestamp}?`, log.endpoint, 'string'),
    (log, nextId) => fieldQuestion(nextId, `What is the status code for the log at ${log.timestamp}?`, String(log.statusCode), 'integer'),
    (log, nextId) => fieldQuestion(nextId, `What is the response time for the log at ${log.timestamp}?`, String(log.responseTime), 'integer'),
  ]

  questions.push(...rotateQuestions(
    logs,
    logFieldGenerators,
    limits.fieldRetrieval,
    SAMPLE_STRIDES.EVENT_LOG_FIELD,
    getId,
  ))

  // Aggregation: basic statistics
  const avgResponseTime = logs.reduce((sum, entry) => sum + entry.responseTime, 0) / logs.length

  questions.push(
    integerQuestion('aggregation', 'How many log entries are in the dataset?', logs.length),
    new QuestionBuilder()
      .id(getId())
      .prompt('What is the average response time across all logs?')
      .groundTruth(String(avgResponseTime.toFixed(2)))
      .type('aggregation')
      .dataset('event-logs')
      .answerType('number')
      .normalize({ decimalPlaces: 2 })
      .build(),
  )

  // Aggregation: by level
  const levels = [...new Set(logs.map(entry => entry.level))]
  for (const level of levels) {
    questions.push(integerQuestion(
      'aggregation',
      `How many log entries have level "${level}"?`,
      countWhere(entry => entry.level === level),
    ))
  }

  // Aggregation: by endpoint
  const endpoints = [...new Set(logs.map(entry => entry.endpoint))]
  for (const endpoint of endpoints.slice(0, limits.aggregationEndpoints)) {
    questions.push(integerQuestion(
      'aggregation',
      `How many log entries are for endpoint "${endpoint}"?`,
      countWhere(entry => entry.endpoint === endpoint),
    ))
  }

  // Aggregation: by status code range
  const errorCount = countWhere(entry => entry.statusCode >= 400)
  const successCount = countWhere(entry => entry.statusCode >= 200 && entry.statusCode < 300)
  questions.push(
    integerQuestion('aggregation', 'How many log entries have a status code indicating an error (>= 400)?', errorCount),
    integerQuestion('aggregation', 'How many log entries have a successful status code (200-299)?', successCount),
  )

  // Aggregation: retryable errors
  questions.push(integerQuestion(
    'aggregation',
    'How many log entries have a retryable error?',
    countWhere(entry => entry.error?.retryable === true),
  ))

  // Filtering: multi-condition (level AND status)
  // `info` is skipped after slicing as it never has status >= 400 by design
  const levelsForStatus = levels
    .slice(0, limits.filteringLevelAndStatus)
    .filter(level => level !== 'info')
  for (const level of levelsForStatus) {
    questions.push(integerQuestion(
      'filtering',
      `How many log entries have level "${level}" and status code >= 400?`,
      countWhere(entry => entry.level === level && entry.statusCode >= 400),
    ))
  }

  // Filtering: endpoint AND status
  for (const endpoint of endpoints.slice(0, limits.filteringEndpointAndStatus)) {
    questions.push(integerQuestion(
      'filtering',
      `How many log entries are for endpoint "${endpoint}" with status code >= 500?`,
      countWhere(entry => entry.endpoint === endpoint && entry.statusCode >= 500),
    ))
  }

  // Filtering: endpoint AND retryable error
  for (const endpoint of endpoints.slice(0, limits.filteringEndpointRetryable)) {
    questions.push(integerQuestion(
      'filtering',
      `How many log entries for endpoint "${endpoint}" have a retryable error?`,
      countWhere(entry => entry.endpoint === endpoint && entry.error?.retryable === true),
    ))
  }

  return questions
}
totalForks = repos.reduce((sum, r) => sum + r.forks, 0) const avgStars = totalStars / totalRepos questions.push( new QuestionBuilder() .id(getId()) .prompt('How many repositories are in the dataset?') .groundTruth(String(totalRepos)) .type('aggregation') .dataset('github') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total number of stars across all repositories?') .groundTruth(String(totalStars)) .type('aggregation') .dataset('github') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total number of forks across all repositories?') .groundTruth(String(totalForks)) .type('aggregation') .dataset('github') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the average number of stars per repository?') .groundTruth(String(Math.round(avgStars))) .type('aggregation') .dataset('github') .answerType('integer') .build(), ) // Aggregation: by default branch const branches = [...new Set(repos.map(r => r.defaultBranch))] for (const branch of branches.slice(0, QUESTION_LIMITS.github.aggregationBranches)) { const count = repos.filter(r => r.defaultBranch === branch).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many repositories use "${branch}" as their default branch?`) .groundTruth(String(count)) .type('aggregation') .dataset('github') .answerType('integer') .build(), ) } // Aggregation: high star counts for (const threshold of QUESTION_THRESHOLDS.github.stars) { const count = repos.filter(r => r.stars > threshold).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many repositories have more than ${threshold} stars?`) .groundTruth(String(count)) .type('aggregation') .dataset('github') .answerType('integer') .build(), ) } // Aggregation: high fork counts for (const threshold of QUESTION_THRESHOLDS.github.forks) { const count = repos.filter(r => r.forks > threshold).length questions.push( new QuestionBuilder() 
.id(getId()) .prompt(`How many repositories have more than ${threshold} forks?`) .groundTruth(String(count)) .type('aggregation') .dataset('github') .answerType('integer') .build(), ) } // Aggregation: high watcher counts for (const threshold of QUESTION_THRESHOLDS.github.watchers) { const count = repos.filter(r => r.watchers > threshold).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many repositories have more than ${threshold} watchers?`) .groundTruth(String(count)) .type('aggregation') .dataset('github') .answerType('integer') .build(), ) } // Filtering: multi-condition (stars AND forks) for (const combo of QUESTION_THRESHOLDS.github.starForkCombinations.slice(0, QUESTION_LIMITS.github.filteringStarsAndForks)) { const count = repos.filter( r => r.stars > combo.stars && r.forks > combo.forks, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many repositories have more than ${combo.stars} stars and more than ${combo.forks} forks?`) .groundTruth(String(count)) .type('filtering') .dataset('github') .answerType('integer') .build(), ) } // Filtering: stars AND watchers for (const combo of QUESTION_THRESHOLDS.github.starWatcherCombinations) { const count = repos.filter( r => r.stars > combo.stars && r.watchers > combo.watchers, ).length questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many repositories have more than ${combo.stars} stars and more than ${combo.watchers} watchers?`) .groundTruth(String(count)) .type('filtering') .dataset('github') .answerType('integer') .build(), ) } return questions } ================================================ FILE: benchmarks/src/questions/index.ts ================================================ import type { AnalyticsMetric, Employee, EventLog, NestedConfig, Order, Repository } from '../datasets.ts' import type { Question } from '../types.ts' import { ACCURACY_DATASETS } from '../datasets.ts' import { generateAnalyticsQuestions } from './analytics.ts' import { 
/**
 * Build the full benchmark question set by running every per-dataset generator.
 *
 * @remarks
 * Question categories:
 * - Field Retrieval: a direct field lookup, no computation
 *   Examples: "What is X's salary?", "What is the status of order Y?"
 * - Aggregation: counts, sums, averages, min/max (single-condition filters included)
 *   Examples: "How many X?", "What is the total/average?", "How many X > threshold?"
 * - Filtering: multi-condition queries that combine several predicates
 *   Examples: "How many X WHERE condition1 AND condition2?"
 * - Structure Awareness: format-native structural affordances (TOON's [N] and {fields}, CSV's header)
 *   Examples: "How many records?", "List the field names", "What is the last record's field?"
 *
 * All generators share one id generator, so ids follow generation order.
 */
export function generateQuestions(): Question[] {
  const ids = createIdGenerator()
  const getId = () => ids.next().value

  // Raw payload of the named accuracy dataset, or undefined when it is not registered
  const payloadOf = (name: string) => ACCURACY_DATASETS.find(entry => entry.name === name)?.data

  // Array-backed datasets fall back to an empty list when missing
  const employees = (payloadOf('tabular')?.employees as Employee[]) ?? []
  const orders = (payloadOf('nested')?.orders as Order[]) ?? []
  const metrics = (payloadOf('analytics')?.metrics as AnalyticsMetric[]) ?? []
  const repositories = (payloadOf('github')?.repositories as Repository[]) ?? []
  const logs = (payloadOf('event-logs')?.logs as EventLog[]) ?? []
  // The nested config is a single object, so a missing dataset stays undefined
  const config = payloadOf('nested-config') as NestedConfig | undefined

  // Spread order is generation order; it determines which ids each question receives
  return [
    // Per-dataset questions
    ...generateTabularQuestions(employees, getId),
    ...generateNestedQuestions(orders, getId),
    ...generateAnalyticsQuestions(metrics, getId),
    ...generateGithubQuestions(repositories, getId),
    ...generateEventLogsQuestions(logs, getId),
    ...generateNestedConfigQuestions(config, getId),
    // Structure-awareness questions (format-native affordances)
    ...generateStructureQuestions(employees, orders, metrics, repositories, logs, getId),
    // Structural-validation questions (detecting corrupted data)
    ...generateStructuralValidationQuestions(getId),
  ]
}
}, { prompt: 'What is the session duration?', groundTruth: String(config.authentication.session.duration), answerType: 'integer' as const, }, { prompt: 'What is the minimum connection pool size?', groundTruth: String(config.database.pool.min), answerType: 'integer' as const, }, { prompt: 'What is the connection pool idle timeout?', groundTruth: String(config.database.pool.idleTimeout), answerType: 'integer' as const, }, { prompt: 'What is the database name?', groundTruth: config.database.name, answerType: 'string' as const, }, { prompt: 'What is the session refresh threshold?', groundTruth: String(config.authentication.session.refreshThreshold), answerType: 'integer' as const, }, { prompt: 'What is the version in the configuration?', groundTruth: config.version, answerType: 'string' as const, }, ] for (const q of fieldRetrievalQuestions.slice(0, QUESTION_LIMITS.nestedConfig.fieldRetrieval)) { questions.push( new QuestionBuilder() .id(getId()) .prompt(q.prompt) .groundTruth(q.groundTruth) .type('field-retrieval') .dataset('nested-config') .answerType(q.answerType) .build(), ) } // Aggregation: counts of nested structures const roleCount = Object.keys(config.permissions.roles).length const groupCount = Object.keys(config.permissions.groups).length const providerCount = config.authentication.providers.length const featureCount = Object.keys(config.features).length const replicaCount = config.database.replicas.length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many roles are defined in permissions?') .groundTruth(String(roleCount)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many groups are defined in permissions?') .groundTruth(String(groupCount)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many authentication providers are configured?') .groundTruth(String(providerCount)) 
.type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many feature flags are defined?') .groundTruth(String(featureCount)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many database replicas are configured?') .groundTruth(String(replicaCount)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), ) // Aggregation: providers with admin scope const adminScopeProviderCount = config.authentication.providers.filter(p => p.scopes.includes('admin')).length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many authentication providers include the "admin" scope?') .groundTruth(String(adminScopeProviderCount)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), ) // Aggregation: feature flag details const enabledFeatures = Object.entries(config.features).filter(([_, f]) => f.enabled).length questions.push( new QuestionBuilder() .id(getId()) .prompt('How many feature flags are enabled?') .groundTruth(String(enabledFeatures)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), ) // Aggregation: role permissions const adminPermissions = config.permissions.roles.admin?.permissions.length ?? 
0 questions.push( new QuestionBuilder() .id(getId()) .prompt('How many permissions does the admin role have?') .groundTruth(String(adminPermissions)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), ) // Aggregation: additional nested counts const totalPermissions = Object.values(config.permissions.roles).reduce((sum, role) => sum + role.permissions.length, 0) const distinctPermissions = new Set(Object.values(config.permissions.roles).flatMap(r => r.permissions)).size const totalVariants = Object.values(config.features).reduce((sum, f) => sum + f.variants.length, 0) const highPriorityReplicas = config.database.replicas.filter(r => r.priority > 2).length const featuresWithHighRollout = Object.values(config.features).filter(f => f.rollout > 50).length const groupsWithMultipleRoles = Object.values(config.permissions.groups).filter(g => g.roles.length > 1).length questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the total number of permissions across all roles?') .groundTruth(String(totalPermissions)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many distinct permissions are defined across all roles?') .groundTruth(String(distinctPermissions)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('What is the total number of variants across all feature flags?') .groundTruth(String(totalVariants)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many database replicas have a priority greater than 2?') .groundTruth(String(highPriorityReplicas)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many feature flags have a rollout percentage greater than 50?') .groundTruth(String(featuresWithHighRollout)) 
.type('aggregation') .dataset('nested-config') .answerType('integer') .build(), new QuestionBuilder() .id(getId()) .prompt('How many groups have more than one role assigned?') .groundTruth(String(groupsWithMultipleRoles)) .type('aggregation') .dataset('nested-config') .answerType('integer') .build(), ) // Filtering: complex multi-condition queries const filteringQuestions = [ { prompt: 'How many feature flags are enabled with rollout greater than 50%?', groundTruth: String(Object.entries(config.features) .filter(([_, f]) => f.enabled && f.rollout > 50).length), }, { prompt: 'How many groups have the admin role?', groundTruth: String(Object.entries(config.permissions.groups) .filter(([_, g]) => g.roles.includes('admin')).length), }, { prompt: 'How many database replicas have priority greater than 2 and port 5432?', groundTruth: String(config.database.replicas .filter(r => r.priority > 2 && r.port === 5432).length), }, { prompt: 'How many authentication providers have more than 2 scopes?', groundTruth: String(config.authentication.providers .filter(p => p.scopes.length > 2).length), }, { prompt: 'How many roles have at least 5 permissions?', groundTruth: String(Object.values(config.permissions.roles) .filter(r => r.permissions.length >= 5).length), }, { prompt: 'How many feature flags are disabled with rollout less than 25%?', groundTruth: String(Object.values(config.features) .filter(f => !f.enabled && f.rollout < 25).length), }, { prompt: 'How many enabled features have at least 2 variants?', groundTruth: String(Object.values(config.features) .filter(f => f.enabled && f.variants.length >= 2).length), }, ] for (const q of filteringQuestions.slice(0, QUESTION_LIMITS.nestedConfig.filteringComplex)) { questions.push( new QuestionBuilder() .id(getId()) .prompt(q.prompt) .groundTruth(q.groundTruth) .type('filtering') .dataset('nested-config') .answerType('integer') .build(), ) } return questions } ================================================ FILE: 
/**
 * Generate questions about the nested orders dataset.
 *
 * Questions are emitted in this order: rotated order-level field retrieval,
 * rotated customer-level field retrieval, per-status counts, revenue
 * aggregations, high-value order counts, and three groups of two-condition
 * filtering counts.
 *
 * @param orders - Orders that prompts and ground truths are derived from
 * @param getId - Called once per generated question to obtain its id
 * @returns The generated questions, in generation order
 */
export function generateNestedQuestions(orders: Order[], getId: () => string): Question[] {
  const questions: Question[] = []

  // Integer-valued count question on the `nested` dataset
  const countQuestion = (prompt: string, count: number, type: 'aggregation' | 'filtering'): Question =>
    new QuestionBuilder()
      .id(getId())
      .prompt(prompt)
      .groundTruth(String(count))
      .type(type)
      .dataset('nested')
      .answerType('integer')
      .build()

  // Monetary aggregation question; ground truth is fixed to two decimals
  const amountQuestion = (prompt: string, amount: number): Question =>
    new QuestionBuilder()
      .id(getId())
      .prompt(prompt)
      .groundTruth(amount.toFixed(2))
      .type('aggregation')
      .dataset('nested')
      .answerType('number')
      .normalize({ decimalPlaces: 2 })
      .build()

  // Field retrieval: order totals and statuses
  const orderFieldGenerators: Array<(order: Order, getId: () => string) => Question> = [
    (order, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What is the total for order ${order.orderId}?`)
      .groundTruth(String(order.total))
      .type('field-retrieval')
      .dataset('nested')
      .answerType('number')
      .normalize({ decimalPlaces: 2 })
      .build(),
    (order, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What is the status of order ${order.orderId}?`)
      .groundTruth(order.status)
      .type('field-retrieval')
      .dataset('nested')
      .answerType('string')
      .build(),
  ]

  questions.push(...rotateQuestions(
    orders,
    orderFieldGenerators,
    QUESTION_LIMITS.nested.fieldRetrievalOrders,
    SAMPLE_STRIDES.ORDER_FIELD,
    getId,
  ))

  // Field retrieval: customer info and order dates
  const customerFieldGenerators: Array<(order: Order, getId: () => string) => Question> = [
    (order, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What is the customer name for order ${order.orderId}?`)
      .groundTruth(order.customer.name)
      .type('field-retrieval')
      .dataset('nested')
      .answerType('string')
      .build(),
    (order, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What is the customer email for order ${order.orderId}?`)
      .groundTruth(order.customer.email)
      .type('field-retrieval')
      .dataset('nested')
      .answerType('string')
      .build(),
    (order, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`What is the order date for order ${order.orderId}?`)
      // A falsy order date is reported as an empty string
      .groundTruth(order.orderDate || '')
      .type('field-retrieval')
      .dataset('nested')
      .answerType('string')
      .build(),
    (order, getId) => new QuestionBuilder()
      .id(getId())
      .prompt(`How many items are in order ${order.orderId}?`)
      .groundTruth(String(order.items.length))
      .type('field-retrieval')
      .dataset('nested')
      .answerType('integer')
      .build(),
  ]

  // Use stride + 1 for customer fields to offset from order fields; when the
  // strided index runs past the end, fall back to the order at the same position
  const customerOrders = orders.map((_, i) => orders[i * SAMPLE_STRIDES.CUSTOMER_FIELD + 1] || orders[i]).filter(Boolean) as Order[]

  questions.push(...rotateQuestions(
    customerOrders,
    customerFieldGenerators,
    QUESTION_LIMITS.nested.fieldRetrievalCustomers,
    1,
    getId,
  ))

  // Distinct statuses in first-seen order; shared by the aggregation and filtering groups
  const statuses = [...new Set(orders.map(o => o.status))]

  // Aggregation: count by status
  for (const status of statuses.slice(0, QUESTION_LIMITS.nested.aggregationStatuses)) {
    const count = orders.filter(o => o.status === status).length
    questions.push(countQuestion(`How many orders have status "${status}"?`, count, 'aggregation'))
  }

  // Aggregation: totals and averages
  const totalOrders = orders.length
  const totalRevenue = orders.reduce((sum, o) => sum + o.total, 0)

  questions.push(amountQuestion('What is the total revenue across all orders?', totalRevenue))
  // The average of zero orders is NaN, so it has no meaningful ground truth
  if (totalOrders > 0) {
    questions.push(amountQuestion('What is the average order value?', totalRevenue / totalOrders))
  }
  questions.push(countQuestion('How many orders are in the dataset?', totalOrders, 'aggregation'))
  // The maximum of zero orders would be -Infinity, so skip it for an empty dataset
  if (totalOrders > 0) {
    // Fold instead of spreading into Math.max so very large arrays cannot exceed the argument limit
    const maxOrderValue = orders.reduce((max, o) => Math.max(max, o.total), Number.NEGATIVE_INFINITY)
    questions.push(amountQuestion('What is the highest order total?', maxOrderValue))
  }

  // Aggregation: high-value orders (single-condition filter)
  for (const threshold of QUESTION_THRESHOLDS.nested.highValueOrders) {
    const count = orders.filter(o => o.total > threshold).length
    questions.push(countQuestion(`How many orders have a total greater than ${threshold}?`, count, 'aggregation'))
  }

  // Filtering: multi-condition queries (status AND value)
  for (const status of statuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndValue)) {
    const count = orders.filter(
      o => o.status === status && o.total > QUESTION_THRESHOLDS.nested.statusValueThreshold,
    ).length
    questions.push(countQuestion(
      `How many orders have status "${status}" and total greater than ${QUESTION_THRESHOLDS.nested.statusValueThreshold}?`,
      count,
      'filtering',
    ))
  }

  // Filtering: status AND items count (multi-condition)
  for (const status of statuses.slice(0, QUESTION_LIMITS.nested.filteringStatusAndItems)) {
    const count = orders.filter(
      o => o.status === status && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold,
    ).length
    questions.push(countQuestion(
      `How many orders have status "${status}" and at least ${QUESTION_THRESHOLDS.nested.itemCountThreshold} items?`,
      count,
      'filtering',
    ))
  }

  // Filtering: total AND items count (multi-condition)
  for (const threshold of QUESTION_THRESHOLDS.nested.totalThresholdsForItems) {
    const count = orders.filter(
      o => o.total > threshold && o.items.length >= QUESTION_THRESHOLDS.nested.itemCountThreshold,
    ).length
    questions.push(countQuestion(
      `How many orders have a total greater than ${threshold} and at least ${QUESTION_THRESHOLDS.nested.itemCountThreshold} items?`,
      count,
      'filtering',
    ))
  }

  return questions
}
for (const fixture of validationFixtures) { questions.push( new QuestionBuilder() .id(getId()) .prompt('Is this data complete and valid? Answer only YES or NO.') .groundTruth(fixture.isValid ? 'YES' : 'NO') .type('structural-validation') .dataset(fixture.dataset) .answerType('boolean') .build(), ) } return questions } ================================================ FILE: benchmarks/src/questions/structure.ts ================================================ import type { AnalyticsMetric, Employee, EventLog, Order, Repository } from '../datasets.ts' import type { Question } from '../types.ts' import { QuestionBuilder } from './utils.ts' /** * Generate structure-awareness questions across all datasets * * These questions test format-native structural affordances: * - TOON's explicit array length [N] and field declarations {fields} * - CSV's header row (but no explicit length) * - JSON/YAML have neither unless the model counts manually */ export function generateStructureQuestions( employees: Employee[], orders: Order[], metrics: AnalyticsMetric[], repos: Repository[], logs: EventLog[], getId: () => string, ): Question[] { const questions: Question[] = [] // ========== TABULAR DATASET (Employees) ========== // Count: Total employees (tests array length awareness) questions.push( new QuestionBuilder() .id(getId()) .prompt('How many employees are in the dataset?') .groundTruth(String(employees.length)) .type('structure-awareness') .dataset('tabular') .answerType('integer') .build(), ) // Field list: Employee fields (tests field name awareness) const employeeFields = 'id,name,email,department,salary,yearsExperience,active' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for employees (comma-separated, in order).') .groundTruth(employeeFields) .type('structure-awareness') .dataset('tabular') .answerType('csv-list-ordered') .build(), ) // Positional: Third field name for employees (tests TOON {fields} syntax) questions.push( new 
QuestionBuilder() .id(getId()) .prompt('What is the 3rd field name for employees?') .groundTruth('email') .type('structure-awareness') .dataset('tabular') .answerType('string') .build(), ) // Last row: Last employee's department (tests ability to find last row using length) const lastEmployee = employees.at(-1)! questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the department of the last employee in the dataset?') .groundTruth(lastEmployee.department) .type('structure-awareness') .dataset('tabular') .answerType('string') .build(), ) // Last row: Last employee's name questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the name of the last employee in the dataset?') .groundTruth(lastEmployee.name) .type('structure-awareness') .dataset('tabular') .answerType('string') .build(), ) // Field count: How many fields per employee (tests schema awareness) questions.push( new QuestionBuilder() .id(getId()) .prompt('How many fields does each employee record have?') .groundTruth('7') .type('structure-awareness') .dataset('tabular') .answerType('integer') .build(), ) // ========== NESTED DATASET (Orders) ========== // Count: Total orders questions.push( new QuestionBuilder() .id(getId()) .prompt('How many orders are in the dataset?') .groundTruth(String(orders.length)) .type('structure-awareness') .dataset('nested') .answerType('integer') .build(), ) // Field list: Order fields const orderFields = 'orderId,customer,items,subtotal,tax,total,status,orderDate' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the top-level field names for orders (comma-separated, in order).') .groundTruth(orderFields) .type('structure-awareness') .dataset('nested') .answerType('csv-list-ordered') .build(), ) // Nested count: Items in specific order const orderWithManyItems = orders.reduce((max, order) => order.items.length > max.items.length ? 
order : max, ) questions.push( new QuestionBuilder() .id(getId()) .prompt(`How many items are in order ${orderWithManyItems.orderId}?`) .groundTruth(String(orderWithManyItems.items.length)) .type('structure-awareness') .dataset('nested') .answerType('integer') .build(), ) // Nested field list: Item fields const itemFields = 'sku,name,quantity,price' questions.push( new QuestionBuilder() .id(getId()) .prompt('What are the field names for items within orders (comma-separated, in order)?') .groundTruth(itemFields) .type('structure-awareness') .dataset('nested') .answerType('csv-list-ordered') .build(), ) // Last row: Last order's status const lastOrder = orders.at(-1)! questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the status of the last order in the dataset?') .groundTruth(lastOrder.status) .type('structure-awareness') .dataset('nested') .answerType('string') .build(), ) // Customer field list const customerFields = 'id,name,email,phone' questions.push( new QuestionBuilder() .id(getId()) .prompt('What are the field names for customer objects within orders (comma-separated, in order)?') .groundTruth(customerFields) .type('structure-awareness') .dataset('nested') .answerType('csv-list-ordered') .build(), ) // ========== ANALYTICS DATASET (Metrics) ========== // Count: Total metrics questions.push( new QuestionBuilder() .id(getId()) .prompt('How many metric records are in the dataset?') .groundTruth(String(metrics.length)) .type('structure-awareness') .dataset('analytics') .answerType('integer') .build(), ) // Field list: Metric fields const metricFields = 'date,views,clicks,conversions,revenue,bounceRate' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for metrics (comma-separated, in order).') .groundTruth(metricFields) .type('structure-awareness') .dataset('analytics') .answerType('csv-list-ordered') .build(), ) // Positional: Fifth field name for metrics (tests TOON {fields} syntax) questions.push( new 
QuestionBuilder() .id(getId()) .prompt('What is the 5th field name for analytics metrics?') .groundTruth('revenue') .type('structure-awareness') .dataset('analytics') .answerType('string') .build(), ) // Last row: Last metric's date const lastMetric = metrics.at(-1)! questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the date of the last metric record in the dataset?') .groundTruth(lastMetric.date) .type('structure-awareness') .dataset('analytics') .answerType('string') .build(), ) // Field count: How many fields per metric questions.push( new QuestionBuilder() .id(getId()) .prompt('How many fields does each metric record have?') .groundTruth('6') .type('structure-awareness') .dataset('analytics') .answerType('integer') .build(), ) // ========== GITHUB DATASET (Repositories) ========== // Count: Total repositories questions.push( new QuestionBuilder() .id(getId()) .prompt('How many repositories are in the dataset?') .groundTruth(String(repos.length)) .type('structure-awareness') .dataset('github') .answerType('integer') .build(), ) // Field list: Repository fields const repoFields = 'id,name,repo,description,stars,watchers,forks,defaultBranch,createdAt,updatedAt,pushedAt' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for repositories (comma-separated, in order).') .groundTruth(repoFields) .type('structure-awareness') .dataset('github') .answerType('csv-list-ordered') .build(), ) // Positional: Seventh field name for repos (tests TOON {fields} syntax) questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the 7th field name for GitHub repositories?') .groundTruth('forks') .type('structure-awareness') .dataset('github') .answerType('string') .build(), ) // Last row: Last repo's name const lastRepo = repos.at(-1)! 
questions.push( new QuestionBuilder() .id(getId()) .prompt('What is the name of the last repository in the dataset?') .groundTruth(lastRepo.name) .type('structure-awareness') .dataset('github') .answerType('string') .build(), ) // Field count: How many fields per repository questions.push( new QuestionBuilder() .id(getId()) .prompt('How many fields does each repository record have?') .groundTruth('11') .type('structure-awareness') .dataset('github') .answerType('integer') .build(), ) // ========== EVENT LOGS DATASET ========== // Count: Total logs questions.push( new QuestionBuilder() .id(getId()) .prompt('How many log entries are in the dataset?') .groundTruth(String(logs.length)) .type('structure-awareness') .dataset('event-logs') .answerType('integer') .build(), ) // Field list: Base log fields (including optional error) const logFields = 'timestamp,level,endpoint,statusCode,responseTime,userId,error' questions.push( new QuestionBuilder() .id(getId()) .prompt('List the field names for log entries (comma-separated, any order, including optional fields).') .groundTruth(logFields) .type('structure-awareness') .dataset('event-logs') .answerType('csv-list-unordered') .build(), ) // Last row: Last log's level const lastLog = logs.at(-1)! 
questions.push(
    new QuestionBuilder()
      .id(getId())
      .prompt('What is the level of the last log entry in the dataset?')
      .groundTruth(lastLog.level)
      .type('structure-awareness')
      .dataset('event-logs')
      .answerType('string')
      .build(),
  )

  return questions
}

================================================ FILE: benchmarks/src/questions/tabular.ts ================================================
import type { Employee } from '../datasets.ts'
import type { Question } from '../types.ts'
import { QUESTION_LIMITS, QUESTION_THRESHOLDS } from '../constants.ts'
import { QuestionBuilder, rotateQuestions, SAMPLE_STRIDES } from './utils.ts'

/**
 * Generate tabular (employee) questions
 *
 * Emits, in this order: rotated field-retrieval questions, per-department and
 * salary-threshold counts, dataset-wide totals, then multi-condition filters.
 * Every question is tagged with the `tabular` dataset.
 *
 * @param employees - Records the ground-truth answers are computed from
 * @param getId - Supplies the ID for each question as it is built
 * @returns The generated questions in creation order
 */
export function generateTabularQuestions(employees: Employee[], getId: () => string): Question[] {
  const questions: Question[] = []

  // Shared builder chain for a question about the tabular dataset
  const tabularQuestion = (
    nextId: () => string,
    prompt: string,
    groundTruth: string,
    kind: Question['type'],
    answerKind: Parameters<QuestionBuilder['answerType']>[0],
  ): Question =>
    new QuestionBuilder()
      .id(nextId())
      .prompt(prompt)
      .groundTruth(groundTruth)
      .type(kind)
      .dataset('tabular')
      .answerType(answerKind)
      .build()

  // Number of employees satisfying a predicate
  const countWhere = (matches: (emp: Employee) => boolean): number =>
    employees.reduce((tally, emp) => (matches(emp) ? tally + 1 : tally), 0)

  // Field retrieval: specific employees (one generator per field, cycled by rotateQuestions)
  const fieldGenerators: Array<(emp: Employee, getId: () => string) => Question> = [
    (emp, nextId) =>
      tabularQuestion(nextId, `What is the salary of ${emp.name}?`, String(emp.salary), 'field-retrieval', 'integer'),
    (emp, nextId) =>
      tabularQuestion(nextId, `What department does ${emp.name} work in?`, emp.department, 'field-retrieval', 'string'),
    (emp, nextId) =>
      tabularQuestion(nextId, `What is the email address of ${emp.name}?`, emp.email, 'field-retrieval', 'string'),
    (emp, nextId) =>
      tabularQuestion(nextId, `How many years of experience does ${emp.name} have?`, String(emp.yearsExperience), 'field-retrieval', 'integer'),
    (emp, nextId) =>
      tabularQuestion(nextId, `Is ${emp.name} an active employee?`, emp.active ? 'yes' : 'no', 'field-retrieval', 'boolean'),
  ]

  const fieldQuestions = rotateQuestions(
    employees,
    fieldGenerators,
    QUESTION_LIMITS.tabular.fieldRetrieval,
    SAMPLE_STRIDES.EMPLOYEE_FIELD,
    getId,
  )
  questions.push(...fieldQuestions)

  // Aggregation: count by department (departments kept in first-seen order)
  const departments = Array.from(new Set(employees.map(e => e.department)))
  for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.aggregationDepartments)) {
    const headcount = countWhere(e => e.department === dept)
    questions.push(
      tabularQuestion(getId, `How many employees work in ${dept}?`, String(headcount), 'aggregation', 'integer'),
    )
  }

  // Aggregation: salary ranges (single-condition filters)
  for (const threshold of QUESTION_THRESHOLDS.tabular.salaryRanges) {
    const aboveThreshold = countWhere(e => e.salary > threshold)
    questions.push(
      tabularQuestion(getId, `How many employees have a salary greater than ${threshold}?`, String(aboveThreshold), 'aggregation', 'integer'),
    )
  }

  // Aggregation: totals and averages
  const totalEmployees = employees.length
  const payroll = employees.reduce((sum, e) => sum + e.salary, 0)
  const avgSalary = Math.round(payroll / totalEmployees)
  const activeCount = countWhere(e => e.active)
  const inactiveCount = countWhere(e => !e.active)

  const datasetTotals: Array<[string, number]> = [
    ['How many employees are in the dataset?', totalEmployees],
    ['What is the average salary across all employees?', avgSalary],
    ['How many employees are active?', activeCount],
    ['How many employees are inactive?', inactiveCount],
  ]
  for (const [prompt, value] of datasetTotals) {
    questions.push(tabularQuestion(getId, prompt, String(value), 'aggregation', 'integer'))
  }

  // Filtering: count by department with salary filter (multi-condition)
  const salaryFloor = QUESTION_THRESHOLDS.tabular.departmentSalaryThreshold
  for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringMultiConditionDepartments)) {
    const matching = countWhere(e => e.department === dept && e.salary > salaryFloor)
    questions.push(
      tabularQuestion(getId, `How many employees in ${dept} have a salary greater than ${salaryFloor}?`, String(matching), 'filtering', 'integer'),
    )
  }

  // Filtering: active employees by experience (multi-condition)
  for (const exp of QUESTION_THRESHOLDS.tabular.experienceYears.slice(0, QUESTION_LIMITS.tabular.filteringExperience)) {
    const matching = countWhere(e => e.yearsExperience > exp && e.active)
    questions.push(
      tabularQuestion(getId, `How many active employees have more than ${exp} years of experience?`, String(matching), 'filtering', 'integer'),
    )
  }

  // Filtering: department by experience (multi-condition)
  const experienceFloor = QUESTION_THRESHOLDS.tabular.departmentExperienceThreshold
  for (const dept of departments.slice(0, QUESTION_LIMITS.tabular.filteringDepartmentExp)) {
    const matching = countWhere(e => e.department === dept && e.yearsExperience > experienceFloor)
    questions.push(
      tabularQuestion(getId, `How many employees in ${dept} have more than ${experienceFloor} years of experience?`, String(matching), 'filtering', 'integer'),
    )
  }

  // Filtering: department by active status (multi-condition)
  for (const dept of departments.slice(0,
QUESTION_LIMITS.tabular.filteringDepartmentActive)) {
    const count = employees.filter(e => e.department === dept && e.active).length
    questions.push(
      new QuestionBuilder()
        .id(getId())
        .prompt(`How many active employees work in ${dept}?`)
        .groundTruth(String(count))
        .type('filtering')
        .dataset('tabular')
        .answerType('integer')
        .build(),
    )
  }

  return questions
}

================================================ FILE: benchmarks/src/questions/utils.ts ================================================
import type { AnswerType, NormalizationOptions } from '../normalize.ts'
import type { Question } from '../types.ts'

// Constants for sampling strides: the step `rotateQuestions` uses between sampled items
export const SAMPLE_STRIDES = {
  EMPLOYEE_FIELD: 2,
  ORDER_FIELD: 2,
  CUSTOMER_FIELD: 2,
  ANALYTICS_FIELD: 3,
  METRIC_FIELD: 3,
  REPO_FIELD: 7,
  EVENT_LOG_FIELD: 5,
} as const

/**
 * ID Generator
 *
 * Endless generator yielding `q1`, `q2`, `q3`, … in sequence.
 */
export function* createIdGenerator(): Generator<string> {
  let id = 1
  while (true) {
    yield `q${id++}`
  }
}

/**
 * Question Builder class for fluent question creation
 *
 * Each setter stores one field and returns the builder, so calls can be
 * chained; `build()` validates and returns the accumulated question.
 */
export class QuestionBuilder {
  // Fields collected so far; only asserted to be a full Question in build()
  private question: Partial<Question> = {}

  /** Set the question ID. */
  id(id: string): this {
    this.question.id = id
    return this
  }

  /** Set the prompt text. */
  prompt(prompt: string): this {
    this.question.prompt = prompt
    return this
  }

  /** Set the expected answer. */
  groundTruth(groundTruth: string): this {
    this.question.groundTruth = groundTruth
    return this
  }

  /** Set the question category. */
  type(type: Question['type']): this {
    this.question.type = type
    return this
  }

  /** Set the dataset the question is asked against. */
  dataset(dataset: Question['dataset']): this {
    this.question.dataset = dataset
    return this
  }

  /** Set the answer kind (stored as `answerType`). */
  answerType(kind: AnswerType): this {
    this.question.answerType = kind
    return this
  }

  /** Attach normalization options (stored as `normalizationOptions`). */
  normalize(options: Partial<NormalizationOptions>): this {
    this.question.normalizationOptions = options
    return this
  }

  /**
   * Return the assembled question.
   *
   * @throws Error('Incomplete question') when `id`, `prompt`, `groundTruth`,
   * `type`, or `dataset` is unset or empty. `answerType` and
   * `normalizationOptions` are not checked.
   */
  build(): Question {
    if (!this.question.id || !this.question.prompt || !this.question.groundTruth || !this.question.type || !this.question.dataset) {
      throw new Error('Incomplete question')
    }
    return this.question as Question
  }
}

/**
 * Rotate through question generators
 *
 * Produces up to `min(limit, items.length)` questions. Question `i` samples
 * `items[i * stride]`, falling back to `items[i]` when that index is out of
 * range, and is built by `generators[i % generators.length]`.
 *
 * @param items - Records to sample from
 * @param generators - Question factories, applied round-robin
 * @param limit - Maximum number of questions to produce
 * @param stride - Step between sampled item indices
 * @param getId - ID supplier handed through to each generator
 * @returns The generated questions, in sampling order
 */
export function rotateQuestions<T>(
  items: T[],
  generators: ((item: T, getId: () => string) => Question)[],
  limit: number,
  stride: number,
  getId: () => string,
): Question[] {
  const questions: Question[] = []
  const questionCount = Math.min(limit, items.length)

  for (let i = 0; i < questionCount; i++) {
    // `??` rather than `||`: only a missing slot triggers the fallback, so
    // legitimately falsy items (0, '', false) are still sampled
    const item = items[i * stride] ?? items[i]
    if (item === undefined || item === null)
      continue

    // With an empty generator list the index is NaN and the lookup is
    // undefined, so nothing is emitted
    const generator = generators[i % generators.length]
    if (generator) {
      questions.push(generator(item, getId))
    }
  }

  return questions
}

================================================ FILE: benchmarks/src/report.ts ================================================
import type { Dataset, EfficiencyRanking, EvaluationResult, FormatResult, Question } from './types.ts'
import { FORMATTER_DISPLAY_NAMES, QUESTION_TYPE_LABELS, QUESTION_TYPES } from './constants.ts'
import { ACCURACY_DATASETS } from './datasets.ts'
import { models, PRIMERS } from './evaluate.ts'
import { supportsCSV } from './formatters.ts'
import { generateQuestions } from './questions/index.ts'
import { createProgressBar, tokenize } from './utils.ts'

const EFFICIENCY_CHART_STYLE: 'vertical' | 'horizontal' = 'horizontal'

/**
 * Calculate token counts for all format+dataset combinations
 *
 * @remarks
 * Includes primer tokens for fairer comparison across formats
 *
 * @param formatters - Serializers keyed by format name
 * @returns Token counts keyed by `${formatName}-${datasetName}`
 */
// NOTE(review): the type arguments on `Record` were reconstructed from how the
// values are used in this function — confirm against the formatter registry.
export function calculateTokenCounts(
  formatters: Record<string, (data: Dataset['data']) => string>,
): Record<string, number> {
  const tokenCounts: Record<string, number> = {}

  for (const [formatName, formatter] of Object.entries(formatters)) {
    for (const dataset of ACCURACY_DATASETS) {
      // Skip CSV for datasets that don't support it
      if (formatName === 'csv' && !supportsCSV(dataset))
        continue

      const formattedData = formatter(dataset.data)
      const primer = PRIMERS[formatName] ?? ''
      // Include primer in token count for fair comparison
      const fullPrompt = primer ?
`${primer}\n\n${formattedData}`
        : formattedData
      const key = `${formatName}-${dataset.name}`
      tokenCounts[key] = tokenize(fullPrompt)
    }
  }

  return tokenCounts
}

/**
 * Calculate per-format statistics from evaluation results
 *
 * For every format present in `results`, computes the share of correct
 * answers, the mean latency, and the mean token count over all `tokenCounts`
 * entries whose key starts with `${format}-`.
 *
 * @param results - Individual evaluation outcomes
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns One entry per format, sorted by accuracy (highest first)
 */
export function calculateFormatResults(
  results: EvaluationResult[],
  tokenCounts: Record<string, number>,
): FormatResult[] {
  const formatNames = [...new Set(results.map(r => r.format))]

  return formatNames.map((formatName) => {
    const formatResults = results.filter(r => r.format === formatName)
    const correctCount = formatResults.filter(r => r.isCorrect).length
    // Never zero: every name in formatNames was taken from at least one result
    const totalCount = formatResults.length
    const accuracy = correctCount / totalCount

    // Calculate average tokens across all datasets for this format
    // NOTE(review): prefix matching would also pick up keys of any other format
    // whose name begins with `${formatName}-`; and with no matching keys the
    // average is NaN — confirm neither can occur with the registered formats.
    const formatTokenEntries = Object.entries(tokenCounts)
      .filter(([key]) => key.startsWith(`${formatName}-`))
    const avgTokens = formatTokenEntries.reduce((sum, [, tokens]) => sum + tokens, 0) / formatTokenEntries.length

    const averageLatency = formatResults.reduce((sum, r) => sum + r.latencyMs, 0) / totalCount

    return {
      format: formatName,
      accuracy,
      totalTokens: Math.round(avgTokens),
      averageLatency: Math.round(averageLatency),
      correctCount,
      totalCount,
    }
  }).sort((a, b) => b.accuracy - a.accuracy)
}

/**
 * Generate consolidated retrieval accuracy report
 *
 * @param results - Individual evaluation outcomes
 * @param formatResults - Per-format statistics
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns The report text: intro, dataset catalog, efficiency ranking, and per-model accuracy
 */
export function generateAccuracyReport(
  results: EvaluationResult[],
  formatResults: FormatResult[],
  tokenCounts: Record<string, number>,
): string {
  const questions = generateQuestions()
  // Count distinct questions that were actually evaluated
  const totalQuestions = [...new Set(results.map(r => r.questionId))].length
  const modelIds = models.map(m => m.modelId)
  // Only models that have at least one result
  const modelNames = modelIds.filter(id => results.some(r => r.model === id))

  // NOTE(review): template text is kept exactly as it appears in this view,
  // where its line breaks look collapsed — confirm the emitted markdown.
  return ` Benchmarks test LLM comprehension across different input formats using ${totalQuestions} data retrieval questions on ${modelNames.length} ${modelNames.length === 1 ? 'model' : 'models'}.
Show Dataset Catalog ${generateDatasetCatalog(ACCURACY_DATASETS)}
#### Efficiency Ranking (Accuracy per 1K Tokens) ${generateEfficiencyRankingReport(formatResults, totalQuestions, modelNames.length)} #### Per-Model Accuracy ${generateDetailedAccuracyReport(formatResults, results, questions, tokenCounts)} `.trimStart()
}

/**
 * Generate dataset catalog section
 *
 * @param datasets - Datasets to list, one table row each
 * @returns The catalog table followed by a legend for its columns
 */
function generateDatasetCatalog(datasets: Dataset[]): string {
  const rows = datasets.map((dataset) => {
    const csvSupport = supportsCSV(dataset) ? '✓' : '✗'
    // Row count is the length of the first value in `data`; 1 when that value has no length
    const rowCount = Object.values(dataset.data)[0]?.length ?? 1
    const structure = dataset.metadata.structureClass
    const eligibility = `${dataset.metadata.tabularEligibility}%`

    return `| ${dataset.description} | ${rowCount} | ${structure} | ${csvSupport} | ${eligibility} |`
  }).join('\n')

  return ` #### Dataset Catalog | Dataset | Rows | Structure | CSV Support | Eligibility | | ------- | ---- | --------- | ----------- | ----------- | ${rows} **Structure classes:** - **uniform**: All objects have identical fields with primitive values - **semi-uniform**: Mix of uniform and non-uniform structures - **nested**: Objects with nested structures (nested objects or arrays) - **deep**: Highly nested with minimal tabular eligibility **CSV Support:** ✓ (supported), ✗ (not supported – would require lossy flattening) **Eligibility:** Percentage of arrays that qualify for TOON's tabular format (uniform objects with primitive values) `.trim()
}

/**
 * Generate efficiency ranking report
 *
 * @param formatResults - Per-format statistics
 * @param totalQuestions - Number of distinct questions evaluated
 * @param modelCount - Number of models with results
 * @returns The ranking chart plus TOON-vs-JSON summary and CSV note
 */
function generateEfficiencyRankingReport(
  formatResults: FormatResult[],
  totalQuestions: number,
  modelCount: number,
): string {
  const toon = formatResults.find(r => r.format === 'toon')
  const json = formatResults.find(r => r.format === 'json-pretty')
  const csv = formatResults.find(r => r.format === 'csv')

  // Build efficiency ranking (accuracy per 1k tokens)
  const efficiencyRanking = formatResults
    // Exclude CSV since it only supports a subset of datasets (~half the questions)
    .filter(fr => fr.format !== 'csv')
    .map((fr) => {
      const efficiency =
(fr.accuracy * 100) / (fr.totalTokens / 1000)

      return {
        format: fr.format,
        efficiency,
        accuracy: fr.accuracy,
        tokens: fr.totalTokens,
      }
    })
    .sort((a, b) => b.efficiency - a.efficiency)

  // Chart orientation is fixed by the module-level EFFICIENCY_CHART_STYLE constant
  const efficiencyChart = EFFICIENCY_CHART_STYLE === 'vertical'
    ? generateVerticalEfficiencyChart(efficiencyRanking)
    : generateHorizontalEfficiencyChart(efficiencyRanking)

  // Build summary text (left empty unless both TOON and pretty JSON results exist)
  let summary = ''
  if (toon && json) {
    const toonVsJson = `**${(toon.accuracy * 100).toFixed(1)}%** accuracy (vs JSON's ${(json.accuracy * 100).toFixed(1)}%)`
    const tokenSavings = `**${((1 - toon.totalTokens / json.totalTokens) * 100).toFixed(1)}% fewer tokens**`
    summary = `TOON achieves ${toonVsJson} while using ${tokenSavings}.`
  }

  // Add CSV note if available
  let csvNote = ''
  if (csv) {
    // CSV totalCount is evaluations (questions × models), so divide by number of models to get question count
    const csvQuestionCount = csv.totalCount / modelCount
    csvNote = `**Note on CSV:** Excluded from ranking as it only supports ${csvQuestionCount} of ${totalQuestions} questions (flat tabular data only). While CSV is highly token-efficient for simple tabular data, it cannot represent nested structures that other formats handle.`
  }

  // NOTE(review): template text is kept exactly as it appears in this view,
  // where its line breaks look collapsed — confirm the emitted markdown.
  return ` Each format ranked by efficiency (accuracy percentage per 1,000 tokens): \`\`\` ${efficiencyChart} \`\`\` *Efficiency score = (Accuracy % ÷ Tokens) × 1,000. 
Higher is better.* > [!TIP] > ${summary} ${csvNote} `.trim()
}

/**
 * Generate detailed accuracy report with breakdowns and methodology
 *
 * @param formatResults - Per-format statistics
 * @param results - Individual evaluation outcomes
 * @param questions - All generated questions (used for type and dataset lookups)
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns Per-model chart, breakdown tables, and the methodology text
 */
function generateDetailedAccuracyReport(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  questions: Question[],
  tokenCounts: Record<string, number>,
): string {
  const toon = formatResults.find(r => r.format === 'toon')
  const json = formatResults.find(r => r.format === 'json-pretty')

  const modelIds = models.map(m => m.modelId)
  // Only models that have at least one result
  const modelNames = modelIds.filter(id => results.some(r => r.model === id))

  // Generate model breakdown section
  const modelBreakdown = generateModelBreakdown(formatResults, results, modelNames)

  // Generate summary comparison
  const summaryComparison = generateSummaryComparison(toon, json)

  // Generate performance by dataset
  const datasetBreakdown = generateDatasetBreakdown(formatResults, results, questions, tokenCounts)

  // Generate performance by model
  const modelPerformance = generateModelPerformanceTable(formatResults, results, modelNames)

  // Generate question type breakdown
  const questionTypeBreakdown = generateQuestionTypeBreakdown(formatResults, results, questions)

  // Distinct questions that appear in the results
  const totalQuestions = [...new Set(results.map(r => r.questionId))].length

  // Calculate question type distribution
  // NOTE(review): counts come from `questions` while the denominator comes from
  // `results`; the percentages only sum to 100 if both cover the same questions.
  const fieldRetrievalCount = questions.filter(q => q.type === 'field-retrieval').length
  const aggregationCount = questions.filter(q => q.type === 'aggregation').length
  const filteringCount = questions.filter(q => q.type === 'filtering').length
  const structureAwarenessCount = questions.filter(q => q.type === 'structure-awareness').length
  const structuralValidationCount = questions.filter(q => q.type === 'structural-validation').length

  const fieldRetrievalPercent = ((fieldRetrievalCount / totalQuestions) * 100).toFixed(0)
  const aggregationPercent = ((aggregationCount / totalQuestions) * 100).toFixed(0)
  const filteringPercent = ((filteringCount / totalQuestions) * 100).toFixed(0)
  const structureAwarenessPercent = ((structureAwarenessCount
/ totalQuestions) * 100).toFixed(0)
  const structuralValidationPercent = ((structuralValidationCount / totalQuestions) * 100).toFixed(0)

  // Calculate dataset sizes
  // Each size is the length of the dataset's main array, or 0 when the dataset or array is absent
  const tabularSize = ACCURACY_DATASETS.find(d => d.name === 'tabular')?.data.employees?.length || 0
  const nestedSize = ACCURACY_DATASETS.find(d => d.name === 'nested')?.data.orders?.length || 0
  const analyticsSize = ACCURACY_DATASETS.find(d => d.name === 'analytics')?.data.metrics?.length || 0
  const githubSize = ACCURACY_DATASETS.find(d => d.name === 'github')?.data.repositories?.length || 0
  const eventLogsSize = ACCURACY_DATASETS.find(d => d.name === 'event-logs')?.data.logs?.length || 0
  const nestedConfigSize = 1 // Single config object

  // Calculate number of formats and evaluations
  const formatCount = formatResults.length
  const totalEvaluations = totalQuestions * formatCount * modelNames.length

  // NOTE(review): template text is kept exactly as it appears in this view,
  // where its line breaks look collapsed — confirm the emitted markdown.
  return ` Accuracy across ${modelNames.length} ${modelNames.length === 1 ? 'LLM' : 'LLMs'} on ${totalQuestions} data retrieval questions: \`\`\` ${modelBreakdown} \`\`\` ${summaryComparison}
Performance by dataset, model, and question type #### Performance by Question Type ${questionTypeBreakdown} #### Performance by Dataset ${datasetBreakdown} #### Performance by Model ${modelPerformance}
#### What's Being Measured This benchmark tests **LLM comprehension and data retrieval accuracy** across different input formats. Each LLM receives formatted data and must answer questions about it. This does **not** test the model's ability to generate TOON output – only to read and understand it. #### Datasets Tested Eleven datasets designed to test different structural patterns and validation capabilities: **Primary datasets:** 1. **Tabular** (${tabularSize} employee records): Uniform objects with identical fields – optimal for TOON's tabular format. 2. **Nested** (${nestedSize} e-commerce orders): Complex structures with nested customer objects and item arrays. 3. **Analytics** (${analyticsSize} days of metrics): Time-series data with dates and numeric values. 4. **GitHub** (${githubSize} repositories): Real-world data from top GitHub repos by stars. 5. **Event Logs** (${eventLogsSize} logs): Semi-uniform data with ~50% flat logs and ~50% with nested error objects. 6. **Nested Config** (${nestedConfigSize} configuration): Deeply nested configuration with minimal tabular eligibility. **Structural validation datasets:** 7. **Control**: Valid complete dataset (baseline for validation) 8. **Truncated**: Array with 3 rows removed from end (tests \`[N]\` length detection) 9. **Extra rows**: Array with 3 additional rows beyond declared length 10. **Width mismatch**: Inconsistent field count (missing salary in row 10) 11. **Missing fields**: Systematic field omissions (no email in multiple rows) #### Question Types ${totalQuestions} questions are generated dynamically across five categories: - **Field retrieval (${fieldRetrievalPercent}%)**: Direct value lookups or values that can be read straight off a record (including booleans and simple counts such as array lengths) - Example: "What is Alice's salary?" → \`75000\` - Example: "How many items are in order ORD-0042?" → \`3\` - Example: "What is the customer name for order ORD-0042?" 
→ \`John Doe\` - **Aggregation (${aggregationPercent}%)**: Dataset-level totals and averages plus single-condition filters (counts, sums, min/max comparisons) - Example: "How many employees work in Engineering?" → \`17\` - Example: "What is the total revenue across all orders?" → \`45123.50\` - Example: "How many employees have salary > 80000?" → \`23\` - **Filtering (${filteringPercent}%)**: Multi-condition queries requiring compound logic (AND constraints across fields) - Example: "How many employees in Sales have salary > 80000?" → \`5\` - Example: "How many active employees have more than 10 years of experience?" → \`8\` - **Structure awareness (${structureAwarenessPercent}%)**: Tests format-native structural affordances (TOON's \`[N]\` count and \`{fields}\`, CSV's header row) - Example: "How many employees are in the dataset?" → \`100\` - Example: "List the field names for employees" → \`id, name, email, department, salary, yearsExperience, active\` - Example: "What is the department of the last employee?" → \`Sales\` - **Structural validation (${structuralValidationPercent}%)**: Tests ability to detect incomplete, truncated, or corrupted data using structural metadata - Example: "Is this data complete and valid?" → \`YES\` (control dataset) or \`NO\` (corrupted datasets) - Tests TOON's \`[N]\` length validation and \`{fields}\` consistency checking - Demonstrates CSV's lack of structural validation capabilities #### Evaluation Process 1. **Format conversion**: Each dataset is converted to all ${formatCount} formats (${formatResults.map(f => FORMATTER_DISPLAY_NAMES[f.format] || f.format).join(', ')}). 2. **Query LLM**: Each model receives formatted data + question in a prompt and extracts the answer. 3. **Validate deterministically**: Answers are validated using type-aware comparison (e.g., \`50000\` = \`$50,000\`, \`Engineering\` = \`engineering\`, \`2025-01-01\` = \`January 1, 2025\`) without requiring an LLM judge. 
#### Models & Configuration - **Models tested**: ${modelNames.map(m => `\`${m}\``).join(', ')} - **Token counting**: Using \`gpt-tokenizer\` with \`o200k_base\` encoding (GPT-5 tokenizer) - **Temperature**: Not set (models use their defaults) - **Total evaluations**: ${totalQuestions} questions × ${formatCount} formats × ${modelNames.length} models = ${totalEvaluations.toLocaleString('en-US')} LLM calls `.trim()
}

/**
 * Generate ASCII bar chart showing per-model accuracy across formats
 *
 * One section per model: the model name followed by one line per format
 * (sorted by accuracy, highest first) with a progress bar, the accuracy
 * percentage, and the correct/total counts. The TOON line gets an arrow prefix.
 */
function generateModelBreakdown(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  modelNames: string[],
): string {
  // Pad every format label to the longest known display name so the bars line up
  const maxDisplayNameWidth = Math.max(
    ...Object.values(FORMATTER_DISPLAY_NAMES).map(name => name.length),
  )
  const progressBarWidth = 20

  return modelNames.map((modelName, i) => {
    const modelResults = formatResults.map((fr) => {
      const modelFormatResults = results.filter(r => r.model === modelName && r.format === fr.format)
      const correctCount = modelFormatResults.filter(r => r.isCorrect).length
      const totalCount = modelFormatResults.length
      // A model with no results for this format reports 0 rather than NaN
      const accuracy = totalCount > 0 ? correctCount / totalCount : 0

      return {
        format: fr.format,
        accuracy,
        correctCount,
        totalCount,
      }
    }).sort((a, b) => b.accuracy - a.accuracy)

    const formatLines = modelResults.map((result) => {
      const bar = createProgressBar(result.accuracy, 1, progressBarWidth)
      const accuracyString = `${(result.accuracy * 100).toFixed(1)}%`.padStart(6)
      const countString = `(${result.correctCount}/${result.totalCount})`
      const prefix = result.format === 'toon' ? '→ ' : ' '
      // Fall back to the raw format key when no display name is registered
      const displayName = FORMATTER_DISPLAY_NAMES[result.format] || result.format
      return `${prefix}${displayName.padEnd(maxDisplayNameWidth)} ${bar} ${accuracyString} ${countString}`
    }).join('\n')

    // Add blank line before model name, except for first model
    return `${i > 0 ?
'\n' : ''}${modelName}\n${formatLines}`
  }).join('\n')
}

/**
 * Generate summary comparison between TOON and JSON formats
 *
 * @param toon - Statistics for the TOON format, if present
 * @param json - Statistics for the pretty JSON format, if present
 * @returns A tip callout comparing accuracy and token usage, or `''` when either side is missing
 */
function generateSummaryComparison(
  toon: FormatResult | undefined,
  json: FormatResult | undefined,
): string {
  if (!toon || !json)
    return ''

  return ` > [!TIP] > TOON achieves **${(toon.accuracy * 100).toFixed(1)}% accuracy** (vs JSON's ${(json.accuracy * 100).toFixed(1)}%) while using **${((1 - toon.totalTokens / json.totalTokens) * 100).toFixed(1)}% fewer tokens** on these datasets. `.trim()
}

/**
 * Generate per-dataset performance breakdown tables
 *
 * For each dataset with results, emits a table of up to six formats ordered by
 * accuracy² per 1K tokens, highest first. Datasets without results are omitted.
 *
 * @param formatResults - Per-format statistics (supplies the format list and the token fallback)
 * @param results - Individual evaluation outcomes
 * @param questions - All generated questions (maps question IDs to datasets)
 * @param tokenCounts - Token counts keyed by `${format}-${dataset}`
 * @returns The concatenated tables
 */
function generateDatasetBreakdown(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  questions: Question[],
  tokenCounts: Record<string, number>,
): string {
  // Build question ID to dataset mapping for O(1) lookups
  const questionDatasetMap = new Map(questions.map(q => [q.id, q.dataset]))

  return ACCURACY_DATASETS.map((dataset) => {
    // The dataset's results do not depend on the format, so select them once
    // instead of re-filtering for every format
    const datasetFormatResults = results.filter(r => questionDatasetMap.get(r.questionId) === dataset.name)
    if (datasetFormatResults.length === 0)
      return ''

    // flatMap drops formats without results for this dataset and keeps the
    // element type precise without a cast
    const datasetResults = formatResults.flatMap((fr) => {
      const formatDatasetResults = datasetFormatResults.filter(r => r.format === fr.format)
      if (formatDatasetResults.length === 0)
        return []

      const correctCount = formatDatasetResults.filter(r => r.isCorrect).length
      const totalCount = formatDatasetResults.length
      const accuracy = correctCount / totalCount

      // Get token count for this dataset+format, falling back to the
      // format-wide average when the key is missing (or zero)
      const tokenKey = `${fr.format}-${dataset.name}`
      const tokens = tokenCounts[tokenKey] || fr.totalTokens

      return [{
        format: fr.format,
        accuracy,
        tokens,
        correctCount,
        totalCount,
      }]
    })

    if (datasetResults.length === 0)
      return ''

    // Sort by efficiency
    datasetResults.sort((a, b) => {
      const effA = (a.accuracy ** 2) / (a.tokens / 1000)
      const effB = (b.accuracy ** 2) / (b.tokens / 1000)
      return effB - effA
    })

    const tableRows = datasetResults.slice(0, 6).map(result =>
      `| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.tokens.toLocaleString('en-US')} | ${result.correctCount}/${result.totalCount} |`,
    ).join('\n')

    // NOTE(review): template text is kept exactly as it appears in this view,
    // where its line breaks look collapsed — confirm the emitted markdown.
    return ` ##### ${dataset.description} | Format | Accuracy | Tokens | Correct/Total | | ------ | -------- | ------ | ------------- | ${tableRows} `.trimStart()
  }).filter(Boolean).join('\n').trim()
}

/**
 * Generate question type breakdown table
 *
 * One row per question type that has results, one column per format; each
 * cell is that format's accuracy on the type, or `N/A` without results.
 *
 * @param formatResults - Per-format statistics (supplies the column order)
 * @param results - Individual evaluation outcomes
 * @param questions - All generated questions (maps question IDs to types)
 * @returns The table as text
 */
function generateQuestionTypeBreakdown(
  formatResults: FormatResult[],
  results: EvaluationResult[],
  questions: Question[],
): string {
  // Build header
  const formatNames = formatResults.map(fr => FORMATTER_DISPLAY_NAMES[fr.format] || fr.format)
  const header = `| Question Type | ${formatNames.join(' | ')} |`
  const separator = `| ------------- | ${formatNames.map(() => '----').join(' | ')} |`

  // Build rows
  const rows = QUESTION_TYPES.map((type) => {
    // Set membership keeps the per-result check constant-time
    const questionIds = new Set(questions.filter(q => q.type === type).map(q => q.id))
    const typeResults = results.filter(r => questionIds.has(r.questionId))

    if (typeResults.length === 0)
      return undefined

    const accuracies = formatResults.map((fr) => {
      const formatTypeResults = typeResults.filter(r => r.format === fr.format)
      if (formatTypeResults.length === 0)
        return 'N/A'

      const correctCount = formatTypeResults.filter(r => r.isCorrect).length
      const totalCount = formatTypeResults.length
const accuracy = totalCount > 0 ? correctCount / totalCount : 0 return `${(accuracy * 100).toFixed(1)}%` }) return `| ${QUESTION_TYPE_LABELS[type]} | ${accuracies.join(' | ')} |` }).filter(Boolean) return ` ${header} ${separator} ${rows.join('\n')} `.trim() } /** * Generate per-model performance comparison tables */ function generateModelPerformanceTable( formatResults: FormatResult[], results: EvaluationResult[], modelNames: string[], ): string { return modelNames.map((modelName) => { const modelResults = formatResults.map((fr) => { const modelFormatResults = results.filter(r => r.model === modelName && r.format === fr.format) const correctCount = modelFormatResults.filter(r => r.isCorrect).length const totalCount = modelFormatResults.length const accuracy = correctCount / totalCount return { format: fr.format, accuracy, correctCount, totalCount, } }).sort((a, b) => b.accuracy - a.accuracy) const tableRows = modelResults.map(result => `| \`${result.format}\` | ${(result.accuracy * 100).toFixed(1)}% | ${result.correctCount}/${result.totalCount} |`, ).join('\n') return ` ##### ${modelName} | Format | Accuracy | Correct/Total | | ------ | -------- | ------------- | ${tableRows} `.trimStart() }).join('\n').trim() } /** * Generate horizontal bar chart for efficiency ranking */ function generateHorizontalEfficiencyChart( ranking: EfficiencyRanking[], ): string { const barWidth = 20 const maxEfficiency = Math.max(...ranking.map(r => r.efficiency)) const maxFormatWidth = Math.max(...ranking.map((r) => { const displayName = FORMATTER_DISPLAY_NAMES[r.format] || r.format return displayName.length })) return ranking .map((r) => { const normalizedValue = r.efficiency / maxEfficiency const bar = createProgressBar(normalizedValue, 1, barWidth) const displayName = FORMATTER_DISPLAY_NAMES[r.format] || r.format const formatName = displayName.padEnd(maxFormatWidth) const efficiency = r.efficiency.toFixed(1).padStart(4) const accuracy = `${(r.accuracy * 100).toFixed(1)}%`.padStart(5) 
/**
 * Generate vertical bar chart for efficiency ranking
 *
 * @remarks
 * Bars are scaled against the highest efficiency in `ranking` and drawn on a
 * fixed grid of `chartHeight + 1` rows, followed by an axis row and one or two
 * label rows built from the `-`-separated parts of each format name.
 * An empty ranking, or one whose highest efficiency is not a positive finite
 * number, is drawn against a scale of 0 so no `NaN`/`Infinity` reaches the output.
 *
 * NOTE(review): the spacing inside the string literals below is reproduced
 * exactly as it appears in this view of the file; confirm column alignment
 * against the rendered chart.
 *
 * @param ranking - Entries to plot; position 0 is drawn as the top format and
 * positions 1-2 as the rest of the top three
 * @returns Multi-line chart string
 */
function generateVerticalEfficiencyChart(
  ranking: EfficiencyRanking[],
): string {
  const chartHeight = 8

  // Math.max() of an empty list is -Infinity and an all-zero ranking yields 0;
  // both would turn the scaling below into NaN/Infinity
  const highest = Math.max(...ranking.map(r => r.efficiency))
  const maxEfficiency = Number.isFinite(highest) && highest > 0 ? highest : 0

  // Generate rows from top to bottom
  const rows: string[] = []

  // Y-axis and bars
  for (let i = chartHeight; i >= 0; i--) {
    const threshold = (i / chartHeight) * maxEfficiency
    // Only the top, middle and bottom rows carry a numeric label
    const isLabelledRow = i === chartHeight || i === Math.floor(chartHeight / 2) || i === 0
    const yLabel = isLabelledRow
      ? Math.round(threshold).toString().padStart(4)
      : ' '

    const bars = ranking
      .map((r, position) => {
        const barHeight = maxEfficiency > 0
          ? (r.efficiency / maxEfficiency) * chartHeight
          : 0
        // Written as a negated >= so a NaN efficiency still draws nothing
        if (!(barHeight >= i))
          return ' '

        // Use different characters for visual distinction
        if (position === 0)
          return '▓' // Top format
        if (position <= 2)
          return '▒' // Top 3
        return '░' // Rest
      })
      .join(' ')

    rows.push(`${yLabel}│ ${bars}`)
  }

  // X-axis
  const axis = ` └──${ranking.map(() => '┴').join('────')}──`
  rows.push(axis)

  // Format labels (split long names into multiple rows)
  const labelRow = (partIndex: number): string => ranking
    .map(r => (r.format.split('-')[partIndex] || '').padEnd(5).substring(0, 5))
    .join('')

  rows.push(` ${labelRow(0)}`)

  const secondLabelRow = labelRow(1)
  if (secondLabelRow.trim())
    rows.push(` ${secondLabelRow}`)

  return rows.join('\n')
}
/**
 * Load every stored result set, keyed by model id
 *
 * @remarks
 * Reads all keys from `resultsStorage` and fetches their items concurrently.
 * Keys whose stored value is empty are omitted. Entries are added in the order
 * the keys were listed, so the shape of the returned object does not depend on
 * which read happens to finish first.
 *
 * @returns Map of model id to that model's evaluation results
 */
export async function getAllModelResults(): Promise<Record<string, EvaluationResult[]>> {
  const modelIds = await resultsStorage.getKeys()

  const entries = await Promise.all(
    modelIds.map(async (modelId) => {
      const stored = await resultsStorage.getItem<EvaluationResult[]>(modelId)
      return [modelId, stored] as const
    }),
  )

  const results: Record<string, EvaluationResult[]> = {}
  for (const [modelId, stored] of entries) {
    if (stored)
      results[modelId] = stored
  }

  return results
}
/**
 * Generate visual progress bar using ASCII characters
 *
 * @remarks
 * The `value / max` ratio is clamped to the range 0-1, so the result is always
 * exactly `width` characters long (for single-character `chars`): values above
 * `max` render a full bar, negative values an empty one. When `max` is `0` or
 * the ratio is not a number, the bar is rendered empty.
 *
 * @param value - Current value
 * @param max - Maximum value
 * @param width - Width of the bar in characters (default: 25)
 * @param chars - Characters to use for filled and empty sections
 * @param chars.filled - Character for filled portion (default: '█')
 * @param chars.empty - Character for empty portion (default: '░')
 * @returns ASCII progress bar string
 *
 * @example
 * createProgressBar(75, 100, 20) // "███████████████░░░░░"
 * createProgressBar(0.5, 1, 10) // "█████░░░░░"
 * createProgressBar(0.75, 1, 20, { filled: '▓', empty: '░' }) // "▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓░░░░░"
 * createProgressBar(150, 100, 10) // "██████████"
 */
export function createProgressBar(
  value: number,
  max: number,
  width = 25,
  chars: { filled: string, empty: string } = { filled: '█', empty: '░' },
): string {
  // String.prototype.repeat throws a RangeError on negative or infinite counts,
  // so both the width and the filled share are kept within bounds
  const totalWidth = Math.max(0, width)
  const ratio = max === 0 ? 0 : value / max
  const fraction = Number.isNaN(ratio) ? 0 : Math.min(1, Math.max(0, ratio))

  const filled = Math.round(fraction * totalWidth)
  const empty = totalWidth - filled

  return chars.filled.repeat(filled) + chars.empty.repeat(empty)
}
/**
 * Rule to ensure the first letter of the commit subject is lowercase.
 *
 * The check looks at the first ASCII letter found anywhere in the subject, so
 * leading digits or punctuation are skipped. A subject that is missing or
 * contains no letter passes this rule.
 *
 * @param parsed - Parsed commit object containing commit message parts.
 * @returns A tuple where the first element is a boolean indicating
 * if the rule passed, and the second is an optional error message.
 */
const subjectLowercaseFirst: Rule = async (parsed) => {
  // `subject` is nullable (the previous code had to silence that with `!`);
  // optional chaining avoids a TypeError when the header has no subject
  const firstLetter = parsed.subject?.match(/[a-z]/i)?.[0]

  if (firstLetter && firstLetter === firstLetter.toUpperCase()) {
    return [false, 'Subject must start with a lowercase letter']
  }

  return [true]
}
'og:title', content: name }], ['meta', { property: 'og:description', content: description }], ['meta', { property: 'og:image', content: ogImage }], ['meta', { name: 'twitter:title', content: name }], ['meta', { name: 'twitter:description', content: description }], ['meta', { name: 'twitter:image', content: twitterImage }], ['meta', { name: 'twitter:site', content: '@jschopplich' }], ['meta', { name: 'twitter:creator', content: '@jschopplich' }], ['meta', { name: 'twitter:card', content: 'summary_large_image' }], ], vite: { // @ts-expect-error – UnoCSS types are not compatible with Vite yet plugins: [UnoCSS(), llmstxt()], }, themeConfig: { logo: '/favicon.svg', nav: [ { text: 'Playground', link: '/playground', }, { text: 'Guide', activeMatch: '^/guide/', items: [ { text: 'Getting Started', link: '/guide/getting-started' }, { text: 'Format Overview', link: '/guide/format-overview' }, { text: 'Using TOON with LLMs', link: '/guide/llm-prompts' }, { text: 'Benchmarks', link: '/guide/benchmarks' }, ], }, { text: 'CLI', link: '/cli/', }, { text: 'Reference', activeMatch: '^/reference/', items: [ { text: 'API', link: '/reference/api' }, { text: 'Syntax Cheatsheet', link: '/reference/syntax-cheatsheet' }, { text: 'Specification', link: '/reference/spec' }, { text: 'Efficiency Formalization', link: '/reference/efficiency-formalization' }, ], }, { text: 'Ecosystem', activeMatch: '^/ecosystem/', items: [ { text: 'Tools & Playgrounds', link: '/ecosystem/tools-and-playgrounds' }, { text: 'Implementations', link: '/ecosystem/implementations' }, ], }, { text: `v${version}`, items: [ { text: 'Release Notes', link: releases, }, ], }, ], sidebar: { '/guide/': sidebarPrimary(), '/cli/': sidebarPrimary(), '/reference/': sidebarPrimary(), '/ecosystem/': sidebarPrimary(), }, socialLinks: [ { icon: 'github', link: github }, ], footer: { message: 'Released under the MIT License.', copyright: 'Copyright © 2025-PRESENT Johann Schopplich', }, search: { provider: 'local', }, }, markdown: { 
/**
 * Build the primary documentation sidebar.
 *
 * @returns Four sidebar groups (Guide, Tooling, Ecosystem, Reference), each
 * holding its page links in display order
 */
function sidebarPrimary(): DefaultTheme.SidebarItem[] {
  // Small constructors keep the listing below to one line per page
  const page = (text: string, link: string): DefaultTheme.SidebarItem => ({ text, link })
  const group = (text: string, items: DefaultTheme.SidebarItem[]): DefaultTheme.SidebarItem => ({ text, items })

  const guide = group('Guide', [
    page('Getting Started', '/guide/getting-started'),
    page('Format Overview', '/guide/format-overview'),
    page('Using TOON with LLMs', '/guide/llm-prompts'),
    page('Benchmarks', '/guide/benchmarks'),
  ])

  const tooling = group('Tooling', [
    page('Playground', '/playground'),
    page('CLI Reference', '/cli/'),
  ])

  const ecosystem = group('Ecosystem', [
    page('Tools & Playgrounds', '/ecosystem/tools-and-playgrounds'),
    page('Implementations', '/ecosystem/implementations'),
  ])

  const reference = group('Reference', [
    page('API (TypeScript)', '/reference/api'),
    page('Syntax Cheatsheet', '/reference/syntax-cheatsheet'),
    page('Specification', '/reference/spec'),
    page('Efficiency Formalization', '/reference/efficiency-formalization'),
  ])

  return [guide, tooling, ecosystem, reference]
}