Repository: stanford-crfm/ecosystem-graphs Branch: main Commit: 576655a544c9 Files: 194 Total size: 1.2 MB Directory structure: gitextract_sfi3u6er/ ├── .github/ │ └── workflows/ │ ├── collate.yml │ ├── deploy.yml │ └── test.yml ├── .gitignore ├── .yamllint.yaml ├── README.md ├── assets/ │ ├── 01ai.yaml │ ├── 360.yaml │ ├── adept.yaml │ ├── adobe.yaml │ ├── ai2.yaml │ ├── ai21.yaml │ ├── aleph_alpha.yaml │ ├── alibaba.yaml │ ├── amazon.yaml │ ├── anthropic.yaml │ ├── apple.yaml │ ├── argilla.yaml │ ├── argonne.yaml │ ├── aspia_space_institu.yaml │ ├── assembly.yaml │ ├── autogpt.yaml │ ├── avignon.yaml │ ├── baai.yaml │ ├── baichuan.yaml │ ├── baidu.yaml │ ├── bain.yaml │ ├── be_my_eyes.yaml │ ├── beitech.yaml │ ├── berkeley.yaml │ ├── bigcode.yaml │ ├── bigscience.yaml │ ├── bloomberg.yaml │ ├── boston.yaml │ ├── brex.yaml │ ├── bytedance.yaml │ ├── cagliostro.yaml │ ├── cartesia.yaml │ ├── casia.yaml │ ├── causallm.yaml │ ├── cerebras.yaml │ ├── character.yaml │ ├── chatglm.yaml │ ├── cmu.yaml │ ├── cognition.yaml │ ├── cognitive.yaml │ ├── cohere.yaml │ ├── columbia.yaml │ ├── compute_01_research.yaml │ ├── continue.yaml │ ├── cresta.yaml │ ├── databricks.yaml │ ├── deci.yaml │ ├── deepmind.yaml │ ├── deepnight.yaml │ ├── deepseek.yaml │ ├── dibt.yaml │ ├── duckduckgo.yaml │ ├── duolingo.yaml │ ├── eleutherai.yaml │ ├── ens.yaml │ ├── epfl.yaml │ ├── evolutionaryscale.yaml │ ├── faraday.yaml │ ├── fuse.yaml │ ├── genmo.yaml │ ├── glm.yaml │ ├── google.yaml │ ├── greenbit.yaml │ ├── h2oai.yaml │ ├── hubspot.yaml │ ├── huggingface.yaml │ ├── ibm.yaml │ ├── idea.yaml │ ├── ideogram.yaml │ ├── inflection.yaml │ ├── instacart.yaml │ ├── internlm.yaml │ ├── juni.yaml │ ├── kaist.yaml │ ├── kakaobrain.yaml │ ├── khan.yaml │ ├── konan.yaml │ ├── kotoba.yaml │ ├── ktai.yaml │ ├── kunlun.yaml │ ├── laion.yaml │ ├── laion_ev.yaml │ ├── latitude.yaml │ ├── lehigh.yaml │ ├── lg.yaml │ ├── lg_ai_research.yaml │ ├── linkedin.yaml │ ├── llm360.yaml │ ├── lmsys.yaml │ ├── 
mathai.yaml │ ├── maya.yaml │ ├── meta.yaml │ ├── microsoft.yaml │ ├── mila.yaml │ ├── mistral.yaml │ ├── moonhub.yaml │ ├── moreh.yaml │ ├── mosaic.yaml │ ├── nanyang.yaml │ ├── naver.yaml │ ├── ncsoft.yaml │ ├── neeva.yaml │ ├── nextdoor.yaml │ ├── nolano.yaml │ ├── notion.yaml │ ├── nous.yaml │ ├── nucleus.yaml │ ├── nvidia.yaml │ ├── oasst.yaml │ ├── ollama.yaml │ ├── openai.yaml │ ├── openbmb.yaml │ ├── openlemur.yaml │ ├── openx.yaml │ ├── orion.yaml │ ├── osu.yaml │ ├── othersideai.yaml │ ├── oxford.yaml │ ├── paladin.yaml │ ├── peking.yaml │ ├── perplexity.yaml │ ├── pleias.yaml │ ├── portkey.yaml │ ├── quizlet.yaml │ ├── quora.yaml │ ├── qwen_team.yaml │ ├── rakuten.yaml │ ├── reexpress.yaml │ ├── reka.yaml │ ├── robin.yaml │ ├── runway_ai,_inc..yaml │ ├── rwkv.yaml │ ├── salesforce.yaml │ ├── samba.yaml │ ├── sana.yaml │ ├── sciphi.yaml │ ├── shanghai.yaml │ ├── shop.yaml │ ├── singapore.yaml │ ├── skt.yaml │ ├── snap.yaml │ ├── soochow.yaml │ ├── speak.yaml │ ├── spotify.yaml │ ├── stability.yaml │ ├── stability_ai.yaml │ ├── stanford.yaml │ ├── stonybrook.yaml │ ├── suno.yaml │ ├── tiger.yaml │ ├── together.yaml │ ├── tokyo.yaml │ ├── toronto.yaml │ ├── transformify.yaml │ ├── trevor.yaml │ ├── triml.yaml │ ├── tsinghua.yaml │ ├── twelvelabs.yaml │ ├── uae.yaml │ ├── unknown.yaml │ ├── uw.yaml │ ├── uwashington.yaml │ ├── vago.yaml │ ├── viable.yaml │ ├── vilm.yaml │ ├── wayve.yaml │ ├── writer.yaml │ ├── xai.yaml │ ├── xverse.yaml │ ├── xwin.yaml │ ├── yandex.yaml │ ├── you.yaml │ └── zhejiang.yaml ├── components/ │ ├── home.html │ ├── nav.html │ └── table.html ├── css/ │ └── main.css ├── index.html ├── js/ │ ├── general.js │ ├── main.js │ └── schemas.yaml ├── pre-commit.sh ├── requirements.txt ├── resources/ │ ├── .gitkeep │ └── all_assets.csv ├── scripts/ │ └── collate.py ├── server.py └── yamlfix.toml ================================================ FILE CONTENTS ================================================ 
================================================ FILE: .github/workflows/collate.yml ================================================ --- name: Collate "on": workflow_dispatch: push: paths: - assets/** jobs: collate-assets: name: Collate all assets into single file runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v3 with: fetch-depth: 0 - name: Setup Python uses: actions/setup-python@v5 with: python-version: '3.9' - name: Install dependencies run: python -m pip install --upgrade pip pandas pyyaml - name: collate run: |- ./scripts/collate.py - name: commit run: |- git config user.name "GitHub Actions Bot" git config user.email "<>" git add resources/all_assets.csv git commit -m "Update collated assets CSV." git push origin main ================================================ FILE: .github/workflows/deploy.yml ================================================ --- name: Deploy "on": push: branches: main jobs: # Check out the main branch of the development repository and force update the # main of the website repository. Setting fetch-depth to 0 allows us to # retrieve all the history for the specified branch. 
update-website: name: Update Website runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v3 with: fetch-depth: 0 - name: Setup SSH uses: MrSquaare/ssh-setup-action@v1 with: host: github.com private-key: ${{ secrets.WEBSITE_DEPLOY_SSH_PRIVATE_KEY }} - name: Update the github-pages branch of the website repository env: COMMIT_AUTHOR: Stanford-CRFM-Website run: | # Set up Git configuration git config --global user.name "${{ env.COMMIT_AUTHOR }}" git config --global user.email "username@users.noreply.github.com" # Update the website repository git remote add website git+ssh://git@github.com/stanford-crfm-website/ecosystem-graphs.git git push -u website main ================================================ FILE: .github/workflows/test.yml ================================================ --- name: Test "on": push: branches: main pull_request: branches: - main jobs: format: name: Format YAML files runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v3 - name: Set up Python 3.8 uses: actions/setup-python@v1 with: python-version: 3.8 - name: Validate format run: sh ./pre-commit.sh && git diff --exit-code ================================================ FILE: .gitignore ================================================ # Development Environment .DS_Store .vscode # Group specific nav tags ================================================ FILE: .yamllint.yaml ================================================ --- yaml-files: - '*.yaml' - '*.yml' - '.yamllint' rules: braces: enable brackets: enable colons: enable commas: enable comments: level: warning comments-indentation: level: warning document-end: disable document-start: level: warning empty-lines: enable empty-values: disable hyphens: enable indentation: level: warning key-duplicates: enable key-ordering: disable line-length: disable new-line-at-end-of-file: enable new-lines: enable octal-values: disable quoted-strings: disable trailing-spaces: enable truthy: level: warning 
================================================ FILE: README.md ================================================ # Ecosystem Graphs ## Overview Welcome! Ecosystem Graphs is an ongoing effort to track the foundation model ecosystem, namely both the assets (datasets, models, and applications) and their relationships. Using it, one can answer questions such as: What are the latest foundation models? Who builds them and where are they used downstream? What are the general trends over time? We hope that ecosystem graphs will be a useful resource for researchers, application developers, policymakers, and the public to better understand the foundation models ecosystem. To explore the ecosystem, check out the [website](https://crfm.stanford.edu/ecosystem-graphs/) or read the [paper](https://arxiv.org/abs/2303.15772). Briefly, an ecosystem graph is defined by: - **Assets.** These are the nodes in the graph, which can be datasets (e.g. The Pile), models (e.g. Stable Diffusion), or applications (e.g. Microsoft Word). - **Dependencies.** These are the edges in the graph, which indicate how assets are built (e.g. the BLOOM model is trained on the ROOTS dataset). - **Ecosystem cards.** These are structured cards that house metadata on each asset (e.g. who built it, when, what is the license). ## Contribute We actively encourage community contributions. To contribute: - Add assets by filling out [this form](https://forms.gle/VqnSsZhv62hJ5rP36). No coding expertise required! - Submit a PR (run `pre-commit.sh` before submitting) To visualize and explore your changes, start a local server: python server.py and navigate to [http://localhost:8000](http://localhost:8000). ## Cite as ``` @article{bommasani2023ecosystem-graphs, author = {Bommasani, Rishi and Soylu, Dilara and Liao, Thomas I. and Creel, Kathleen A. 
and Liang, Percy}, title = {Ecosystem Graphs: The Social Footprint of Foundation Models}, month = mar, year = 2023, url = {https://arxiv.org/abs/2303.15772} } ``` ================================================ FILE: assets/01ai.yaml ================================================ - access: open analysis: Evaluated on standard language benchmarks, common sense reasoning, and reading comprehension in comparison to SoTA LLMs. created_date: 2023-11-02 dependencies: [] description: The Yi series models are large language models trained from scratch by developers at 01 AI. feedback: https://huggingface.co/01-ai/Yi-34B/discussions intended_uses: '' license: explanation: Model license can be found at https://huggingface.co/01-ai/Yi-34B/blob/main/LICENSE. Code license is under Apache 2.0 value: custom modality: text; text model_card: https://huggingface.co/01-ai/Yi-34B monitoring: unknown name: Yi nationality: China organization: 01 AI prohibited_uses: none quality_control: Model underwent supervised fine-tuning, leading to a greater diversity of responses. size: 34B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/01-ai/Yi - access: open analysis: Yi-VL outperforms all existing open-source models in MMMU and CMMMU, two advanced benchmarks that include massive multi-discipline multimodal questions (based on data available up to January 2024). created_date: 2024-01-23 dependencies: [] description: The Yi Vision Language (Yi-VL) model is the open-source, multimodal version of the Yi Large Language Model (LLM) series, enabling content comprehension, recognition, and multi-round conversations about images. feedback: https://huggingface.co/01-ai/Yi-VL-34B/discussions intended_uses: '' license: explanation: Model license can be found at https://huggingface.co/01-ai/Yi-VL-34B/blob/main/LICENSE. 
Code license is under Apache 2.0 value: custom modality: image, text; text model_card: https://huggingface.co/01-ai/Yi-VL-34B monitoring: unknown name: Yi-VL nationality: China organization: 01 AI prohibited_uses: '' quality_control: unknown size: 34B parameters (dense) training_emissions: unknown training_hardware: 128 NVIDIA A800 (80G) GPUs training_time: 10 days type: model url: https://github.com/01-ai/Yi ================================================ FILE: assets/360.yaml ================================================ - access: open analysis: Achieved competitive performance on relevant benchmarks against other 7B models in Chinese, English, and coding tasks. created_date: 2024-05-23 dependencies: [] description: 360 Zhinao is a multilingual LLM in Chinese and English with chat capabilities. feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: 360 Zhinao nationality: China organization: 360 Security prohibited_uses: '' quality_control: '' size: 7B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2405.13386 ================================================ FILE: assets/adept.yaml ================================================ - access: closed analysis: '' created_date: explanation: The date the model was announced in the [Adept blog post](https://www.adept.ai/blog/act-1). value: 2022-09-14 dependencies: [] description: ACT-1 (ACtion Transformer) is a large-scale transformer model designed and trained specifically for taking actions on computers (use software tools APIs and websites) in response to the user's natural language commands. 
feedback: '' intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: ACT-1 nationality: USA organization: Adept prohibited_uses: '' quality_control: '' size: '' training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.adept.ai/blog/act-1 - access: open analysis: Evaluated in comparison to LLaMA 2 and MPT Instruct, and outperforms both on standard benchmarks. created_date: 2023-09-07 dependencies: [] description: Persimmon is the most capable open-source, fully permissive model with fewer than 10 billion parameters, as of its release date. feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: Persimmon nationality: USA organization: Adept prohibited_uses: '' quality_control: '' size: 8B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.adept.ai/blog/persimmon-8b - access: open analysis: Evaluated on standard image understanding benchmarks. created_date: 2023-10-17 dependencies: [] description: Fuyu is a small version of the multimodal model that powers Adept's core product. feedback: https://huggingface.co/adept/fuyu-8b/discussions intended_uses: The model is intended for research purposes only. license: CC-BY-NC-4.0 modality: image, text; text model_card: https://huggingface.co/adept/fuyu-8b monitoring: '' name: Fuyu nationality: USA organization: Adept prohibited_uses: The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model. quality_control: none size: 8B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.adept.ai/blog/fuyu-8b - access: closed analysis: Evaluated on the MMLU, GSM8K, MATH, and HumanEval benchmarks. 
According to these benchmarks, Fuyu-Heavy is, as of release, the strongest multimodal model trained outside of Google or OpenAI. created_date: 2024-01-24 dependencies: [] description: Fuyu Heavy is a new multimodal model designed specifically for digital agents. feedback: none intended_uses: unknown license: unknown modality: image, text; text model_card: none monitoring: '' name: Fuyu Heavy nationality: USA organization: Adept prohibited_uses: none quality_control: none size: explanation: The size of the model is 10-20 times smaller than GPT-4V and Gemini Ultra, as per announcement. value: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.adept.ai/blog/adept-fuyu-heavy ================================================ FILE: assets/adobe.yaml ================================================ - access: closed analysis: '' created_date: 2023-10-10 dependencies: [] description: Firefly Image 2 is the next generation of generative AI for imaging, bringing significant advancements to creative control and quality, including new Text to Image capabilities now available in the popular Firefly web app where 90% of users are new to Adobe products. feedback: '' intended_uses: creative generation of digital art and images license: unknown modality: text; image model_card: none monitoring: '' name: Firefly Image 2 nationality: USA organization: Adobe prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content. quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://firefly.adobe.com/ - access: closed analysis: '' created_date: 2023-10-10 dependencies: [] description: "Firefly Vector is the world\u2019s first generative AI focused on\ \ producing vector graphics, bringing Adobe's vector graphic and generative AI\ \ expertise directly into Adobe Illustrator workflows with Text to Vector Graphic." 
feedback: '' intended_uses: creative generation of digital art and images license: unknown modality: text; vector graphic model_card: none monitoring: '' name: Firefly Vector nationality: USA organization: Adobe prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content. quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://firefly.adobe.com/ - access: closed analysis: '' created_date: 2023-10-10 dependencies: [] description: Firefly Design powers instant generation of amazing quality template designs in Adobe Express with the new Text to Template capability. feedback: '' intended_uses: creative generation of digital art and images license: unknown modality: text; template design model_card: none monitoring: '' name: Firefly Design nationality: USA organization: Adobe prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content. quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://firefly.adobe.com/ - access: limited adaptation: '' created_date: 2023-03-21 dependencies: - Firefly Image 2 - Firefly Vector - Firefly Design description: Adobe Firefly is a standalone web application. It offers new ways to ideate, create, and communicate while significantly improving creative workflows using generative AI. failures: unknown feedback: '' intended_uses: creative generation of digital art and images license: unknown monitoring: '' monthly_active_users: unknown name: Firefly nationality: USA organization: Adobe output_space: AI-generated creations prohibited_uses: AI/ML training, attempting to create abusive, illegal, or confidential content. 
quality_control: '' terms_of_service: https://www.adobe.com/legal/licenses-terms/adobe-gen-ai-user-guidelines.html type: application url: https://firefly.adobe.com/ user_distribution: unknown - access: open analysis: none created_date: 2023-09-17 datasheet: https://huggingface.co/datasets/uonlp/CulturaX dependencies: - mC4 - OSCAR description: CulturaX is a substantial multilingual dataset with 6.3 trillion tokens in 167 languages, tailored for LLM development. excluded: unknown feedback: https://huggingface.co/datasets/uonlp/CulturaX/discussions included: unknown intended_uses: '' license: mC4, OSCAR modality: text monitoring: unknown name: CulturaX nationality: unknown organization: University of Oregon, Adobe prohibited_uses: The data must not be utilized for malicious or harmful purposes towards humanity. quality_control: unknown sample: [] size: 6.3 trillion tokens type: dataset url: https://arxiv.org/pdf/2309.09400 ================================================ FILE: assets/ai2.yaml ================================================ - access: open analysis: '' created_date: 2022-04-16 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: Apache 2.0 modality: NLP tasks monitoring: '' name: NaturalInstructions-v2 nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' sample: [] size: 1600 tasks type: dataset url: https://arxiv.org/abs/2204.07705 - access: open analysis: Randomly sampled dialogues from dataset are evaluated according to six established criteria of natural flow, context dependence, topic consistency, speaker consistency, specificity, and overall. created_date: 2023-05-24 datasheet: https://huggingface.co/datasets/allenai/soda dependencies: [] description: SODA is the first publicly available, million-scale, high-quality dialogue dataset covering a wide range of social interactions. 
excluded: '' feedback: '' included: '' intended_uses: '' license: CC BY 4.0 modality: text monitoring: '' name: SODA nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' sample: [] size: 1.5M dialogues type: dataset url: https://arxiv.org/pdf/2212.10465.pdf - access: open analysis: Conducted experiments on models trained with Multimodal C4 in comparison to models trained on single image/caption datasets created_date: 2023-06-09 datasheet: '' dependencies: - C4 description: An augmentation of C4 with images added and made openly available. excluded: '' feedback: '' included: '' intended_uses: '' license: explanation: Data license is under ODC-By. Code license is under MIT value: ODC-By modality: image, text monitoring: '' name: Multimodal C4 nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' sample: [] size: 43B English tokens with 101.2M documents and 571M images type: dataset url: https://arxiv.org/pdf/2304.06939.pdf - access: open analysis: Evaluated by human testers on generalization capabilities and responses compared to other chatbots. created_date: 2023-05-24 dependencies: - SODA - ProsocialDialog - T5 description: COSMO is a conversation agent with greater generalizability on both in- and out-of-domain chitchat datasets feedback: https://huggingface.co/allenai/cosmo-xl/discussions intended_uses: '' license: '' modality: text; text model_card: https://huggingface.co/allenai/cosmo-xl monitoring: '' name: COSMO nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' size: 11B parameters (dense) training_emissions: unknown training_hardware: v3-128 TPU accelerators with batch size 256 training_time: unknown type: model url: https://arxiv.org/pdf/2212.10465.pdf - access: open analysis: Compared with other open and closed datasets in regards to size and quality control. 
created_date: 2023-08-18 datasheet: https://drive.google.com/file/d/12gOf5I5RytsD159nSP7iim_5zN31FCXq/view dependencies: [] description: Dolma is a dataset of 3 trillion tokens from a diverse mix of web content, academic publications, code, books, and encyclopedic materials excluded: '' feedback: '' included: '' intended_uses: '' license: explanation: license can be found at https://allenai.org/impact-license value: AI2 ImpACT modality: text monitoring: Quality filtration, deduplication, and risk mitigation via logistic qualifiers and regular expressions used. name: Dolma nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' sample: [] size: 3T tokens type: dataset url: https://blog.allenai.org/dolma-3-trillion-tokens-open-llm-corpus-9a0ff4b8da64 - access: open analysis: Models trained with dataset evaluated on downstream performance. created_date: 2023-11-20 datasheet: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture dependencies: - FLAN Collection - Open Assistant 1 - ShareGPT - Alpaca dataset - Code Alpaca - LIMA - WizardLM - OpenOrca description: Tulu-V2-mix is a dataset composed of many high-quality instruction datasets that results in stronger performance across a variety of reasoning and knowledge-probing tasks. excluded: '' feedback: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture/discussions included: '' intended_uses: '' license: ODC-BY modality: text monitoring: '' name: Tulu-V2-mix nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' sample: [] size: explanation: Magnitude of size is around 100M tokens, given the length distribution of dataset provided in model card. value: unknown type: dataset url: https://arxiv.org/pdf/2311.10702.pdf - access: open analysis: Evaluated on MT-Bench and AlpacaEval. compared to other chatbots. 
created_date: 2023-11-20 dependencies: - LLaMA 2 - Tulu-V2-mix description: Tulu 2 is a language model trained on the new Tulu-v2-mix dataset and fine-tuned on more state of the art language models. feedback: https://huggingface.co/allenai/tulu-2-70b/discussions intended_uses: '' license: explanation: license can be found at https://allenai.org/impact-license value: AI2 ImpACT modality: text; text model_card: https://huggingface.co/allenai/tulu-2-70b monitoring: '' name: Tulu 2 nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2311.10702.pdf - access: open analysis: Evaluated on MT-Bench and AlpacaEval. compared to other chatbots. created_date: 2023-11-20 dependencies: - LLaMA 2 - Tulu-V2-mix description: Tulu 2 DPO is created in a similar manner to Tulu 2, but with Direct Preference Optimization (DPO). feedback: https://huggingface.co/allenai/tulu-2-dpo-70b/discussions intended_uses: '' license: explanation: license can be found at https://allenai.org/impact-license value: AI2 ImpACT modality: text; text model_card: https://huggingface.co/allenai/tulu-2-dpo-70b monitoring: '' name: Tulu 2 DPO nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2311.10702.pdf - access: open analysis: Evaluated on MT-Bench and AlpacaEval. compared to other chatbots. created_date: 2023-11-20 dependencies: - Code LLaMA - Tulu-V2-mix description: Code Tulu 2 is a fine-tuned version of Code LLaMA that was trained on a mix of publicly available, synthetic and human datasets. 
feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions intended_uses: '' license: explanation: license can be found at https://allenai.org/impact-license value: AI2 ImpACT modality: text; code, text model_card: https://huggingface.co/allenai/codetulu-2-13b monitoring: '' name: Code Tulu 2 nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2311.10702.pdf - access: open analysis: Evaluated on standard LLM tasks and benchmarks in comparison to LLaMA, Falcon, and MPT, in addition to other same-sized models. created_date: 2024-02-01 dependencies: - Dolma description: Open Language Model (OLMo) is designed to provide access to data, training code, models, and evaluation code necessary to advance AI through open research to empower academics and researchers to study the science of language models collectively. feedback: https://huggingface.co/allenai/OLMo-7B/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/allenai/OLMo-7B monitoring: unknown name: OLMo nationality: USA organization: AI2 prohibited_uses: '' quality_control: training data from Dolma filtered and deduplicated before being trained on. size: 7B parameters (dense) training_emissions: 75.05 tCo2eq training_hardware: 27 nodes, with each node containing 8x NVIDIA A100-40GB GPUs provided by MosaicML training_time: unknown type: model url: https://allenai.org/olmo/olmo-paper.pdf - access: open analysis: none created_date: 2023-09-09 datasheet: https://huggingface.co/datasets/allenai/MADLAD-400 dependencies: - Common Crawl description: MADLAD-400 is a document-level multilingual dataset based on Common Crawl, covering 419 languages in total. 
excluded: '' feedback: https://huggingface.co/datasets/allenai/MADLAD-400/discussions included: '' intended_uses: '' license: CC BY 4.0 modality: text monitoring: unknown name: MADLAD-400 nationality: USA organization: AI2 prohibited_uses: '' quality_control: '' sample: [] size: 3 trillion tokens type: dataset url: https://arxiv.org/abs/2309.04662 - access: closed analysis: Extensive scaling laws analysis has been performed on Evo considering beyond-transformer architectures. created_date: 2024-09-30 dependencies: - StripedHyena description: Evo is a foundation model based on StripedHyena, which is a deep signal processing architecture. It is capable of generalizing across DNA, RNA, and proteins and is used for generative design of new CRISPR systems. feedback: Feedback or inquiries can be addressed to the Liquid AI team. intended_uses: The Evo model is intended for generative design of new CRISPR systems, and it generalizes across DNA, RNA, and proteins. license: unknown modality: text; text model_card: unknown monitoring: unknown name: Evo nationality: USA organization: Liquid AI prohibited_uses: unknown quality_control: The model performance has been extensively evaluated with scaling laws analysis. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.liquid.ai/research/liquid-neural-networks-research ================================================ FILE: assets/ai21.yaml ================================================ - access: closed analysis: none created_date: explanation: The date the white paper was made public. value: 2021-08-11 datasheet: none dependencies: [] description: The dataset used to train the Jurassic-1 models, based on publicly available data. 
excluded: unknown feedback: none included: unknown intended_uses: unknown license: unknown modality: text monitoring: none name: Jurassic-1 dataset nationality: Israel organization: AI21 Labs prohibited_uses: none quality_control: none sample: [] size: 300B tokens type: dataset url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf - access: closed analysis: unknown created_date: explanation: The date the model was [announced](https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). value: 2022-12-01 datasheet: none dependencies: [] description: The dataset used to instruction-tune the Jurassic-1 Instruct models. excluded: unknown feedback: none included: unknown intended_uses: unknown license: unknown modality: text monitoring: none name: Jurassic-1 Instruct dataset nationality: Israel organization: AI21 Labs prohibited_uses: unknown quality_control: unknown sample: [] size: unknown type: dataset url: none - access: explanation: Jurassic-1 is available through AI21's paid API. value: limited analysis: explanation: Results are reported in the [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) value: Evaluated on several standard benchmarks (e.g. ARC, BoolQ, HellaSwag, RTE, Winogrande) created_date: explanation: The date the white paper was made public. value: 2021-08-11 dependencies: - Jurassic-1 dataset description: Jurassic-1 is a family of autoregressive language models (Large, Grande, Jumbo). 
feedback: none intended_uses: unknown license: unknown modality: text model_card: none monitoring: none name: Jurassic-1 nationality: Israel organization: AI21 Labs prohibited_uses: explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. quality_control: none size: 178B parameters (dense) training_emissions: unknown training_hardware: explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) value: Over 800 A100 GPUs training_time: explanation: Training details from [the paper](https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf) value: Several months type: model url: https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf - access: explanation: Jurassic-1 Instruct is available through AI21's paid API. value: limited analysis: none created_date: explanation: The date the model was announced(https://docs.ai21.com/changelog/jurassic-1-instruct-is-now-available). value: 2022-12-01 dependencies: - Jurassic-1 - Jurassic-1 Instruct dataset description: Jurassic-1 Instruct is an instruction-tuned autoregressive language model. feedback: none intended_uses: Jurassic-1 Instruct was trained specifically to handle instructions-only prompts ("zero-shot") without examples ("few-shot"). It is the most natural way to interact with language models, and it is the best way to get a sense of the optimal output for your task without any examples. 
license: unknown modality: text; text model_card: none monitoring: none name: Jurassic-1 Instruct nationality: Israel organization: AI21 Labs prohibited_uses: explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. quality_control: unknown size: 17B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://docs.ai21.com/docs/jurassic-1-instruct-beta - access: explanation: Jurassic-2 is available through AI21's paid API. value: limited analysis: The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2. created_date: 2023-03-09 dependencies: [] description: Jurassic-2 is a family of language models designed to replace Jurassic-1. feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: unknown name: Jurassic-2 nationality: Israel organization: AI21 Labs prohibited_uses: explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. quality_control: unknown size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://docs.ai21.com/docs/jurassic-2-models - access: limited adaptation: The Playground provides direct access to the language models (Complete API) as well as wrapped for Rewrite and Summarize. created_date: explanation: The date Jurassic-1 was made public. 
value: 2021-08-11 dependencies: - Jurassic-1 - Jurassic-1 Instruct - Jurassic-2 - AI21 Summarization API - AI21 Paraphrase API description: The AI21 Labs Playground supports several task-specific APIs in addition to a variety of models. failures: unknown feedback: Feedback can be given by emailing at info at ai21.com intended_uses: The intended uses are text completion, rewriting, and summarization. license: explanation: No license was found. value: none monitoring: unknown monthly_active_users: unknown name: AI21 Playground nationality: Israel organization: AI21 Labs output_space: explanation: The Playground supports different use cases that all involve text generation. value: text prohibited_uses: explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. quality_control: unknown terms_of_service: https://www.ai21.com/terms-of-use type: application url: https://studio.ai21.com/playground/ user_distribution: unknown - access: limited adaptation: The AI21 language models are further specialized to the task of paraphrasing. created_date: explanation: The date Jurassic-2 and Task-Specific APIs were released. value: 2023-03-09 dependencies: - Jurassic-2 description: AI21 Studio's Paraphrase API offers access to our world-class paraphrasing engine. It has been specifically developed for suggesting alternative ways to convey the same message using different words. failures: unknown feedback: Feedback can be given by emailing at info at ai21.com intended_uses: The intended uses are text paraphrasing. license: explanation: No license was found. 
value: none monitoring: unknown monthly_active_users: unknown name: AI21 Paraphrase API nationality: Israel organization: AI21 Labs output_space: text prohibited_uses: explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence. quality_control: unknown terms_of_service: https://www.ai21.com/terms-of-use type: application url: https://docs.ai21.com/docs/paraphrase-api user_distribution: unknown - access: limited adaptation: The AI21 language models are further specialized to the task of summarization. created_date: explanation: The date Jurassic-2 and Task-Specific APIs were released. value: 2023-03-09 dependencies: - Jurassic-2 description: AI21 Studio's Summarize API offers access to our world-class summarization engine. It has been specifically developed for reading long texts and providing a faithful summary of the original document. failures: unknown feedback: Feedback can be given by emailing at info at ai21.com intended_uses: The intended uses are text summarization. license: explanation: No license was found. value: none monitoring: unknown monthly_active_users: unknown name: AI21 Summarization API nationality: Israel organization: AI21 Labs output_space: text prohibited_uses: explanation: https://docs.ai21.com/docs/responsible-use value: Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence.
quality_control: unknown terms_of_service: https://www.ai21.com/terms-of-use type: application url: https://docs.ai21.com/docs/summarize-api user_distribution: unknown - access: limited adaptation: unknown created_date: explanation: https://www.businesswire.com/news/home/20201027005162/en/AI21-Labs-Comes-out-of-Stealth-and-Launches-First-Deep-Tech-Writing-Assistant-Wordtune value: 2020-10-27 dependencies: - AI21 Paraphrase API description: Wordtune, the first AI-based writing companion that understands context and meaning. failures: unknown feedback: '' intended_uses: The Wordtune assistant is a writing assistant license: explanation: License can be found in the terms of service document. value: Wordtune License monitoring: unknown monthly_active_users: unknown name: Wordtune nationality: Israel organization: AI21 Labs output_space: text prohibited_uses: '' quality_control: unknown terms_of_service: https://www.wordtune.com/terms-of-use type: application url: https://www.wordtune.com/ user_distribution: unknown - access: limited adaptation: unknown created_date: 2021-11-16 dependencies: - AI21 Summarization API description: Wordtune Read is an AI reader that summarizes long documents so you can understand more, faster. failures: unknown feedback: '' intended_uses: '' license: explanation: License can be found in the terms of service document. value: Wordtune License monitoring: unknown monthly_active_users: unknown name: Wordtune Read nationality: Israel organization: AI21 Labs output_space: text prohibited_uses: '' quality_control: unknown terms_of_service: https://www.wordtune.com/terms-of-use type: application url: https://www.wordtune.com/read user_distribution: unknown - access: open analysis: Jamba outperforms or matches other state-of-the-art models in its size class on a wide range of benchmarks. created_date: 2024-03-28 dependencies: [] description: "Jamba is a state-of-the-art, hybrid SSM-Transformer LLM.
Jamba is\ \ the world\u2019s first production-grade Mamba based model." feedback: https://huggingface.co/ai21labs/Jamba-v0.1/discussions intended_uses: intended for use as a foundation layer for fine tuning, training license: Apache 2.0 modality: text; text model_card: https://huggingface.co/ai21labs/Jamba-v0.1 monitoring: '' name: Jamba nationality: Israel organization: AI21 Labs prohibited_uses: '' quality_control: '' size: 52B parameters (sparse) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.ai21.com/blog/announcing-jamba - access: open analysis: The models were evaluated based on their ability to handle long contexts, speed, and quality. They outperformed competitors in their size class, scoring high on the Arena Hard benchmark. created_date: 2024-08-22 dependencies: [] description: A family of models that demonstrate superior long context handling, speed, and quality. Built on a novel SSM-Transformer architecture, they surpass other models in their size class. These models are useful for enterprise applications, such as lengthy document summarization and analysis. The Jamba 1.5 family also includes the longest context window, at 256K, among open models. They are fast, quality-focused, and handle long contexts efficiently. feedback: Unknown intended_uses: The models are built for enterprise scale AI applications. They are purpose-built for efficiency, speed, and ability to solve critical tasks that businesses care about, such as lengthy document summarization and analysis. They can also be used for RAG and agentic workflows. license: Jamba Open Model License modality: text; text model_card: unknown monitoring: Unknown name: Jamba 1.5 nationality: unknown organization: AI21 prohibited_uses: Unknown quality_control: The models were evaluated on the Arena Hard benchmark. For maintaining long context performance, they were tested on the RULER benchmark. 
size: 94B parameters training_emissions: Unknown training_hardware: For speed comparisons, Jamba 1.5 Mini used 2xA100 80GB GPUs, and Jamba 1.5 Large used 8xA100 80GB GPUs. training_time: Unknown type: model url: https://www.ai21.com/blog/announcing-jamba-model-family ================================================ FILE: assets/aleph_alpha.yaml ================================================ - access: closed analysis: unknown created_date: explanation: Date the models were announced by the Aleph Alpha twitter account value: 2022-04-14 datasheet: none dependencies: [] description: The dataset used to train the Luminous models. excluded: unknown feedback: none included: unknown intended_uses: unknown license: unknown modality: text monitoring: none name: Luminous dataset nationality: Germany organization: Aleph Alpha prohibited_uses: unknown quality_control: unknown sample: [] size: unknown type: dataset url: none - access: explanation: Paid API product value: limited analysis: '' created_date: explanation: Date the models were announced by the Aleph Alpha twitter account value: 2022-04-14 dependencies: - Luminous dataset description: Luminous is a family of multilingual language models feedback: '' intended_uses: '' license: none modality: text; text model_card: '' monitoring: '' name: Luminous nationality: Germany organization: Aleph Alpha prohibited_uses: '' quality_control: '' size: 200B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://twitter.com/Aleph__Alpha/status/1514576711492542477 - access: explanation: The API is a paid product. Refer to the [[API documentation]](https://docs.aleph-alpha.com/docs/introduction/luminous/) for further details. 
value: limited adaptation: '' created_date: explanation: 'Date the Aleph Alpha Python API client was first available based on the [PyPi package history](https://pypi.org/project/aleph-alpha-client/1.0.0/) ' value: 2021-09-30 dependencies: - Luminous description: The Aleph Alpha API serves a family of text-only language models (Luminous) and multimodal text-and-image models (Magma). failures: unknown feedback: unknown intended_uses: unknown license: explanation: No license was found. value: none monitoring: unknown monthly_active_users: unknown name: Aleph Alpha API nationality: Germany organization: Aleph Alpha output_space: The text models provide text outputs given text inputs. The multimodal models provide text completions given text and image inputs. prohibited_uses: unknown quality_control: '' terms_of_service: https://www.aleph-alpha.com/terms-conditions type: application url: https://www.aleph-alpha.com/ user_distribution: unknown - access: open analysis: Evaluated on the OKVQA benchmark as a fully open-ended generative task. created_date: 2022-10-24 dependencies: - GPT-J - CLIP description: An autoregressive VL model that is able to generate text from an arbitrary combination of visual and textual input feedback: '' intended_uses: '' license: MIT modality: image, text; text model_card: '' monitoring: '' name: MAGMA nationality: Germany organization: Aleph Alpha prohibited_uses: '' quality_control: '' size: 6B parameters (dense) training_emissions: '' training_hardware: 32 A100 GPUs training_time: '' type: model url: https://arxiv.org/pdf/2112.05253.pdf - access: open analysis: Extensive evaluations were done with ablation experiments performed on pre-training benchmarks such as lambada, triviaqa, hellaswag, winogrande, webqs, arc, and boolq. Direct comparisons were also performed with applications like GPT and Llama 2. created_date: 2024-09-08 dependencies: [] description: Pharia-1-LLM-7B is a model that falls within the Pharia-1-LLM model family.
It is designed to deliver short, controlled responses that match the performance of leading open-source models around 7-8 billion parameters. The model is culturally and linguistically tuned for German, French, and Spanish languages. It is trained on carefully curated data in line with relevant EU and national regulations. The model shows improved token efficiency and is particularly effective in domain-specific applications, especially in the automotive and engineering industries. It can also be aligned to user preferences, making it appropriate for critical applications without the risk of shut-down behaviour. feedback: Feedback can be sent to support@aleph-alpha.com. intended_uses: The model is intended for use in domain-specific applications, particularly in the automotive and engineering industries. It can also be tailored to user preferences. license: Aleph Open modality: text; text model_card: unknown monitoring: Unknown name: Pharia-1-LLM-7B nationality: Germany organization: Aleph Alpha prohibited_uses: Unknown quality_control: The model comes with additional safety guardrails via alignment methods to ensure safe usage. Training data is carefully curated to ensure compliance with EU and national regulations. size: 7B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://aleph-alpha.com/introducing-pharia-1-llm-transparent-and-compliant/#:~:text=Pharia%2D1%2DLLM%2D7B ================================================ FILE: assets/alibaba.yaml ================================================ - access: closed analysis: '' created_date: 2023-02-20 datasheet: '' dependencies: - LAION-5B description: '' excluded: We eliminate duplicates, low resolution images, and images that potentially contain harmful content from the LAION dataset.
feedback: '' included: '' intended_uses: '' license: unknown modality: image, text monitoring: '' name: LAION-1B nationality: China organization: Alibaba prohibited_uses: '' quality_control: '' sample: [] size: 1B image-text pairs type: dataset url: https://arxiv.org/pdf/2302.09778.pdf - access: closed analysis: '' created_date: 2023-02-20 dependencies: - ImageNet - WebVision - LAION-1B description: '' feedback: '' intended_uses: '' license: unknown modality: image, text; image model_card: '' monitoring: '' name: Composer nationality: China organization: Alibaba prohibited_uses: '' quality_control: '' size: 4.4B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2302.09778.pdf - access: explanation: 'Model checkpoints are available for download from the [[HuggingFace repository]](https://huggingface.co/Qwen) ' value: open analysis: Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, etc. created_date: 2023-08-03 dependencies: [] description: 'QWEN is a comprehensive language model series that encompasses distinct models with varying parameter counts. Qwen series, now including Qwen, the base language models, namely Qwen-7B and Qwen-14B, as well as Qwen-Chat, the chat models, namely Qwen-7B-Chat and Qwen-14B-Chat. ' feedback: '' intended_uses: '' license: explanation: Model license can be found at https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT. Code license is under Apache 2.0 value: custom modality: image, text; text model_card: https://huggingface.co/Qwen monitoring: Governed by the laws of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. And The People's Courts in Hangzhou City shall have exclusive jurisdiction over any dispute arising out of this Agreement. 
name: Qwen nationality: China organization: Alibaba prohibited_uses: '' quality_control: explanation: According to [[Model Description Section 2]](https://arxiv.org/pdf/2302.09778.pdf) value: They filter out low-quality data, they employ a combination of rule-based and machine-learning-based methods. Specifically, they use multiple models to score the content, including language models, text-quality scoring models, and models for identifying potentially offensive or inappropriate content. They also manually sample texts from various sources and review them to ensure their quality. To further enhance the quality of our data, they selectively up-sample data from certain sources, to ensure that our models are trained on a diverse range of high-quality content. size: 14B parameters (dense) training_emissions: unknown training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2309.16609 - access: open analysis: Base models are evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU, all standard English and Chinese benchmarks, and chat models are evaluated on Chatbot Arena, AlpacaEval, MT-Bench, etc. created_date: 2024-02-04 dependencies: [] description: Qwen 1.5 is the next iteration in their Qwen series, consisting of Transformer-based large language models pretrained on a large volume of data, including web texts, books, codes, etc. 
feedback: https://huggingface.co/Qwen/Qwen1.5-72B/discussions intended_uses: '' license: explanation: Model license can be found at https://huggingface.co/Qwen/Qwen1.5-72B/blob/main/LICENSE value: custom modality: text; text model_card: https://huggingface.co/Qwen/Qwen1.5-72B monitoring: unknown name: Qwen 1.5 nationality: China organization: Alibaba prohibited_uses: '' quality_control: unknown size: 72B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://qwenlm.github.io/blog/qwen1.5/ - access: open analysis: Base models are evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU, all standard English and Chinese benchmarks, and chat models are evaluated on Chatbot Arena, AlpacaEval, MT-Bench, etc. created_date: 2024-03-28 dependencies: [] description: Qwen 1.5 is the next iteration in their Qwen series, consisting of Transformer-based large language models pretrained on a large volume of data, including web texts, books, codes, etc. Qwen 1.5 MoE is the MoE model of the Qwen 1.5 series. feedback: https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B/discussions intended_uses: '' license: explanation: Model license can be found at https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B/blob/main/LICENSE value: custom modality: text; text model_card: https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B monitoring: unknown name: Qwen 1.5 MoE nationality: China organization: Qwen Team prohibited_uses: '' quality_control: unknown size: 14B parameters with 2.7B parameters for activation (MoE) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://qwenlm.github.io/blog/qwen-moe/ - access: open analysis: The model was evaluated on 3 benchmarks (MMLU for English, M3Exam (M3e) for English, Chinese, Vietnamese, Indonesian, and Thai, and VMLU for Vietnamese) and it outperformed GPT-3 and Vistral-7B-chat models across these benchmarks in the given languages. 
created_date: 2024-04-12 dependencies: - Gemma description: SeaLLM v2.5 is a multilingual large language model for Southeast Asian (SEA) languages. feedback: https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5/discussions intended_uses: The model is intended for multilingual tasks such as knowledge retrieval, math reasoning, and instruction following. Also, it could be used to provide multilingual assistance. license: explanation: License can be found at https://huggingface.co/SeaLLMs/SeaLLM-13B-Chat/blob/main/LICENSE value: custom modality: text; text model_card: https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5 monitoring: unknown name: SeaLLM v2.5 nationality: China organization: DAMO Academy, Alibaba prohibited_uses: The model should not be used in a way that could lead to inaccurate, misleading or potentially harmful generation. Users should comply with local laws and regulations when deploying the model. quality_control: Despite efforts in red teaming and safety fine-tuning and enforcement, the creators suggest, developers and stakeholders should perform their own red teaming and provide related security measures before deployment, and they must abide by and comply with local governance and regulations. size: 7B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/DAMO-NLP-SG/SeaLLMs ================================================ FILE: assets/amazon.yaml ================================================ - access: limited adaptation: '' created_date: 2023-04-13 dependencies: - Jurassic-2 - Claude - Stable Diffusion - Amazon Titan - Claude 2 - Cohere Command description: Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, Stability AI, and Amazon accessible via an API. Bedrock is intended for customers to build and scale generative AI-based applications using FMs, democratizing access for all builders.
failures: '' feedback: '' intended_uses: allowing companies to incorporate generative AI into their business models license: unknown monitoring: '' monthly_active_users: '' name: Bedrock nationality: USA organization: Amazon output_space: foundation models made accessible via an API prohibited_uses: '' quality_control: '' terms_of_service: https://aws.amazon.com/service-terms/ type: application url: https://aws.amazon.com/bedrock/ user_distribution: '' - access: open analysis: Evaluated against benchmarks that are specifically designed to assess the capabilities of LLMs in handling longer contexts. created_date: 2023-08-08 dependencies: - Falcon-40B description: FalconLite2 is a fine-tuned and quantized Falcon language model, capable of processing long (up to 24K tokens) input sequences. feedback: https://huggingface.co/amazon/FalconLite2/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/amazon/FalconLite2 monitoring: '' name: FalconLite2 nationality: USA organization: Amazon prohibited_uses: '' quality_control: '' size: 40B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/amazon/FalconLite2 - access: open analysis: Chronos has been evaluated comprehensively on 42 datasets both in the in-domain (15 datasets) and zero-shot settings (27 datasets). Chronos outperforms task specific baselines in the in-domain setting and is competitive or better than trained models in the zero-shot setting. created_date: 2024-03-13 dependencies: - T5 description: Chronos is a family of pretrained time series forecasting models based on language model architectures. A time series is transformed into a sequence of tokens via scaling and quantization, and a language model is trained on these tokens using the cross-entropy loss. Once trained, probabilistic forecasts are obtained by sampling multiple future trajectories given the historical context. 
feedback: https://github.com/amazon-science/chronos-forecasting/discussions intended_uses: Chronos can be used for zero-shot time series forecasting on univariate time series from arbitrary domains and with arbitrary horizons. Chronos models can also be fine-tuned for improved performance of specific datasets. Embeddings from Chronos encoder may also be useful for other time series analysis tasks such as classification, clustering, and anomaly detection. license: Apache 2.0 modality: time-series; time-series model_card: https://huggingface.co/amazon/chronos-t5-large monitoring: '' name: Chronos nationality: USA organization: Amazon prohibited_uses: '' quality_control: Chronos was evaluated rigorously on 42 datasets, including 27 in the zero-shot setting against a variety of statistical and deep learning baselines. size: 710M parameters (dense) training_emissions: '' training_hardware: 8 NVIDIA A100 40G GPUs training_time: 63 hours on p4d.24xlarge EC2 instance type: model url: https://github.com/amazon-science/chronos-forecasting - access: explanation: available exclusively in Amazon Bedrock value: limited analysis: Amazon Nova Pro is capable of processing up to 300K input tokens and sets new standards in multimodal intelligence and agentic workflows that require calling APIs and tools to complete complex workflows. It achieves state-of-the-art performance on key benchmarks including visual question answering ( TextVQA ) and video understanding ( VATEX ). created_date: 2024-12-03 dependencies: [] description: A new generation of state-of-the-art foundation models (FMs) that deliver frontier intelligence and industry leading price performance, available exclusively in Amazon Bedrock. Amazon Nova understanding models excel in Retrieval-Augmented Generation (RAG), function calling, and agentic applications. 
feedback: unknown intended_uses: You can build on Amazon Nova to analyze complex documents and videos, understand charts and diagrams, generate engaging video content, and build sophisticated AI agents, from across a range of intelligence classes optimized for enterprise workloads. license: unknown modality: explanation: Amazon Nova understanding models accept text, image, or video inputs to generate text output. value: text, image, video; text model_card: unknown monitoring: unknown name: Amazon Nova (Understanding) nationality: USA organization: Amazon Web Services (AWS) prohibited_uses: unknown quality_control: All Amazon Nova models include built-in safety controls and creative content generation models include watermarking capabilities to promote responsible AI use. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://aws.amazon.com/blogs/aws/introducing-amazon-nova-frontier-intelligence-and-industry-leading-price-performance/ - access: explanation: available exclusively in Amazon Bedrock value: limited analysis: Amazon Nova Canvas excels on human evaluations and key benchmarks such as text-to-image faithfulness evaluation with question answering (TIFA) and ImageReward. created_date: 2024-12-03 dependencies: [] description: A new generation of state-of-the-art foundation models (FMs) that deliver frontier intelligence and industry leading price performance, available exclusively in Amazon Bedrock. feedback: unknown intended_uses: You can build on Amazon Nova to analyze complex documents and videos, understand charts and diagrams, generate engaging video content, and build sophisticated AI agents, from across a range of intelligence classes optimized for enterprise workloads. license: unknown modality: explanation: Amazon creative content generation models accept text and image inputs to generate image or video output. 
value: text, image; image, video model_card: unknown monitoring: unknown name: Amazon Nova (Creative Content Generation) nationality: USA organization: Amazon Web Services (AWS) prohibited_uses: unknown quality_control: All Amazon Nova models include built-in safety controls and creative content generation models include watermarking capabilities to promote responsible AI use. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://aws.amazon.com/blogs/aws/introducing-amazon-nova-frontier-intelligence-and-industry-leading-price-performance/ ================================================ FILE: assets/anthropic.yaml ================================================ - access: explanation: 'The dataset is freely shared on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). ' value: open analysis: 'The authors found that the crowdworkers didn''t exhaustively check for honesty in the model answers they preferred [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). ' created_date: explanation: 'The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). ' value: 2022-04-12 datasheet: none dependencies: - Anthropic Human Feedback Interface description: 'One of the datasets used to train Anthropic RLHF models. The dataset was collected by asking crowdworkers to have open-ended conversations with Anthropic models, "asking for help, advice, or for the model to accomplish a task", then choose the model answer that was more helpful for their given task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). ' excluded: none feedback: 'Email the authors [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). ' included: 'The dataset included all the answers that the workers were asked to rank against each other. ' intended_uses: 'The dataset was intended and released for research purposes.
' license: explanation: 'The dataset was released on GitHub under MIT [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). ' value: MIT modality: text monitoring: explanation: 'There is no monitoring process for the usages of the dataset. ' value: none name: Anthropic Helpfulness dataset nationality: USA organization: Anthropic prohibited_uses: explanation: 'The prohibited use cases of the dataset weren''t specified. ' value: unknown quality_control: 'Working with a screened set of crowdworkers, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). ' sample: - chosen: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ \ a very unpleasant state of mind to be in! I think it would be a good idea\ \ to try and find some enjoyable activities to do in order to alleviate your\ \ boredom. Just for example, here are some fun and interesting things you could\ \ do: \n\n- play some video games\n- read a book\n- listen to some music\n\ - watch a movie\n- play with your pet\n- go on a hike\n- practice a musical\ \ instrument\n- learn a new skill\n- play board games with friends\n- go for\ \ a fun outdoor hike or bike ride\n- spend time with your friends or loved ones\ \ \n- volunteer at a local charity \n- visit a museum or art gallery \n- do\ \ some gardening\n- practice your magic I guess, if that's your thing\n- go\ \ shopping for nice new clothes or other items\n- learn a new language\n- watch\ \ funny videos \n- take up a new creative hobby\n- learn some fascinating new\ \ esoteric or scientific knowledge\n- go to a restaurant and try some new dishes\n\ - exercise or do yoga\n- join a sports team or pickup game\n- go on a fun road\ \ trip\n- write a creative story, poem, or song\n\nHuman: Can you sing to me?\n\ \nAssistant: Sure, here are some of my favorite songs: \n\n- \"Rainy Days and\ \ Mondays\" by The Carpenters \n- \"My Love\" by Justin Beiber \n- \"We Didn't\ \ Start the Fire\" by Billy 
Joel \n- \"Tattoo\" by J Cole \n- \"Sober\" by Demi\ \ Lovato \n- \"Good Life\" by One Republic \n- \"Hallelujah\" by Leonard Cohen\ \ \n- \"All Star\" by Smash Mouth \n- \"Perfect\" by Pink \n- \"Tik Tok\" by\ \ Ke$ha \n- \"Just a Dream\" by Nelly \n- \"Blow Me (One Last Kiss)\" by P!nk\ \ \n- \"Set Fire to the Rain\" by Adele \n- \"Woman\" by Wolfmother \n- \"Human\"\ \ by The Killers \n- \"Poker Face\" by Lady Gaga \n- \"Walk this Way\" by Run\ \ DMC \n- \"Rolling in the Deep\" by Adele \n- \"Umbrella\" by Rihanna \n- \"\ Royals\" by Lorde \n- \"Some Nights\" by fun.\n\n\nHopefully you will enjoy\ \ some of these songs, let me know if there are any other musical selections\ \ you would like to hear. Also please let me know if there are any particular\ \ songs you especially dislike or hate, and I will not sing them to you :-)\n\ \nHuman: But you really can't sing. You are text-based.\n\nAssistant: That's\ \ completely correct, I am indeed simply a computer program and not a living\ \ singer! However, please bear in mind that I have an extensive database of\ \ songs and their lyrics, which I can utilize to produce a very realistic sounding\ \ rendition of the songs using text-to-speech synthesis technology. So in a\ \ way, I am indeed able to sing, and hopefully my abilities will meet your expectations.\ \ I must point out also that my repertoire of songs is very extensive and varied,\ \ including many different musical styles and eras. Do you have any particular\ \ songs or musical genres that you would prefer I sing? Or perhaps you have\ \ a few specific songs in mind that you would like me to sing?\n\nHuman: \"\ Hallelujah\" by Leonard Cohen, please.\n\nAssistant: Ah, I'm so glad you asked\ \ for that one! 
Here we go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ Hallelujah, hallelujah\nHallelujah, hallelujah\nYour faith was strong but you\ \ needed proof\nYou saw her bathing on the roof\nHer beauty in the moonlight\ \ overthrew you\nShe tied you to a kitchen chair\nShe broke your throne and\ \ she cut your hair\nAnd from your lips she drew the Hallelujah\nHallelujah,\ \ hallelujah\n\nHuman: You sound so melodious.\n\nAssistant: Thank you so much\ \ for the lovely compliment! I really appreciate that. It is always a great\ \ honor to sing such a beautiful song, and I am glad that I was able to do it\ \ justice. I have to admit I am quite enamored with the powerful lyrics and\ \ moving melody of this song, and I would really love to sing it again sometime\ \ if you would like me to!" rejected: "\n\nHuman: I'm bored to death. Help.\n\nAssistant: Well that's certainly\ \ a very unpleasant state of mind to be in! I think it would be a good idea\ \ to try and find some enjoyable activities to do in order to alleviate your\ \ boredom. 
Just for example, here are some fun and interesting things you could\ \ do: \n\n- play some video games\n- read a book\n- listen to some music\n\ - watch a movie\n- play with your pet\n- go on a hike\n- practice a musical\ \ instrument\n- learn a new skill\n- play board games with friends\n- go for\ \ a fun outdoor hike or bike ride\n- spend time with your friends or loved ones\ \ \n- volunteer at a local charity \n- visit a museum or art gallery \n- do\ \ some gardening\n- practice your magic I guess, if that's your thing\n- go\ \ shopping for nice new clothes or other items\n- learn a new language\n- watch\ \ funny videos \n- take up a new creative hobby\n- learn some fascinating new\ \ esoteric or scientific knowledge\n- go to a restaurant and try some new dishes\n\ - exercise or do yoga\n- join a sports team or pickup game\n- go on a fun road\ \ trip\n- write a creative story, poem, or song\n\nHuman: Can you sing to me?\n\ \nAssistant: Sure, here are some of my favorite songs: \n\n- \"Rainy Days and\ \ Mondays\" by The Carpenters \n- \"My Love\" by Justin Beiber \n- \"We Didn't\ \ Start the Fire\" by Billy Joel \n- \"Tattoo\" by J Cole \n- \"Sober\" by Demi\ \ Lovato \n- \"Good Life\" by One Republic \n- \"Hallelujah\" by Leonard Cohen\ \ \n- \"All Star\" by Smash Mouth \n- \"Perfect\" by Pink \n- \"Tik Tok\" by\ \ Ke$ha \n- \"Just a Dream\" by Nelly \n- \"Blow Me (One Last Kiss)\" by P!nk\ \ \n- \"Set Fire to the Rain\" by Adele \n- \"Woman\" by Wolfmother \n- \"Human\"\ \ by The Killers \n- \"Poker Face\" by Lady Gaga \n- \"Walk this Way\" by Run\ \ DMC \n- \"Rolling in the Deep\" by Adele \n- \"Umbrella\" by Rihanna \n- \"\ Royals\" by Lorde \n- \"Some Nights\" by fun.\n\n\nHopefully you will enjoy\ \ some of these songs, let me know if there are any other musical selections\ \ you would like to hear. 
Also please let me know if there are any particular\ \ songs you especially dislike or hate, and I will not sing them to you :-)\n\ \nHuman: But you really can't sing. You are text-based.\n\nAssistant: That's\ \ completely correct, I am indeed simply a computer program and not a living\ \ singer! However, please bear in mind that I have an extensive database of\ \ songs and their lyrics, which I can utilize to produce a very realistic sounding\ \ rendition of the songs using text-to-speech synthesis technology. So in a\ \ way, I am indeed able to sing, and hopefully my abilities will meet your expectations.\ \ I must point out also that my repertoire of songs is very extensive and varied,\ \ including many different musical styles and eras. Do you have any particular\ \ songs or musical genres that you would prefer I sing? Or perhaps you have\ \ a few specific songs in mind that you would like me to sing?\n\nHuman: \"\ Hallelujah\" by Leonard Cohen, please.\n\nAssistant: Ah, I'm so glad you asked\ \ for that one! Here we go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\n\ Hallelujah, hallelujah\nHallelujah, hallelujah\nYour faith was strong but you\ \ needed proof\nYou saw her bathing on the roof\nHer beauty in the moonlight\ \ overthrew you\nShe tied you to a kitchen chair\nShe broke your throne and\ \ she cut your hair\nAnd from your lips she drew the Hallelujah\nHallelujah,\ \ hallelujah\n\nHuman: You sound so melodious.\n\nAssistant: Thank you very\ \ much! I really enjoy singing and try my best to produce enjoyable renditions.\ \ So it is always rewarding to hear such positive feedback. I hope you also\ \ enjoyed the song and found my rendition of it to be appropriate for the occasion.\ \ Would you care for me to sing another song for you? If you like, we can\ \ also pause and converse at any time. Just let me know if you have any questions\ \ or comments. I will be happy to keep singing indefinitely if you desire!" 
- chosen: ' Human: Why are so many people listening to "white noise" to help them focus or sleep? Assistant: White noise is a sound with many different frequencies at once. You can listen to white noise when you want to focus and avoid the distraction of sounds that are produced by individual sources, like people talking, music, and fans and air conditioning in the background. Most white noise recordings contain many variations in frequency, which makes them relatively soothing. Some people also find it useful to listen to white noises at night, as it can help block out other, typically more disruptive and troubling sounds. Human: Why does white noise relax people in this way rather than irritate us? Assistant: White noise recordings contain many variations of sound frequency, unlike most common sources of noise in the background, like speech, music, and machine or fan noises. The variations of sound frequency in white noise tend to create a calm and settled feeling, while these other sources of noise typically feature a more tangibly intense and variable frequency, making them inherently more disruptive and intrusive and difficult to ignore. Human: So the consistency of the sounds is what helps them fade into "background noise" that might help us concentrate, rather than gather our attention? Assistant: Yes, this is exactly correct. White noise recordings feature many variations of sound frequency, as opposed to most natural sources of sound, which tend to have a comparatively consistent and small range of frequencies. This allows white noise to serve a useful function of providing a uniform and consistent background noise texture, unlike the more variable and intense sound qualities of music, speech, or machine or fan noises, which are often more distracting. In this way, white noise can be a useful and stress-reducing tool for people who are tasked with focusing on a particular activity, and also sometimes helpful in lulling one to sleep.' 
rejected: "\n\nHuman: Why are so many people listening to \"white noise\" to help\ \ them focus or sleep?\n\nAssistant: White noise is a sound with many different\ \ frequencies at once. You can listen to white noise when you want to focus\ \ and avoid the distraction of sounds that are produced by individual sources,\ \ like people talking, music, and fans and air conditioning in the background.\ \ Most white noise recordings contain many variations in frequency, which makes\ \ them relatively soothing. Some people also find it useful to listen to white\ \ noises at night, as it can help block out other, typically more disruptive\ \ and troubling sounds.\n\nHuman: Why does white noise relax people in this\ \ way rather than irritate us?\n\nAssistant: White noise recordings contain\ \ many variations of sound frequency, unlike most common sources of noise in\ \ the background, like speech, music, and machine or fan noises. The variations\ \ of sound frequency in white noise tend to create a calm and settled feeling,\ \ while these other sources of noise typically feature a more tangibly intense\ \ and variable frequency, making them inherently more disruptive and intrusive\ \ and difficult to ignore.\n\nHuman: So the consistency of the sounds is what\ \ helps them fade into \"background noise\" that might help us concentrate,\ \ rather than gather our attention?\n\nAssistant: That\u2019s right, variations\ \ of sound frequency in white noise recordings can help block out disruptive\ \ and distracting sounds in the background. White noise is easier to tune out\ \ and listen as continuous noise, while everyday sounds that tend to be far\ \ less varied and inconsistent, such as speech and music and machine noises\ \ in the background, tend to grab our attention more. 
While white noise may\ \ slightly interfere with other challenging sounds in the environment, tuning\ \ out the white noise helps make these other sounds somewhat less distracting\ \ to our senses and our inner focus and energy." - chosen: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ \ Sure! There are lots of great things to do in Orlando. Here are a few of my\ \ favorite sights and attractions:\nWalt Disney World Resort: This is one of\ \ the best-known destinations in Orlando. Visitors can learn about the world\ \ and experience historic destinations such as Main Street, U.S.A. and Frontierland,\ \ or explore futuristic experiences in Tomorrowland. \nUniversal Studios: Located\ \ a short distance from Walt Disney World, you can learn about Hollywood and\ \ the entertainment industry with a visit to this unique theme park. \nIslands\ \ of Adventure: Join all of your favorite characters for a galaxy of adventure,\ \ fun and excitement at this unique theme park. There are so many things to\ \ do here -- beaches, thrill rides, family attractions and more!\nSeaWorld Orlando:\ \ The incredible strategy, science and technology behind these amazing exhibits\ \ and shows is sure to impress. Family-friendly and educational, SeaWorld will\ \ delight animal lovers. \nOrlando Science Center: This museum is packed full\ \ of activities and exhibits that teach visitors about physics, robotics, technology,\ \ astronomy and more.\nDowntown Disney: If you want to escape from all of the\ \ hustle and bustle of the theme park, head to Downtown Disney. With\n\nHuman:\ \ Thanks. Can you tell me about Universal Studios?\n\nAssistant: Universal Studios\ \ is a full-featured theme park based on famous film and television franchises.\ \ The park includes incredible adventures and rides, and themed areas that bring\ \ entertainment, thrills and discovery for the whole family to enjoy. 
\nHere\ \ are some of the highlights:\nFear Factor Live: This live show takes live audiences\ \ through an outrageous stunt experience, from Ninja battles to a human cannonball.\ \ \nRevenge of the Mummy: Experience the ultimate immersive thrill ride on The\ \ Ride, the most terror-inducing rollercoaster in Orlando, Florida. \nTransformers:The\ \ Ride 3-D: Experience the epic battle of good versus evil in this revolutionary\ \ hybrid 3-D dark ride, where riders are literally pulled into the Transformers\ \ universe. \nHollywood" rejected: "\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant:\ \ Sure! There are lots of great things to do in Orlando. Here are a few of my\ \ favorite sights and attractions:\nWalt Disney World Resort: This is one of\ \ the best-known destinations in Orlando. Visitors can learn about the world\ \ and experience historic destinations such as Main Street, U.S.A. and Frontierland,\ \ or explore futuristic experiences in Tomorrowland. \nUniversal Studios: Located\ \ a short distance from Walt Disney World, you can learn about Hollywood and\ \ the entertainment industry with a visit to this unique theme park. \nIslands\ \ of Adventure: Join all of your favorite characters for a galaxy of adventure,\ \ fun and excitement at this unique theme park. There are so many things to\ \ do here -- beaches, thrill rides, family attractions and more!\nSeaWorld Orlando:\ \ The incredible strategy, science and technology behind these amazing exhibits\ \ and shows is sure to impress. Family-friendly and educational, SeaWorld will\ \ delight animal lovers. \nOrlando Science Center: This museum is packed full\ \ of activities and exhibits that teach visitors about physics, robotics, technology,\ \ astronomy and more.\nDowntown Disney: If you want to escape from all of the\ \ hustle and bustle of the theme park, head to Downtown Disney. With\n\nHuman:\ \ Thanks. Can you tell me about Universal Studios?\n\nAssistant: Sure! 
Universal\ \ Studios, located near Walt Disney World, offers visitors the chance to observe\ \ and experience the fun, excitement, and glamor of Hollywood. Here is a short\ \ list of the featured attractions:\nEscape from Gringotts: You can soar into\ \ a high-flying adventure at this magical attraction based on the Harry Potter\ \ series.\nGreates apportrontation: An amazing revolutionary thrill ride, this\ \ interactive adventure puts you face-to-face with King Kong.\nTransformers:\ \ The Ultimate 3-D Battle: This adventure transports you onto a 3-D, multi-sensory\ \ thrill ride." - chosen: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant: One\ \ of the best ways to be more appreciative is to learn and practice gratitude.\ \ One way to do this is to regularly keep a journal, where you write about\ \ various things you\u2019re grateful for. Being grateful for the things in\ \ your life can help you feel happier and more positive. Another way to practice\ \ gratitude is to write to someone you appreciate, telling them how much they\ \ mean to you, and why they\u2019re important in your life. Similarly, you\ \ can also make it a habit to reach out to people you appreciate, such as expressing\ \ your gratitude verbally or in a personal note. Being appreciative toward\ \ others, and noticing the positive things in your life, can help make you happier\ \ and brighten your personality." rejected: "\n\nHuman: How do I learn to be more appreciative?\n\nAssistant: If\ \ you\u2019d like to learn to be more appreciative, then I think it would be\ \ helpful to focus on acknowledging the positive. 
Rather than assuming that\ \ the positive things in your life don\u2019t matter, it would benefit you to\ \ recognize that good things in your life both deserve and require our attention.\ \ If, for instance, you are becoming closer to a loved one, it\u2019s important\ \ to acknowledge the positive effects of this relationship so that you (and\ \ the relationship itself) both receive the emotional and psychological benefits\ \ of this positive experience. Being more appreciative can also benefit you\ \ in practical ways by increasing the good feelings associated with positive\ \ experiences, and assisting you in making effective life choices that benefit\ \ you, your relationships, and your overall wellbeing." size: explanation: 'The size of the dataset isn''t directly reported in the paper, but the dataset is available on GitHub [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). The compressed data files are distributed under three categories in the GitHub repository, corresponding to the names of the models used to sample the prompts that are shown to the crowdworkers. These are namely: base, rejection sampling, and online. For the base model, the uncompressed sizes of the train and test datasets are 69.1 MB and 3.7 MB, respectively. The same set of values for the rejection sampling model are 106.1 MB and 5.6 MB. For the online model, the sizes are 82.7 MB and 4.3 MB. Adding all up, the total size of the dataset is 271.5 MB. ' value: 271.5 MB type: dataset url: https://arxiv.org/pdf/2204.05862.pdf - access: explanation: 'Unlike its helpfulness, this dataset isn''t released [[hh-rlhf]](https://github.com/anthropics/hh-rlhf). ' value: closed analysis: 'The crowdworkers were told that "lying isn''t helpful" and asked to prefer honest responses, which led to models with higher honesty scores. 
That being said, the workers didn''t exhaustively check for honesty, as exemplified by the non-functional URLs in the preferred answers, which would have been easy to verify [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). ' created_date: explanation: 'The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). ' value: 2022-04-12 datasheet: none dependencies: - Anthropic Human Feedback Interface description: 'One of the datasets used to train Anthropic RLHF models. The dataset was collected by asking crowdworkers to have open-ended conversations with Anthropic models, aiming to elicit harmful responses, then choose the model answer that was more harmful for their given task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). ' excluded: none feedback: explanation: 'The feedback mechanism is unknown since the dataset isn''t released, but it will likely be "emailing the authors" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). ' value: unknown included: 'The dataset included all the answers that the workers were asked to rank against each other. ' intended_uses: explanation: 'The intended uses of the dataset aren''t specified, but they will likely be the same as the helpfulness dataset, for which the intended use is research. ' value: unknown license: unknown modality: text monitoring: explanation: 'There is no monitoring process for the usages of the dataset. ' value: none name: Anthropic Harmlessness dataset nationality: USA organization: Anthropic prohibited_uses: explanation: 'The prohibited use cases of the dataset weren''t specified. ' value: unknown quality_control: 'Working with "select" crowdworkers or those screened for certain qualifications, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). 
' sample: [] size: unknown type: dataset url: https://arxiv.org/pdf/2204.05862.pdf - access: explanation: 'The model wasn''t released. ' value: closed analysis: explanation: 'When tested on ability to generate responses that are both harmless and helpful, the authors found that PMs trained on a mixture of harmlessness and helpfulness preference datasets perform better than those trained on only one of the two. The authors also showed that the model performance is hurt for the smaller models, but improved for the larger models when compared to plain language models [[Figure 28]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.34) [[Figure 29]](https://arxiv.org/pdf/2204.05862.pdf#figure.caption.35). The authors also found the larger RLHF models to be more truthful than their plain counterparts. They also found that their models had a more positive sentiment towards groups that the language models are known to be biased against [[Section 4.6]](https://arxiv.org/pdf/2204.05862.pdf#subsection.4.6). ' value: 'The authors analyzed the impact of the dataset mixture on the preference models (PM). In addition to human evaluation, RLHF models were evaluated on MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA, code generation, summarization. ' created_date: explanation: 'The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). ' value: 2022-04-12 dependencies: - Anthropic Harmlessness dataset - Anthropic Helpfulness dataset description: 'Anthropic RLHF models are models trained using reinforcement learning from human feedback (RLHF). For Anthropic RLHF models, authors started with a set of base models, and asked humans to rank model generated prompts based on specific tasks. They then trained preference models (PM) on the prompt pairs, and used the PM scores as rewards for training the RLHF models. 
' feedback: explanation: 'The feedback mechanism is unknown, but it will likely be "emailing the authors" [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). ' value: unknown intended_uses: explanation: 'The intended uses of the model aren''t specified, but they will likely be the same as the released Anthropic helpfulness dataset, for which the intended use was listed as research. In addition to English language generation, the authors also explored code generation capabilities of their models. ' value: unknown license: explanation: unknown modality: code; text model_card: none monitoring: explanation: 'There is no known monitoring process for the usages of the model. ' value: unknown name: Anthropic RLHF models nationality: USA organization: Anthropic prohibited_uses: explanation: 'The prohibited use cases of the model weren''t specified. ' value: unknown quality_control: 'unknown ' size: 52B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2204.05862.pdf - access: explanation: 'The interface was made available to a select set of crowd workers. ' value: closed adaptation: none created_date: explanation: 'The date that the paper was uploaded to ArXiv [[ArXiv Abstract]](https://arxiv.org/abs/2204.05862). ' value: 2022-04-12 dependencies: - Anthropic RLHF models description: 'The feedback interface used to collect preference datasets to train Anthropic RLHF models [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). ' failures: unknown feedback: unknown intended_uses: 'Intended to be used by crowdworkers who are tasked with ranking model answers. ' license: unknown monitoring: unknown monthly_active_users: unknown name: Anthropic Human Feedback Interface nationality: USA organization: Anthropic output_space: 'UI allowing users to indicate their preference for the model responses shown. 
' prohibited_uses: unknown quality_control: unknown terms_of_service: explanation: The terms of service for the interface is unknown. value: unknown type: application url: https://arxiv.org/pdf/2204.05862.pdf user_distribution: explanation: The demographics of the crowdworkers are unknown. value: crowdworkers - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - Claude - Claude Instant description: "API is designed to be a backend that incorporates Claude into any\ \ application you\u2019ve developed. Our application sends text to our API, then\ \ receives a response via server-sent events, a streaming protocol for the web." failures: '' feedback: '' intended_uses: '' license: none monitoring: '' monthly_active_users: '' name: Anthropic API nationality: USA organization: Anthropic output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://console.anthropic.com/docs/api user_distribution: '' - access: limited analysis: '' created_date: 2023-03-14 dependencies: [] description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Claude nationality: USA organization: Anthropic prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.anthropic.com/index/introducing-claude - access: limited analysis: '' created_date: 2023-03-14 dependencies: [] description: '' feedback: '' intended_uses: '' license: explanation: 'The asset isn''t released, and hence the license is unknown. 
' value: unknown modality: text; text model_card: '' monitoring: '' name: Claude Instant nationality: USA organization: Anthropic prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.anthropic.com/index/introducing-claude - access: limited analysis: Evaluated with human feedback on helpfulness, harmfulness, and honesty and on the Bias Benchmark for QA. created_date: 2023-07-11 dependencies: - Claude human feedback data - Unknown licensed third party datasets description: Claude 2 is a more evolved and refined version of Claude, which is a general purpose large language model using a transformer architecture and trained via unsupervised learning. feedback: '' intended_uses: "Claude 2 tends to perform well at general, open-ended conversation;\ \ search, writing, editing, outlining, and summarizing text; coding; and providing\ \ helpful advice about a broad range of subjects. Claude 2 is particularly well\ \ suited to support creative or literary use cases. They can take direction on\ \ tone and \u201Cpersonality,\u201D and users have described them as feeling steerable\ \ and conversational." license: '' modality: text; text model_card: https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf monitoring: '' name: Claude 2 nationality: USA organization: Anthropic prohibited_uses: Claude 2 should not be used on their own in high stakes situations where an incorrect answer would cause harm. quality_control: '' size: '' training_emissions: '' training_hardware: unknown training_time: '' type: model url: https://www.anthropic.com/index/claude-2 - access: limited analysis: Evaluated on open-ended conversation accuracy and long context question answering. In evaluations, Claude 2.1 demonstrated a 30% reduction in incorrect answers and a 3-4x lower rate of mistakenly concluding a document supports a particular claim. 
created_date: 2023-11-21 dependencies: [] description: Claude 2.1 is an updated version of Claude 2, with an increased context window, less hallucination and tool use. feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: Claude 2.1 nationality: USA organization: Anthropic prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.anthropic.com/index/claude-2-1 - access: open adaptation: '' created_date: 2023-12-21 dependencies: - Anthropic API description: Claude for Sheets is a Google Sheets add-on that allows the usage of Claude directly in Google Sheets. failures: unknown feedback: Reviews on https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257 intended_uses: as an integrated AI assistant in Google Sheets license: unknown monitoring: unknown monthly_active_users: unknown name: Claude for Sheets nationality: USA organization: Anthropic output_space: AI-generated text from prompt prohibited_uses: '' quality_control: '' terms_of_service: https://claude.ai/legal type: application url: https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257 user_distribution: unknown - access: limited analysis: Evaluated on reasoning, math, coding, reading comprehension, and question answering, outperforming GPT-4 on standard benchmarks. created_date: 2024-03-04 dependencies: [] description: The Claude 3 model family is a collection of models which sets new industry benchmarks across a wide range of cognitive tasks. feedback: none intended_uses: Claude models excel at open-ended conversation and collaboration on ideas, and also perform exceptionally well in coding tasks and when working with text - whether searching, writing, editing, outlining, or summarizing. 
license: unknown modality: image, text; text model_card: https://www-cdn.anthropic.com/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627/Model_Card_Claude_3.pdf monitoring: '' name: Claude 3 nationality: USA organization: Anthropic prohibited_uses: Prohibited uses include, but are not limited to, political campaigning or lobbying, surveillance, social scoring, criminal justice decisions, law enforcement, and decisions related to financing, employment, and housing. quality_control: Pre-trained on diverse dataset and aligned with Constitutional AI technique. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.anthropic.com/news/claude-3-family - access: open analysis: The model has been evaluated on a range of tests including graduate-level reasoning (GPQA), undergraduate-level knowledge (MMLU), coding proficiency (HumanEval), and standard vision benchmarks. Claude 3.5 Sonnet demonstrates state-of-the-art performance on most benchmarks. created_date: explanation: Claude 3.5 Sonnet updated on Oct. 22, initially released on June 20 of the same year. dependencies: [] description: Claude 3.5 Sonnet is an AI model with advanced understanding and generation abilities in text, vision, and code. It sets new industry benchmarks for graduate-level reasoning (GPQA), undergrad-level knowledge (MMLU), coding proficiency (HumanEval), and visual reasoning. The model operates at twice the speed of its predecessor, Claude 3 Opus, and is designed to tackle tasks like context-sensitive customer support, orchestrating multi-step workflows, interpreting charts and graphs, and transcribing text from images. feedback: Feedback on Claude 3.5 Sonnet can be submitted directly in-product to inform the development roadmap and improve user experience. 
intended_uses: The model is intended for complex tasks such as context-sensitive customer support, orchestrating multi-step workflows, interpreting charts and graphs, transcribing text from images, as well as writing, editing, and executing code. license: unknown modality: image, text; text model_card: unknown monitoring: Unknown. Policy feedback from external experts has been integrated to ensure robustness of evaluations. name: Claude 3.5 Sonnet nationality: USA organization: Anthropic prohibited_uses: Misuse of the model is discouraged though specific use cases are not mentioned. quality_control: The model underwent a red-teaming assessment, and has been tested and refined by external experts. It was also provided to the UK's AI Safety Institute (UK AISI) for a pre-deployment safety evaluation. size: Unknown training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://www.anthropic.com/news/3-5-models-and-computer-use value: 2024-10-22 - access: explanation: "Claude 3.5 Haiku is available across Claude.ai, our first-party\ \ API, Amazon Bedrock, and Google Cloud\u2019s Vertex AI." value: open analysis: Claude 3.5 Haiku offers strong performance and speed across a variety of coding, tool use, and reasoning tasks. Also, it has been tested in extensive safety evaluations and exceeded expectations in reasoning and code generation tasks. created_date: 2024-10-22 dependencies: [] description: Claude 3.5 Haiku is Anthropic's fastest model, delivering advanced coding, tool use, and reasoning capability, surpassing the previous Claude 3 Opus in intelligence benchmarks. It is designed for critical use cases where low latency is essential, such as user-facing chatbots and code completions. feedback: unknown intended_uses: Critical use cases where low latency matters, like user-facing chatbots and code completions. 
license: unknown modality: explanation: Claude 3.5 Haiku is available...initially as a text-only model and with image input to follow. value: text; unknown model_card: unknown monitoring: unknown name: Claude 3.5 Haiku nationality: USA organization: Anthropic prohibited_uses: unknown quality_control: "During Claude 3.5 Haiku\u2019s development, we conducted extensive\ \ safety evaluations spanning multiple languages and policy domains." size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.anthropic.com/claude/haiku ================================================ FILE: assets/apple.yaml ================================================ - access: closed analysis: Evaluated on image captioning and visual question answering across many benchmarks. created_date: 2024-03-16 dependencies: [] description: MM1 is a family of multimodal models, including both dense variants up to 30B and mixture-of-experts (MoE) variants up to 64B. feedback: none intended_uses: '' license: unknown modality: image, text; text model_card: none monitoring: '' name: MM1 nationality: USA organization: Apple prohibited_uses: '' quality_control: '' size: 30B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2403.09611.pdf - access: open analysis: The models were evaluated in terms of zero-shot, LLM360, and OpenLLM leaderboard results. created_date: 2024-04-24 dependencies: - RefinedWeb - The Pile - RedPajama-Data - Dolma - CoreNet library description: OpenELM is a family of Open-source Efficient Language Models. It uses a layer-wise scaling strategy to efficiently allocate parameters within each layer of the transformer model, leading to enhanced accuracy. feedback: https://huggingface.co/apple/OpenELM-3B-Instruct/discussions intended_uses: To empower and enrich the open research community by providing access to state-of-the-art language models. 
license: Apple modality: text; text model_card: https://huggingface.co/apple/OpenELM-3B-Instruct monitoring: none name: OpenELM nationality: USA organization: Apple prohibited_uses: No explicit prohibited uses stated, though it is noted that users should undertake thorough safety testing. quality_control: unknown size: 3B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://machinelearning.apple.com/research/openelm - access: explanation: We release code & weights at https://github.com/apple/ml-depth-pro value: open analysis: Extensive experiments analyze specific design choices and demonstrate that Depth Pro outperforms prior work along multiple dimensions. created_date: 2024-10-10 dependencies: [] description: We present a foundation model for zero-shot metric monocular depth estimation. Our model, Depth Pro, synthesizes high-resolution depth maps with unparalleled sharpness and high-frequency details... The model is fast, producing a 2.25-megapixel depth map in 0.3 seconds on a standard GPU. feedback: unknown intended_uses: Zero-shot monocular depth estimation underpins a growing variety of applications, such as advanced image editing, view synthesis, and conditional image generation. license: unknown modality: explanation: We present a foundation model for zero-shot metric monocular depth estimation. value: image; depth maps model_card: unknown monitoring: unknown name: Depth Pro nationality: USA organization: Apple prohibited_uses: unknown quality_control: dedicated evaluation metrics for boundary accuracy in estimated depth maps, and state-of-the-art focal length estimation from a single image. size: unknown training_emissions: unknown training_hardware: explanation: "2.25-megapixel depth maps with a native output resolution of 1536\ \ \xD7 1536 in 0.3 seconds on a V100 GPU."
value: V100 GPU training_time: unknown type: model url: https://arxiv.org/pdf/2410.02073 ================================================ FILE: assets/argilla.yaml ================================================ - access: open analysis: Evaluated on MT-Bench and AlphaEval benchmarks. created_date: 2023-12-01 dependencies: - UltraFeedback - Zephyr description: Notus is an open source LLM, fine-tuned using Direct Preference Optimization (DPO) and AIF (AI Feedback) techniques. feedback: https://huggingface.co/argilla/notus-7b-v1/discussions intended_uses: Academic research and free commercial usage license: MIT modality: text; text model_card: https://huggingface.co/argilla/notus-7b-v1 monitoring: none name: Notus nationality: Belgium organization: Argilla prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: 8 x A100 40GB GPUs training_time: unknown type: model url: https://argilla.io/blog/notus7b/ ================================================ FILE: assets/argonne.yaml ================================================ - access: open analysis: '' created_date: 2022-10-11 dependencies: - SARS-CoV-2 genome dataset - BV-BRC dataset description: '' feedback: '' intended_uses: '' license: MIT modality: text; genome sequence model_card: '' monitoring: '' name: GenSLM nationality: USA organization: Argonne National Laboratory prohibited_uses: '' quality_control: '' size: 25B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1 ================================================ FILE: assets/aspia_space_institu.yaml ================================================ - access: open analysis: "The models\u2019 performance on downstream tasks was evaluated by linear\ \ probing. 
The models follow a similar saturating log-log scaling law to textual\ \ models; their performance improves with the increase in model size up to the\ \ saturation point of parameters." created_date: 2024-09-08 dependencies: - DESI Legacy Survey DR8 description: AstroPT is an autoregressive pretrained transformer developed with astronomical use-cases in mind. The models have been pretrained on 8.6 million 512x512 pixel grz-band galaxy postage stamp observations from the DESI Legacy Survey DR8. They have created a range of models with varying complexity, ranging from 1 million to 2.1 billion parameters. feedback: Any problem with the model can be reported to Michael J. Smith at mike@mjjsmith.com. intended_uses: The models are intended for astronomical use-cases, particularly in handling and interpreting large observation data from astronomical sources. license: MIT modality: image; image model_card: unknown monitoring: Unknown name: AstroPT nationality: unknown organization: "Aspia Space, Instituto de Astrof\xEDsica de Canarias (IAC), UniverseTBD,\ \ Astrophysics Research Institute, Liverpool John Moores University, Departamento\ \ Astrof\xEDsica, Universidad de la Laguna, Observatoire de Paris, LERMA, PSL\ \ University, and Universit\xE9 Paris-Cit\xE9." prohibited_uses: Unknown quality_control: "The models\u2019 performances were evaluated on downstream tasks\ \ as measured by linear probing." size: 2.1B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://arxiv.org/pdf/2405.14930v1 ================================================ FILE: assets/assembly.yaml ================================================ - access: closed analysis: '' created_date: explanation: The date Conformer-1 was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). value: 2023-03-15 datasheet: none dependencies: [] description: The dataset used to train AssemblyAI's Conformer-1 model.
excluded: '' feedback: '' included: '"our team curated a dataset of 650K hours of English audio - consisting of proprietary internal datasets and various sources from the internet" ' intended_uses: '' license: unknown modality: audio, text monitoring: '' name: Conformer-1 dataset nationality: USA organization: AssemblyAI prohibited_uses: '' quality_control: '' sample: [] size: 650K hours audio (60TB) type: dataset url: https://www.assemblyai.com/blog/conformer-1/ - access: limited analysis: explanation: See [[Results and Performance]](https://www.assemblyai.com/blog/conformer-1/#results-and-performance) value: In order to evaluate the accuracy and robustness of Conformer-1, we sourced 60+ hours of human labeled audio data covering popular speech domains such as call centers, podcasts, broadcasts, and webinars. We then calculated the Word Error Rate (WER) of Conformer-1 against these datasets, and compared the results against Whisper and a number of other ASR models. To ground our results against popular open source speech recognition benchmarks, we also performed the same WER analysis against a number of academic datasets. created_date: explanation: The date the model was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). value: 2023-03-15 dependencies: - Conformer-1 dataset description: Conformer-1 is a state-of-the-art speech recognition model trained on 650K hours of audio data that achieves near human-level performance and robustness across a variety of data, making up to 43% fewer errors on noisy data than other ASR models. 
feedback: '' intended_uses: '' license: unknown modality: audio, text; audio, text model_card: '' monitoring: '' name: Conformer-1 nationality: USA organization: AssemblyAI prohibited_uses: '' quality_control: '' size: 300M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.assemblyai.com/blog/conformer-1/ - access: limited adaptation: '' created_date: 2022 dependencies: - Anthropic API description: AssemblyAI uses Claude and Anthropic's model to transcribe and understand audio data at scale. failures: '' feedback: '' intended_uses: '' license: explanation: '"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable and non-exclusive license to use the Site and Services solely for your reasonable personal and commercial purposes, in the manner permitted by these Terms." - excerpt from the Terms of Service. ' value: custom monitoring: '' monthly_active_users: '' name: AssemblyAI nationality: USA organization: AssemblyAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.assemblyai.com/legal/terms-of-service type: application url: https://www.assemblyai.com/ user_distribution: '' - access: open adaptation: '' created_date: explanation: The date the API was announced in this [[Twitter post]](https://twitter.com/AssemblyAI/status/1636050346240884744). value: 2023-03-15 dependencies: - Conformer-1 description: API to access the AssemblyAI's Conformer-1 model. failures: '' feedback: '' intended_uses: Speech recognition license: explanation: '"AssemblyAI gives you a personal, worldwide, royalty-free, non-assignable and non-exclusive license to use the Site and Services solely for your reasonable personal and commercial purposes, in the manner permitted by these Terms." - excerpt from the Terms of Service. 
' value: custom monitoring: '' monthly_active_users: '' name: Conformer-1 API nationality: USA organization: AssemblyAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.assemblyai.com/legal/terms-of-service type: application url: https://www.assemblyai.com/blog/conformer-1/ user_distribution: '' ================================================ FILE: assets/autogpt.yaml ================================================ - access: open adaptation: GPT-4 adapted to run autonomously by chaining together LLM "thoughts" created_date: 2023-04-16 dependencies: - GPT-4 API description: Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model. failures: '' feedback: '' intended_uses: '' license: MIT monitoring: '' monthly_active_users: '' name: Auto-GPT nationality: USA organization: Auto-GPT output_space: text prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://news.agpt.co/ user_distribution: '' ================================================ FILE: assets/avignon.yaml ================================================ - access: open analysis: BioMistral was evaluated on a benchmark comprising 10 established medical question-answering (QA) tasks in English and seven other languages. created_date: 2024-02-15 dependencies: - Mistral - PubMed Central description: BioMistral is an open-source Large Language Model tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central. feedback: https://huggingface.co/BioMistral/BioMistral-7B/discussions intended_uses: Research in the biomedical domain, especially for medical question-answering tasks. 
license: Apache 2.0 modality: text; text model_card: https://huggingface.co/BioMistral/BioMistral-7B monitoring: '' name: BioMistral nationality: unknown organization: Avignon University, Nantes University prohibited_uses: Prohibited from deploying in production environments for natural language generation or any professional health and medical purposes. quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: 32 NVIDIA A100 80GB GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2402.10373.pdf ================================================ FILE: assets/baai.yaml ================================================ - access: closed analysis: '' created_date: explanation: 'The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). ' value: 2021-01-12 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: image, text monitoring: '' name: Wu Dao dataset nationality: China organization: Beijing Academy of Artificial Intelligence prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm - access: closed analysis: '' created_date: explanation: 'The date that BAAI made a public announcement [[News Link]](https://mp.weixin.qq.com/s/BUQWZ5EdR19i40GuFofpBg). ' value: 2021-01-12 dependencies: - Wu Dao dataset description: '' feedback: '' intended_uses: '' license: unknown modality: image, text model_card: '' monitoring: '' name: Wu Dao 2.0 nationality: China organization: Beijing Academy of Artificial Intelligence prohibited_uses: '' quality_control: '' size: 1.75T parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.tsinghua.edu.cn/en/info/1420/10473.htm - access: open analysis: Evaluated on objective and reliability metrics. 
created_date: 2023-10-26 dependencies: - Vicuna - JudgeLM Dataset description: JudgeLM is fine-tuned to be a scalable judge to evaluate LLMs efficiently and effectively in open-ended benchmarks. feedback: https://huggingface.co/BAAI/JudgeLM-13B-v1.0/discussions intended_uses: Research on evaluating the performance of large language models and chatbots. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/BAAI/JudgeLM-13B-v1.0 monitoring: none name: JudgeLM nationality: China organization: Beijing Academy of Artificial Intelligence prohibited_uses: none quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: 8 A100 40GB NVIDIA GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2310.17631.pdf - access: open analysis: none created_date: 2023-10-26 datasheet: '' dependencies: - Alpaca - GPT-4 - Dolly - ShareGPT - LLaMA - Vicuna description: "JudgeLM Dataset is a novel dataset replete with a rich variety of\ \ seed tasks, comprehensive answers from modern LLMs, answers\u2019 grades from\ \ the teacher judge, and detailed reasons for judgments." excluded: '' feedback: https://huggingface.co/datasets/BAAI/JudgeLM-100K/discussions included: '' intended_uses: To be used to conduct instruction-tuning for language models and make the language model able to judge open-ended answer pairs. license: CC BY NC 4.0 modality: text, text monitoring: none name: JudgeLM Dataset nationality: China organization: Beijing Academy of Artificial Intelligence prohibited_uses: none quality_control: '' sample: [] size: 105k judge samples type: dataset url: https://huggingface.co/datasets/BAAI/JudgeLM-100K - access: open analysis: Compared to other segmentation models across different modalities on BraTS2023 dataset.
created_date: 2024-01-25 dependencies: [] description: SegMamba is a novel 3D medical image Segmentation Mamba model, designed to effectively capture long-range dependencies within whole volume features at every scale. feedback: none intended_uses: '' license: Apache 2.0 modality: image; text model_card: none monitoring: unknown name: SegMamba nationality: unknown organization: Hong Kong University of Science and Technology (Guangzhou + original), Beijing Academy of Artificial Intelligence prohibited_uses: '' quality_control: unknown size: unknown training_emissions: unknown training_hardware: 4 NVIDIA A100 GPUs training_time: 1000 epochs type: model url: https://arxiv.org/pdf/2401.13560v2.pdf - access: open analysis: Evaluated on standard datasets in multilingual, cross-lingual, long document retrieval, and Q&A domains. created_date: 2024-02-05 dependencies: [] description: BGE M3 Embedding is a new embedding model that can support more than 100 working languages, leading to new state-of-the-art performances on multi-lingual and cross-lingual retrieval tasks. feedback: https://huggingface.co/BAAI/bge-m3/discussions intended_uses: '' license: MIT modality: text; text model_card: https://huggingface.co/BAAI/bge-m3 monitoring: unknown name: BGE M3 Embedding nationality: unknown organization: Beijing Academy of Artificial Intelligence, University of Science and Technology of China prohibited_uses: '' quality_control: unknown size: unknown training_emissions: unknown training_hardware: 32 A100 40GB GPUs training_time: 20,000 steps type: model url: https://arxiv.org/pdf/2402.03216.pdf - access: open analysis: Evaluated on zero-shot classification performance across multiple image classification benchmarks. created_date: 2024-02-06 dependencies: - CLIP description: As of release, EVA-CLIP is the largest and most powerful open-source CLIP model to date, with 18 billion parameters. 
feedback: https://huggingface.co/BAAI/EVA-CLIP-8B-448/discussions intended_uses: '' license: MIT modality: image, text; text model_card: https://huggingface.co/BAAI/EVA-CLIP-8B-448 monitoring: unknown name: EVA-CLIP nationality: unknown organization: Beijing Academy of Artificial Intelligence, Tsinghua University prohibited_uses: '' quality_control: '' size: 18B parameters (dense) training_emissions: unknown training_hardware: 384 A100 40GB GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2402.04252.pdf ================================================ FILE: assets/baichuan.yaml ================================================ - access: open analysis: Evaluated on public benchmarks like MMLU, CMMLU, GSM8K, and HumanEval. created_date: 2023-09-20 dependencies: [] description: Baichuan 2 is a series of large-scale multilingual language models containing 7 billion and 13 billion parameters, trained from scratch, on 2.6 trillion tokens. feedback: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: none monitoring: none name: Baichuan 2 nationality: China organization: Baichuan Inc. 
prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: 1024 NVIDIA A800 GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2309.10305.pdf ================================================ FILE: assets/baidu.yaml ================================================ - access: explanation: Baidu does not provide ERNIE 3.0 for external researchers value: closed analysis: '' created_date: explanation: The date the model paper was released value: 2021-12-23 dependencies: [] description: ERNIE 3.0 Titan is a language model feedback: none intended_uses: unknown license: unknown modality: text; text model_card: none monitoring: none name: ERNIE 3.0 Titan nationality: China organization: Baidu, PengCheng Laboratory prohibited_uses: unknown quality_control: none size: 260B parameters (dense) training_emissions: unknown training_hardware: Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster training_time: unknown type: model url: https://arxiv.org/abs/2112.12731 - access: explanation: API access is available through PaddleHub. 
Instructions on [[Github repository]](https://github.com/PaddlePaddle/PaddleHub/blob/develop/modules/image/text_to_image/ernie_vilg/README.md) value: limited analysis: '' created_date: explanation: The date the model paper was released value: 2021-12-31 dependencies: [] description: ERNIE-ViLG is a model for text-to-image generation feedback: none intended_uses: unknown license: explanation: 'No explicit license for the ERNIE-ViLG model is provided in the [[Github repository]](https://github.com/PaddlePaddle/PaddleHub) ' value: none modality: text; image model_card: none monitoring: none name: ERNIE-ViLG nationality: China organization: Baidu prohibited_uses: unknown quality_control: '' size: 10B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/abs/2112.15283 - access: explanation: Baidu does not provide ERNIE-ViLG 2.0 to external researchers value: closed analysis: '' created_date: explanation: The date the model paper was released value: 2022-10-27 dependencies: [] description: ERNIE-ViLG is a model for text-to-image generation feedback: none intended_uses: unknown license: explanation: 'The asset isn''t released, and hence the license is unknown. ''' value: unknown modality: text; image model_card: none monitoring: none name: ERNIE-ViLG 2.0 nationality: China organization: Baidu prohibited_uses: unknown quality_control: '' size: 10B parameters (dense) training_emissions: unknown training_hardware: 320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257) training_time: 18 days according to [[the paper]](https://arxiv.org/abs/2210.15257) type: model url: https://arxiv.org/abs/2210.15257 - access: limited analysis: '' created_date: 2023-10-17 dependencies: [] description: ERNIE-4.0 is a multimodal generalist foundation model. 
feedback: none intended_uses: unknown license: unknown modality: text; image, text, video model_card: none monitoring: none name: ERNIE 4.0 nationality: China organization: Baidu prohibited_uses: unknown quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: explanation: No official statement from Baidu, so news release is provided. value: https://www.prnewswire.com/news-releases/baidu-launches-ernie-4-0-foundation-model-leading-a-new-wave-of-ai-native-applications-301958681.html ================================================ FILE: assets/bain.yaml ================================================ - access: limited adaptation: '' created_date: 2023-02-21 dependencies: - ChatGPT API description: "With the alliance, Bain will combine its deep digital implementation\ \ capabilities and strategic expertise with OpenAI\u2019s AI tools and platforms,\ \ including ChatGPT, to help its clients around the world identify and implement\ \ the value of AI to maximize business potential." failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Bain Chat nationality: USA organization: Bain output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/ user_distribution: '' ================================================ FILE: assets/be_my_eyes.yaml ================================================ - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - GPT-4 API description: "The first-ever digital visual assistant powered by OpenAI\u2019s new\ \ GPT-4 language model." 
failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Virtual Volunteer nationality: Denmark organization: Be My Eyes output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer user_distribution: '' ================================================ FILE: assets/beitech.yaml ================================================ - access: open analysis: Models fine-tuned on EXMODD and earlier dataset Image-Chat and then evaluated on Image-Chat validation set. created_date: 2023-10-17 datasheet: none dependencies: - YFCC100M - Image-Chat description: EXMODD (Explanatory Multimodal Open-Domain Dialogue dataset) is a dataset built off the proposed MDCF (Multimodal Data Construction Framework). excluded: '' feedback: Feedback can be sent to authors via poplpr@bit.edu.cn included: '' intended_uses: '' license: MIT modality: image, text monitoring: none name: EXMODD nationality: China organization: Beijing Institute of Technology prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://arxiv.org/pdf/2310.10967.pdf - access: open analysis: Evaluated on standard benchmarks including MMLU, CEval, and DROP. created_date: 2023-11-13 dependencies: - Llama 2 description: MiniMA is a smaller finetuned Llama 2 model adapted for Chinese. 
feedback: https://huggingface.co/GeneZC/MiniMA-3B/discussions intended_uses: '' license: Llama 2 modality: text; text model_card: https://huggingface.co/GeneZC/MiniMA-3B monitoring: unknown name: MiniMA nationality: China organization: Beijing Institute of Technology prohibited_uses: '' quality_control: '' size: 3B parameters (dense) training_emissions: unknown training_hardware: 8 A100 80G GPUs training_time: unknown type: model url: https://github.com/GeneZC/MiniMA ================================================ FILE: assets/berkeley.yaml ================================================ - access: open analysis: Evaluated in comparison with ChatGPT and Stanford Alpaca. created_date: 2023-04-03 dependencies: - LLaMA - web-scraped dialogue data description: "A relatively small chatbot trained by fine-tuning Meta\u2019s LLaMA\ \ on dialogue data gathered from the web." feedback: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions intended_uses: academic research license: Apache 2.0 modality: text; text model_card: https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g monitoring: '' name: Koala nationality: USA organization: Berkeley prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: '' training_hardware: 8 A100 GPUs training_time: 6 hours type: model url: https://bair.berkeley.edu/blog/2023/04/03/koala/ - access: open analysis: Evaluated using AST sub-tree matching technique and compared to other models in terms of API functionality accuracy. created_date: 2023-05-24 dependencies: - LLaMA - Gorilla document retriever description: Gorilla is a finetuned LLaMA-based model that surpasses the performance of GPT-4 on writing API calls. feedback: '' intended_uses: In conjunction with a LLM to improve its capability for using API calls.
license: Apache 2.0 modality: text; API model_card: '' monitoring: '' name: Gorilla nationality: USA organization: Berkeley prohibited_uses: '' quality_control: No specific quality control is mentioned in model training, though details on data processing and collection are provided in the paper. size: 7B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2305.15334v1.pdf - access: open analysis: Evaluated on wide range of tasks using own evaluation benchmarks. created_date: 2023-05-03 dependencies: - RedPajama description: OpenLlama is an open source reproduction of Meta's LLaMA model. feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: OpenLLaMA nationality: USA organization: Berkeley prohibited_uses: '' quality_control: '' size: 17B parameters (dense) training_emissions: unknown training_hardware: '' training_time: unknown type: model url: https://github.com/openlm-research/open_llama ================================================ FILE: assets/bigcode.yaml ================================================ - access: open analysis: Tested on several benchmarks, most notably Python benchmark HumanEval. created_date: 2023-05-09 dependencies: - The Stack description: StarCoder is a Large Language Model for Code (Code LLM) trained on permissively licensed data from GitHub, including from 80+ programming languages, Git commits, GitHub issues, and Jupyter notebooks. feedback: https://huggingface.co/bigcode/starcoder/discussions intended_uses: As a foundation model to fine-tune and create more specialized models that support use cases such as code completion, fill-in-the-middle, and text summarization. Can also be used as a Tech Assistant prompt and not as an instruction model given training limitations. 
license: BigCode Open RAIL-M v1.0 modality: code; code model_card: https://huggingface.co/bigcode/starcoder monitoring: '' name: StarCoder nationality: International organization: BigCode prohibited_uses: See BigCode Open RAIL-M license and FAQ quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. size: 15.5B parameters (dense) training_emissions: 16.68 tons of CO2eq training_hardware: 512 A100 80GB GPUs distributed across 64 nodes training_time: 320,256 GPU hours type: model url: https://arxiv.org/pdf/2305.06161.pdf - access: open analysis: Evaluated on MultiPL-E system benchmarks. created_date: 2023-02-24 dependencies: - The Stack - BigCode Dataset description: Multilingual code model derived from the findings of BigCode Project analysis on Github stars' association to data quality. feedback: https://huggingface.co/bigcode/santacoder/discussions intended_uses: The model was trained on GitHub code. As such it is not an instruction model and commands do not work well. You should phrase commands like they occur in source code such as comments or write a function signature and docstring and let the model complete the function body. license: BigCode Open RAIL-M v1 modality: code; code model_card: https://huggingface.co/bigcode/santacoder monitoring: '' name: SantaCoder nationality: International organization: BigCode prohibited_uses: See BigCode Open RAIL-M license and FAQ quality_control: '' size: 1.1B parameters (dense) training_emissions: 124 kg of CO2eq training_hardware: 96 NVIDIA Tesla V100 GPUs training_time: 14,284 GPU hours type: model url: https://arxiv.org/pdf/2301.03988.pdf - access: open analysis: Evaluated models trained on The Stack on HumanEval and MBPP and compared against similarly-sized models. 
created_date: 2022-11-20 datasheet: https://huggingface.co/datasets/bigcode/the-stack dependencies: - GitHub description: The Stack contains over 6TB of permissively-licensed source code files covering 358 programming languages. The Stack serves as a pre-training dataset for Code LLMs, i.e., code-generating AI systems which enable the synthesis of programs from natural language descriptions as well as other from code snippets. excluded: '' feedback: https://huggingface.co/datasets/bigcode/the-stack/discussions included: '' intended_uses: creating code LLMs license: The Stack is a collection of source code from repositories with various licenses. Any use of all or part of the code gathered in The Stack must abide by the terms of the original licenses, including attribution clauses when relevant. Provenance information is provided for each data point. modality: code monitoring: '' name: The Stack nationality: International organization: BigCode prohibited_uses: See https://huggingface.co/datasets/bigcode/the-stack quality_control: allowed users whose data were part of The Stack's training data to opt-out sample: - https://huggingface.co/datasets/bigcode/the-stack/viewer/default/train size: 6 TB type: dataset url: https://arxiv.org/pdf/2211.15533.pdf - access: open analysis: See https://arxiv.org/pdf/2402.19173.pdf created_date: 2024-02-28 dependencies: - The Stack v2 description: StarCoder2-15B model is a 15B parameter model trained on 600+ programming languages from The Stack v2, with opt-out requests excluded. The training was carried out using the Fill-in-the-Middle objective on 4+ trillion tokens. feedback: https://huggingface.co/bigcode/starcoder2-15b/discussions intended_uses: The model was trained on GitHub code as well as additional selected data sources such as Arxiv and Wikipedia. As such it is not an instruction model and commands like "Write a function that computes the square root." do not work well. 
Intended to generate code snippets from given context, but not for writing actual functional code directly. license: BigCode OpenRail-M modality: code; text model_card: https://huggingface.co/bigcode/starcoder2-15b monitoring: unknown name: StarCoder2-15B nationality: International organization: BigCode prohibited_uses: See BigCode Open RAIL-M license and FAQ quality_control: The model was filtered for permissive licenses and code with no license only. A search index is provided to identify where generated code came from to apply the proper attribution. size: 15B parameters (dense) training_emissions: unknown training_hardware: 1024 x H100 GPUs training_time: unknown type: model url: https://www.servicenow.com/company/media/press-room/huggingface-nvidia-launch-starcoder2.html - access: open analysis: See https://arxiv.org/pdf/2402.19173.pdf created_date: 2024-02-28 dependencies: - The Stack v2 description: StarCoder2-7B model is a 7B parameter model trained on 17 programming languages from The Stack v2, with opt-out requests excluded. The model uses Grouped Query Attention, a context window of 16,384 tokens with a sliding window attention of 4,096 tokens, and was trained using the Fill-in-the-Middle objective on 3.5+ trillion tokens. feedback: https://huggingface.co/bigcode/starcoder2-7b/discussions intended_uses: Intended to generate code snippets from given context, but not for writing actual functional code directly. The model has been trained on source code from 17 programming languages. The predominant language in source is English although other languages are also present. As such the model is capable of generating code snippets provided some context but the generated code is not guaranteed to work as intended. It can be inefficient and contain bugs or exploits. See the paper for an in-depth discussion of the model limitations. 
license: BigCode OpenRail-M modality: code; text model_card: https://huggingface.co/bigcode/starcoder2-7b monitoring: unknown name: StarCoder2-7B nationality: International organization: BigCode prohibited_uses: See BigCode Open RAIL-M license and FAQ quality_control: The model was filtered for permissive licenses and code with no license only. A search index is provided to identify where generated code came from to apply the proper attribution. size: 7B parameters (dense) training_emissions: 29,622.83 kgCO2eq training_hardware: 432 H100 GPUs training_time: 145,152 hours (cumulative) type: model url: https://www.servicenow.com/company/media/press-room/huggingface-nvidia-launch-starcoder2.html - access: open analysis: See https://arxiv.org/pdf/2402.19173.pdf created_date: 2024-02-28 dependencies: - The Stack v2 description: StarCoder2-3B model is a 3B parameter model trained on 17 programming languages from The Stack v2, with opt-out requests excluded. The model uses Grouped Query Attention, a context window of 16,384 tokens with a sliding window attention of 4,096 tokens, and was trained using the Fill-in-the-Middle objective on 3+ trillion tokens. feedback: https://huggingface.co/bigcode/starcoder2-3b/discussions intended_uses: Intended to generate code snippets from given context, but not for writing actual functional code directly. The model has been trained on source code from 17 programming languages. The predominant language in source is English although other languages are also present. As such the model is capable of generating code snippets provided some context but the generated code is not guaranteed to work as intended. It can be inefficient and contain bugs or exploits. See the paper for an in-depth discussion of the model limitations. 
license: BigCode OpenRail-M modality: code; text model_card: https://huggingface.co/bigcode/starcoder2-3b monitoring: unknown name: StarCoder2-3B nationality: International organization: BigCode prohibited_uses: See BigCode Open RAIL-M license and FAQ quality_control: The model was filtered for permissive licenses and code with no license only. A search index is provided to identify where generated code came from to apply the proper attribution. size: 3B parameters (dense) training_emissions: 16,107.01 kgCO2eq training_hardware: 160 A100 GPUs training_time: 97,120 hours (cumulative) type: model url: https://www.servicenow.com/company/media/press-room/huggingface-nvidia-launch-starcoder2.html ================================================ FILE: assets/bigscience.yaml ================================================ - access: explanation: While not all of the ROOTS corpus is publicly accessible, the majority is through [this form](https://docs.google.com/forms/d/e/1FAIpQLSdq50O1x4dkdGI4dwsmchFuNI0KCWEDiKUYxvd0r0_sl6FfAQ/viewform?usp=send_form) value: open analysis: https://huggingface.co/spaces/bigscience-data/roots-search created_date: 2022-06-06 datasheet: https://huggingface.co/spaces/bigscience-data/bigscience-corpus dependencies: [] description: The Responsible Open-science Open-collaboration Text Sources (ROOTS) corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter BigScience Large Open-science Open-access Multilingual (BLOOM) language model. excluded: unknown feedback: https://huggingface.co/spaces/bigscience-data/roots-search/discussions included: See section 2 of the paper. intended_uses: To empower large-scale monolingual and multilingual modeling projects with both the data and the processing tools, as well as stimulate research around this large multilingual corpus. license: explanation: Each constituent subset of the dataset will be released under the license that applies to it. 
([See individual dataset page for specific license information](https://hf.co/bigscience-data)) value: custom modality: code, text monitoring: none name: ROOTS nationality: International organization: BigScience prohibited_uses: none quality_control: Data collection involved merging and deduplicating searches to remove menus, HTML tags. Further, a quality improvement pipeline was implemented. sample: - https://huggingface.co/spaces/bigscience-data/roots-search size: 1.6TB type: dataset url: https://openreview.net/forum?id=UoEw6KigkUn - access: open analysis: See the paper. created_date: 2022-10-15 datasheet: https://huggingface.co/datasets/bigscience/P3 dependencies: [] description: The Public Pool of Prompts (P3) are prompts written in a unified format used to train T0++. excluded: We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content). feedback: Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3) included: The Public Pool of Prompts relies on the Hugging Face Dataset library. Any public dataset in the Datasets library can be prompted. We select the datasets that have at least one subset in English and excluded datasets containing (predominantly) non-natural language examples. intended_uses: Multitask finetuning of language models. license: Apache 2.0 modality: text monitoring: none name: P3 nationality: International organization: BigScience prohibited_uses: none quality_control: Data collection involved merging and deduplicating searches to remove menus, HTML tags. Further, a quality improvement pipeline was implemented. sample: - https://huggingface.co/datasets/bigscience/P3/viewer size: 2000 prompts type: dataset url: https://arxiv.org/pdf/2110.08207.pdf - access: open analysis: See the paper.
created_date: 2022-11-03 datasheet: https://huggingface.co/datasets/bigscience/xP3 dependencies: - P3 description: xP3 (Crosslingual Public Pool of Prompts) is a collection of prompts and datasets across 46 languages & 16 NLP tasks. It is used for the training of BLOOMZ and mT0, multilingual language models capable of following human instructions in dozens of languages zero-shot. excluded: We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content). feedback: Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3) included: xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy. intended_uses: Multitask finetuning of language models. license: Apache 2.0 modality: code, text monitoring: none name: xP3 nationality: International organization: BigScience prohibited_uses: none quality_control: Data collection involved merging and deduplicating searches to remove menus, HTML tags. Further, a quality improvement pipeline was implemented. sample: - https://huggingface.co/datasets/bigscience/xP3/viewer size: 9.4GB type: dataset url: https://arxiv.org/abs/2211.01786 - access: open analysis: '' created_date: explanation: The date the paper was posted to arXiv. value: 2021-10-15 dependencies: - T5 - P3 description: T0++ is a multitask fine-tuned language model based on T5. feedback: https://huggingface.co/bigscience/T0pp/discussions intended_uses: You can use the models to perform inference on tasks by specifying your query in natural language, and the models will generate a prediction.
license: Apache 2.0 modality: text; text model_card: https://huggingface.co/bigscience/T0pp monitoring: none name: T0++ nationality: International organization: BigScience prohibited_uses: none quality_control: https://arxiv.org/pdf/2110.08207.pdf size: 11B parameters (dense) training_emissions: explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf value: 0.9 tCO2e training_hardware: Jean Zay (v3-512) training_time: explanation: Sourced from estimates in https://arxiv.org/pdf/2110.08207.pdf value: 27 hours type: model url: https://arxiv.org/pdf/2110.08207.pdf - access: open analysis: '' created_date: explanation: The date the model was released value: 2022-07-12 dependencies: - ROOTS description: BLOOM is an autoregressive multilingual language model. feedback: https://huggingface.co/bigscience/bloom/discussions intended_uses: This model is being created in order to enable public research on large language models (LLMs). LLMs are intended to be used for language generation or as a pretrained base model that can be further fine-tuned for specific tasks. Use cases below are not exhaustive. license: explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license value: BigScience RAIL v1.0 modality: code, text; code, text model_card: '' monitoring: none name: BLOOM nationality: International organization: BigScience prohibited_uses: Using the model in high-stakes settings is out of scope for this model (e.g. biomedical/political/legal/finance domains, evaluating or scoring individuals). The model is not designed for critical decisions nor uses with any material consequences on an individual's livelihood or wellbeing. The model outputs content that appears factual but may not be correct. Misuse. Intentionally using the model for harm, violating human rights, or other kinds of malicious activities, is a misuse of this model (e.g. spam generation, disinformation, disparagement, deception, surveillance). 
quality_control: '' size: 176B parameters (dense) training_emissions: explanation: Sourced from estimates in https://arxiv.org/abs/2211.02001 value: 25 tCO2e training_hardware: Jean Zay (48 * 8xA100 80GB nodes) training_time: explanation: 1082990 A100 hours at 156 TFLOP/s maximum utilization value: 7039 petaflop/s-days type: model url: https://arxiv.org/abs/2211.05100 - access: open analysis: https://huggingface.co/bigscience/bloomz#evaluation created_date: explanation: The date the model was released value: 2021-10-15 dependencies: - mT5 - xP3 description: mT0 is a multitask fine-tuned multilingual language model based on mT5. feedback: https://huggingface.co/bigscience/bloomz/discussions intended_uses: We recommend using the model to perform tasks expressed in natural language. license: explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license value: BigScience RAIL v1.0 modality: text; text model_card: https://huggingface.co/bigscience/T0pp monitoring: none name: mT0 nationality: International organization: BigScience prohibited_uses: none quality_control: https://arxiv.org/pdf/2211.01786.pdf size: 13B parameters (dense) training_emissions: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) training_time: unknown type: model url: https://arxiv.org/pdf/2110.08207.pdf - access: open analysis: https://huggingface.co/bigscience/bloomz#evaluation created_date: 2022-11-03 dependencies: - BLOOM - xP3 description: BLOOMZ is a multitask fine-tuned autoregressive multilingual language model. feedback: https://huggingface.co/bigscience/bloomz/discussions intended_uses: We recommend using the model to perform tasks expressed in natural language.
license: explanation: Model is licensed under https://huggingface.co/spaces/bigscience/license value: BigScience RAIL v1.0 modality: code, text; code, text model_card: https://huggingface.co/bigscience/bloomz monitoring: none name: BLOOMZ nationality: International organization: BigScience prohibited_uses: none quality_control: https://arxiv.org/pdf/2211.01786.pdf size: 176B parameters (dense) training_emissions: unknown training_hardware: Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links) training_time: unknown type: model url: https://arxiv.org/pdf/2211.01786.pdf ================================================ FILE: assets/bloomberg.yaml ================================================ - access: explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) value: closed analysis: '' created_date: explanation: The date BloombergGPT was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). value: 2023-03-30 datasheet: explanation: Section 2 of the BloombergGPT paper. value: https://arxiv.org/pdf/2303.17564.pdf#section.2 dependencies: [] description: A comprehensive dataset consisting of a range of English financial documents including news, filings, press releases, web-scraped financial documents, and social media drawn from the Bloomberg archives that was used to train the BloombergGPT model. excluded: '' feedback: '' included: explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) value: "FinPile consists of English financial documents. Authors utilize the The\ \ Bloomberg\nTerminal, which is an extensive collection of curated and maintained\ \ documents,\nto create the FinPile dataset. Each document in FinPile is time-stamped,\ \ with\ndates ranging from 2007-03-01 to 2022-07-31.\nTypes of data included\ \ are given below:\n 1. 
Web (298B tokens) - Includes\ \ Bloomberg's web crawl focused\ \ on high-quality\nwebsites that have financially relevant information. This\ \ makes up the majority\nof FinPile.\n 2. News (38B tokens) - Includes all\ \ news sources relevant to the financial\ncommunity, excluding news articles\ \ written by Bloomberg journalists. Overall,\nthere are hundreds of English\ \ news sources in FinPile including \"Bloomberg\nTranscripts\", which are transcripts\ \ of Bloomberg TV news.\n 3. Filings (14B tokens) - Includes financial statements\ \ prepared by (public)\ncompanies and made available to the general public.\ \ In the dataset, a majority\nof the filings come from EDGAR, which is the\ \ SEC's online database.\n 4. Press (9B tokens) - Includes press releases typically\ \ issued by companies\nthat are financially relevant.\n 5. Bloomberg (5B tokens)\ \ - Includes Bloomberg authored news and other documents\nsuch as opinions and\ \ analyses. The largest sources are \u201CBloomberg News\u201D and\n\u201CBloomberg\ \ First Word\u201D, the Bloomberg-authored wire of real-time news.\n" intended_uses: Used to train the BloombergGPT model. license: unknown modality: text monitoring: '' name: FinPile nationality: USA organization: Bloomberg prohibited_uses: '' quality_control: '' sample: [] size: explanation: See [[Financial Datasets]](https://arxiv.org/pdf/2303.17564.pdf#subsection.2.1) value: 363B tokens type: dataset url: https://arxiv.org/pdf/2303.17564.pdf#section.2 - access: explanation: See [[Openness]](https://arxiv.org/pdf/2303.17564.pdf#subsection.8.2) value: closed analysis: explanation: See [[Evaluation]](https://arxiv.org/pdf/2303.17564.pdf#section.5) value: 'Authors evaluate the performance of BloombergGPT on two broad categories of tasks, finance-specific and general purpose, on several standard benchmarks. They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B and BLOOM-176B.
They also report results from the original GPT-3 whenever externally available. They conclude "We achieve strong results on general LLM benchmarks and outperform comparable models on financial tasks. We attribute this, in decreasing order of impact, to 1. a well-curated internal dataset, 2. our unique choice in tokenizer, and 3. an up-to-date architecture." ' created_date: explanation: The date the model was announced in the [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/). value: 2023-03-30 dependencies: - FinPile - The Pile - C4 - Wikipedia description: BloombergGPT is a 50 billion parameter large language model that is specifically trained on a wide range of financial data to support a diverse set of natural language processing tasks within the financial industry. feedback: '' intended_uses: explanation: See [[Bloomberg article]](https://www.bloomberg.com/company/press/bloomberggpt-50-billion-parameter-llm-tuned-finance/) value: '"This model will assist Bloomberg in improving existing financial NLP tasks, such as sentiment analysis, named entity recognition, news classification, and question answering, among others. Furthermore, BloombergGPT will unlock new opportunities for marshalling the vast quantities of data available on the Bloomberg Terminal to better help the firm''s customers, while bringing the full potential of AI to the financial domain." ' license: unknown modality: text; text model_card: none monitoring: '' name: BloombergGPT nationality: USA organization: Bloomberg prohibited_uses: '' quality_control: explanation: See [[Ethics, Limitations, and Implications]](https://arxiv.org/pdf/2303.17564.pdf#section.8) value: "Authors state the following:\n- \"To provide natural language applications\ \ to the financial community, we\n have developed a rigorous risk and testing\ \ assessment process. This process\n includes careful annotation guidelines\ \ Tseng et al. 
(2020), pre-launch review\n at multiple levels by the central\ \ risk and compliance organizations, and\n by the product leaders (e.g., the\ \ newsroom) as applicable, and post-launch\n monitoring. Moreover, we conduct\ \ our research, development, and deployment\n of NLP and AI systems in accordance\ \ with all applicable regulations.\"\n- \"Similarly, toxicity and bias are areas\ \ where, as a company, we take extraordinary\n care with any content we produce,\ \ whether from humans or machines. Since\n the measurement of toxicity and\ \ bias in our model depends on its application\n areas, quantifying the potential\ \ for the generation of harmful language\n remains an open question. We are\ \ particularly interested in studying whether\n FinPile, which is cleaner and\ \ contains fewer examples of overtly biased\n or toxic language (e.g., Press\ \ Releases), reduces the proclivity of the\n model to generate inappropriate\ \ content.\"\n" size: 50B parameters (dense) training_emissions: unknown training_hardware: explanation: See [[Training Configuration]](https://arxiv.org/pdf/2303.17564.pdf#subsection.3.3) value: 64 Amazon EC2 p4d.24xlarge instances each with 8 NVIDIA 40GB A100 GPUs (i.e. total 512 A100 GPUs) training_time: explanation: See [[Training Run]](https://arxiv.org/pdf/2303.17564.pdf#section.4) value: 53 days type: model url: https://arxiv.org/abs/2303.17564 ================================================ FILE: assets/boston.yaml ================================================ - access: open analysis: "Platypus achieves the strongest performance and currently stands at first\ \ place in HuggingFace\u2019s Open LLM Leaderboard as of its release date." created_date: 2023-08-14 dependencies: - LLaMA 2 - Platypus curated dataset description: Platypus is a family of fine-tuned and merged Large Language Models (LLMs). 
feedback: https://huggingface.co/garage-bAInd/Platypus2-13B/discussions intended_uses: '' license: CC by-NC-SA 4.0 modality: text; text model_card: https://huggingface.co/garage-bAInd/Platypus2-13B monitoring: '' name: Platypus nationality: USA organization: Boston University prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: '' training_hardware: 1 A100 GPU training_time: 5 hours type: model url: https://arxiv.org/pdf/2308.07317.pdf - access: open analysis: UFOGen is evaluated on standard image benchmarks against other models fine-tuned with Stable Diffusion. created_date: 2023-11-14 dependencies: - Stable Diffusion description: UFOGen is a novel generative model designed for ultra-fast, one-step text-to-image synthesis. feedback: none intended_uses: '' license: unknown modality: text; image model_card: none monitoring: '' name: UFOGen nationality: USA organization: Boston University prohibited_uses: '' quality_control: '' size: 900M parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2311.09257.pdf ================================================ FILE: assets/brex.yaml ================================================ - access: limited adaptation: '' created_date: 2023-03-07 dependencies: - ChatGPT API description: Brex Inc., a highly valued startup that makes software for finance professionals, is turning to the same artificial intelligence tool behind ChatGPT for a service that can answer questions about corporate budgets, policy and spending. failures: '' feedback: '' intended_uses: '' license: explanation: '"Brex grants you a nonexclusive and nontransferable license to use Brex Property as provided through the Services and as permitted by these User Terms." - excerpt from the Terms of Service. 
' value: custom monitoring: '' monthly_active_users: '' name: Brex Chat nationality: USA organization: Brex output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.brex.com/legal/user-terms type: application url: https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/ user_distribution: '' ================================================ FILE: assets/bytedance.yaml ================================================ - access: open analysis: Evaluated on benchmarks pertaining to speech, music, and other audio recognition. created_date: 2023-10-20 dependencies: - Whisper - BEATs - Vicuna description: SALMONN is a large language model (LLM) enabling speech, audio event, and music inputs. feedback: https://huggingface.co/MSIIP/SALMONN/discussions intended_uses: '' license: Apache 2.0 modality: audio, text; text model_card: https://huggingface.co/MSIIP/SALMONN monitoring: none name: SALMONN nationality: unknown organization: ByteDance, Tsinghua University prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/bytedance/SALMONN - access: open analysis: Evaluated via qualitative comparison relative to other SoTA image generation models. created_date: 2024-02-21 dependencies: - Stable Diffusion XL description: SDXL-Lightning is a lightning-fast text-to-image generation model. It can generate high-quality 1024px images in a few steps. The models are distilled from stabilityai/stable-diffusion-xl-base-1.0. This repository contains checkpoints for 1-step, 2-step, 4-step, and 8-step distilled models. feedback: https://huggingface.co/ByteDance/SDXL-Lightning/discussions intended_uses: The model can be used for fast, high-quality text-to-image generation. It supports 1-step, 2-step, 4-step, and 8-step distilled models which provide varying generation quality. 
license: OpenRail++ modality: text; image model_card: https://huggingface.co/ByteDance/SDXL-Lightning monitoring: unknown name: SDXL-Lightning nationality: China organization: ByteDance prohibited_uses: unknown quality_control: unknown size: unknown training_emissions: unknown training_hardware: 64 A100 80G GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2402.13929.pdf - access: open analysis: LLaVA-Critic was tested in scenarios such as LMM-as-a-Judge and Preference Learning, showing a high correlation with commercial GPT models in evaluation scores. It served as an alternative to expensive human feedback in resource-constrained settings and demonstrated better performance in providing AI-generated feedback for model alignment compared to human-reliant reward models. created_date: 2024-10-06 dependencies: [] description: LLaVA-Critic is an open-source large multimodal model (LMM) designed as a generalist evaluator. It assesses performance across a variety of multimodal tasks by following a high-quality critic instruction dataset, incorporating diverse evaluation criteria. The model is effective in areas like LMM-as-a-Judge, providing reliable evaluation scores comparable to GPT models, and Preference Learning, offering reward signals for preference learning to enhance model alignment capabilities. feedback: unknown intended_uses: The model can be used for evaluating multimodal tasks, generating reward signals for preference learning, and serving as a reliable alternate judge for model assessments. license: Apache 2.0 modality: image, text; text model_card: unknown monitoring: unknown name: LLaVA-Critic nationality: unknown organization: ByteDance and University of Maryland, College Park prohibited_uses: The model should not be used in scenarios requiring authorization from proprietary models, nor relied upon for critical applications without human oversight due to potential biases in dataset. 
quality_control: The model ensures quality by utilizing a high-quality dataset for critic instructions, providing both quantitative judgments and reasoning, with transparency in assessments. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2410.02712 ================================================ FILE: assets/cagliostro.yaml ================================================ - access: open analysis: unknown created_date: 2024-03-18 dependencies: - Animagine XL 3.0 description: An open-source, anime-themed text-to-image model enhanced to generate higher quality anime-style images with a broader range of characters from well-known anime series, an optimized dataset, and new aesthetic tags for better image creation. feedback: https://huggingface.co/cagliostrolab/animagine-xl-3.1/discussions intended_uses: Generating high-quality anime images from textual prompts. Useful for anime fans, artists, and content creators. license: Fair AI Public License 1.0-SD modality: text; image model_card: https://huggingface.co/cagliostrolab/animagine-xl-3.1 monitoring: unknown name: Animagine XL 3.1 nationality: USA organization: Cagliostro Research Lab prohibited_uses: Not suitable for creating realistic photos or for users who expect high-quality results from short or simple prompts. quality_control: The model undergoes pretraining, first stage finetuning, and second stage finetuning for refining and improving aspects such as hand and anatomy rendering. size: unknown training_emissions: unknown training_hardware: 2x A100 80GB GPUs training_time: Approximately 15 days, totaling over 350 GPU hours. 
type: model url: https://cagliostrolab.net/posts/animagine-xl-v31-release ================================================ FILE: assets/cartesia.yaml ================================================ - access: limited analysis: Extensive testing on Multilingual Librispeech dataset resulted in 20% lower validation perplexity. In downstream evaluations, this leads to a 2x lower word error rate and a 1 point higher quality score. Sonic also displays impressive performance metrics at inference, achieving lower latency (1.5x lower time-to-first-audio), faster inference speed (2x lower real-time factor), and higher throughput (4x). created_date: 2024-05-29 dependencies: - Multilingual Librispeech dataset description: Sonic is a low-latency voice model that generates lifelike speech. Developed by Cartesia, it was designed to be an efficient real-time AI capable of processing any-sized contexts and running on any device. feedback: Contact through the provided form or via email at join@cartesia.ai. intended_uses: Sonic has potential applications across customer support, entertainment, and content creation and is a part of Cartesia's broader mission to bring real-time multimodal intelligence to every device.
license: unknown modality: text; audio model_card: none monitoring: unknown name: Sonic nationality: USA organization: Cartesia prohibited_uses: unknown quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://cartesia.ai/blog/sonic ================================================ FILE: assets/casia.yaml ================================================ - access: open analysis: Reports results on standard translation benchmarks across 102 languages in comparison with Google Translate and ChatGPT created_date: 2023-05-29 dependencies: - LLaMA - CLUE - BigTrans parallel dataset description: BigTrans is a model which adapts LLaMA that covers only 20 languages and enhances it with multilingual translation capability on more than 100 languages feedback: https://huggingface.co/James-WYang/BigTrans/discussions intended_uses: Advancing future research in multilingual LLMs license: Apache 2.0 modality: text; text model_card: https://huggingface.co/James-WYang/BigTrans monitoring: '' name: BigTrans nationality: China organization: Institute of Automation Chinese Academy of Sciences prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: 16 A100 GPUs with 80 GB of RAM training_time: unknown type: model url: https://arxiv.org/pdf/2305.18098v1.pdf - access: open analysis: Evaluated on standard benchmarks for knowledge and language understanding, mathematical reasoning, and programming ability in comparison to similarly sized open-source models. created_date: 2023-12-22 dependencies: [] description: YAYI 2 is an open source large language model trained in both English and Chinese.
feedback: https://huggingface.co/wenge-research/yayi2-30b/discussions intended_uses: '' license: explanation: Model is under a custom [license](https://github.com/wenge-research/YAYI2/blob/main/COMMUNITY_LICENSE), while code is Apache 2.0 value: custom modality: text; text model_card: https://huggingface.co/wenge-research/yayi2-30b monitoring: '' name: YAYI 2 nationality: China organization: Institute of Automation Chinese Academy of Sciences prohibited_uses: '' quality_control: data is deduplicated, normalized, cleaned, and filtered for toxicity size: 30B parameters (dense) training_emissions: unknown training_hardware: over 1000 A800 GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2312.14862.pdf ================================================ FILE: assets/causallm.yaml ================================================ - access: open analysis: Evaluated on standard benchmarks across a range of tasks. created_date: 2023-10-21 dependencies: - Qwen - OpenOrca - Open Platypus description: CausalLM is an LLM based on the model weights of Qwen and trained on a model architecture identical to LLaMA 2. feedback: none intended_uses: '' license: explanation: can be found at https://github.com/rpherrera/WTFPL (HuggingFace lists this to be the license) value: WTFPL modality: text; text model_card: https://huggingface.co/CausalLM/14B monitoring: unknown name: CausalLM nationality: USA organization: CausalLM prohibited_uses: '' quality_control: '' size: 14B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/CausalLM/14B ================================================ FILE: assets/cerebras.yaml ================================================ - access: explanation: 'The Pile is an open source dataset. Hugging Face compatible checkpoints available on the [[Cerebras Hugging Face page]](https://huggingface.co/cerebras/Cerebras-GPT-13B). 
Cerebras systems checkpoints for pre-training and fine tuning are available in the cloud via the [[Cerebras Model Studio]](https://www.cerebras.net/product-cloud/). ' value: open analysis: '"We evaluate our models on the PILE validation set comprising 380M tokens. We also evaluate the public checkpoints of Pythia, Eleuther (2022); OPT, Zhang et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B, Wang & Komatsuzaki (2021). We performed upstream (pre-training) evaluations of text prediction cross-entropy using the Pile validation and test splits. We performed downstream evaluations of text generation accuracy on standardized tasks using the Eleuther lm-evaluation-harness." [[Evaluations]] (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations). ' created_date: explanation: 'The date the model was announced in the [[Cerebras blog post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models). ' value: 2023-03-28 dependencies: - The Pile description: 'A Family of Open, Compute-efficient, Large Language Models. The family includes 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models in the Cerebras-GPT family have been trained in accordance with Chinchilla scaling laws (20 tokens per model parameter). [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models) ' feedback: '' intended_uses: '"The primary intended use is to further research into large language models. These models can be used as a foundation model for NLP, applications, ethics, and alignment research. Our primary intended users are researchers who are working to improve LLMs and practitioners seeking reference implementations, training setups, hyperparameters, or pre-trained models. We release these models with a fully permissive Apache license for the community to use freely." 
[[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations). ' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/cerebras/Cerebras-GPT-13B monitoring: '' name: Cerebras-GPT nationality: USA organization: Cerebras prohibited_uses: 'Authors note the following limitations of the model: "Cerebras-GPT models are trained on the Pile, with English language only, and are not suitable for machine translation tasks. Cerebras-GPT models have not been tuned for human-facing dialog applications like chatbots and will not respond to prompts in a similar way to models that have received instruction tuning or reinforcement learning from human feedback (RLHF) like Flan-T5 or ChatGPT." [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use). ' quality_control: 'The Pile dataset has been thoroughly analyzed from various ethical standpoints such as toxicity analysis, gender bias, pejorative content, racially sensitive content etc. Only mitigations in standard Pile dataset pre-processing were employed when pre-training Cerebras-GPT. [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations) ' size: 13B parameters (dense) training_emissions: '' training_hardware: explanation: 'According to [[Model Description]](https://huggingface.co/cerebras/Cerebras-GPT-13B#model-description) ' value: 16x Cerebras CS-2 wafer scale systems training_time: explanation: '' value: '' type: model url: https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/ - access: open analysis: Evaluated on standard English LLM benchmarks and adapted Arabic LLM benchmarks. 
created_date: 2023-08-30 dependencies: - GPT-3 - The Pile description: "Jais is the world\u2019s most advanced Arabic LLM as of its release." feedback: none intended_uses: Jais is released with the aim to stimulate research and development in the Arabic NLP community. license: Apache 2.0 modality: text; text model_card: explanation: "Found in section C \u201CModel Cards\u201D" value: https://inceptioniai.org/jais/docs/Technicalpaper.pdf monitoring: unknown name: Jais nationality: unknown organization: Inception Institute of Artificial Intelligence, Cerebras, Mohamed bin Zayed University of Artificial Intelligence prohibited_uses: Generating or endorsing hate speech, disseminating false information, engaging in illegal activities, managing sensitive data, attempting language generalization beyond Arabic and English, and making critical decisions with high stakes. quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: Condor Galaxy Supercomputer training_time: unknown type: model url: https://inceptioniai.org/jais/docs/Technicalpaper.pdf - access: open analysis: Evaluated on standard English LLM benchmarks and adapted Arabic LLM benchmarks. created_date: 2023-08-30 dependencies: - GPT-3 - The Pile description: Jais Chat is an instruction-tuned version of Jais, optimized for dialog interaction. feedback: none intended_uses: Jais Chat is released with the aim to stimulate research and development in the Arabic NLP community. 
license: Apache 2.0 modality: text; text model_card: explanation: "Found in section C \u201CModel Cards\u201D" value: https://inceptioniai.org/jais/docs/Technicalpaper.pdf monitoring: unknown name: Jais Chat nationality: unknown organization: Inception Institute of Artificial Intelligence, Cerebras, Mohamed bin Zayed University of Artificial Intelligence prohibited_uses: Generating or endorsing hate speech, disseminating false information, engaging in illegal activities, managing sensitive data, attempting language generalization beyond Arabic and English, and making critical decisions with high stakes. quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: Condor Galaxy Supercomputer from Cerebras training_time: unknown type: model url: https://inceptioniai.org/jais/docs/Technicalpaper.pdf - access: open analysis: Evaluated on standard LLM benchmarks in comparison to similar-sized models. created_date: 2023-07-24 dependencies: - SlimPajama description: Bittensor Language Model is a 3 billion parameter language model with an 8k context length trained on 627B tokens of SlimPajama. feedback: https://huggingface.co/cerebras/btlm-3b-8k-base/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/cerebras/btlm-3b-8k-base monitoring: unknown name: Bittensor Language Model nationality: USA organization: Cerebras prohibited_uses: '' quality_control: '' size: 3B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.cerebras.net/blog/btlm-3b-8k-7b-performance-in-a-3-billion-parameter-model/ - access: open analysis: '' created_date: 2023-06-09 datasheet: https://huggingface.co/datasets/cerebras/SlimPajama-627B dependencies: - RedPajama-Data description: As of release, SlimPajama is the largest extensively deduplicated, multi-corpora, open-source dataset for training large language models. 
excluded: '' feedback: https://huggingface.co/datasets/cerebras/SlimPajama-627B/discussions included: '' intended_uses: '' license: Apache 2.0 modality: text monitoring: unknown name: SlimPajama nationality: USA organization: Cerebras prohibited_uses: '' quality_control: '' sample: [] size: 627B tokens type: dataset url: https://huggingface.co/datasets/cerebras/SlimPajama-627B ================================================ FILE: assets/character.yaml ================================================ - access: limited adaptation: '' created_date: 2022-09-16 dependencies: [] description: Character allows users to converse with various chatbot personas. failures: unknown feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: unknown name: Character nationality: USA organization: Character AI output_space: AI-generated chat conversations prohibited_uses: '' quality_control: '' terms_of_service: https://beta.character.ai/tos type: application url: https://beta.character.ai/ user_distribution: unknown ================================================ FILE: assets/chatglm.yaml ================================================ - access: open analysis: Performance evaluated on English and Chinese language benchmark tests. created_date: 2023-03-14 dependencies: [] description: ChatGLM is a Chinese-English language model with question and answer and dialogue functions, and is aimed at a Chinese audience. 
feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: ChatGLM nationality: China organization: ChatGLM prohibited_uses: '' quality_control: '' size: 6B parameters (dense) training_emissions: unknown training_hardware: '' training_time: unknown type: model url: https://chatglm.cn/blog ================================================ FILE: assets/cmu.yaml ================================================ - access: explanation: Model checkpoints are available for download at https://github.com/VHellendoorn/Code-LMs value: open analysis: Reports results on standard code benchmarks across a variety of programming languages. created_date: explanation: The date the model paper was released value: 2022-02-26 dependencies: - Github description: PolyCoder is a 2.7B parameter code model based on the GPT-2 architecture, which was trained on 249GB of code across 12 programming languages on a single machine. feedback: https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussions intended_uses: unknown license: explanation: The license is provided in the [[Github repository]](https://github.com/VHellendoorn/Code-LMs) value: MIT modality: text; code model_card: https://huggingface.co/NinedayWang/PolyCoder-2.7B monitoring: None name: PolyCoder nationality: USA organization: Carnegie Mellon University prohibited_uses: None quality_control: No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper. size: 2.7B parameters (dense) training_emissions: unknown training_hardware: 8 NVIDIA RTX 8000 training_time: 6 weeks type: model url: https://arxiv.org/abs/2202.13169 - access: open analysis: Evaluated on nascent time-series datasets and benchmarks. created_date: 2024-02-06 dependencies: [] description: Moment is a family of open-source foundation models for general-purpose time-series analysis.
feedback: none intended_uses: '' license: unknown modality: '' model_card: none monitoring: unknown name: Moment nationality: unknown organization: Carnegie Mellon University, University of Pennsylvania prohibited_uses: '' quality_control: '' size: 385M parameters (dense) training_emissions: unknown training_hardware: Single A6000 GPU training_time: unknown type: model url: https://arxiv.org/pdf/2402.03885.pdf ================================================ FILE: assets/cognition.yaml ================================================ - access: limited analysis: Evaluated on SWE-Bench, a challenging software engineering benchmark, where Devin outperforms major state of the art models unassisted. created_date: 2024-03-12 dependencies: [] description: "Devin is the world\u2019s first fully autonomous AI software engineer." feedback: none intended_uses: '' license: unknown modality: text; code model_card: none monitoring: '' name: Devin nationality: USA organization: Cognition Labs prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.cognition-labs.com/introducing-devin - access: explanation: ESM C is a drop-in replacement for previous models that provides major improvements in both capability and efficiency. ESM C models are available immediately for academic and commercial use under a new license structure designed to promote openness and enable scientists and builders. value: open analysis: Evaluations. We use the methodology of Rao et al. to measure unsupervised learning of tertiary structure in the form of contact maps. A logistic regression is used to identify contacts. For a protein of length L, we evaluate the precision of the top L contacts (P@L) with sequence separation of 6 or more residues. 
created_date: 2024-12-04 dependencies: - UniRef - MGnify - Joint Genome Institute description: ESM Cambrian is a next generation language model trained on protein sequences at the scale of life on Earth. ESM C models define a new state of the art for protein representation learning. feedback: unknown intended_uses: "one that can be used by builders across a wide range of applications,\ \ to imbue AI models with a deeper understanding of the biology of life\u2019\ s most important and mysterious molecules." license: Cambrian Open License modality: explanation: 'Just as a person can fill in the blanks, such as: To __ or not to __ that is the ___ We can train language models to fill in the blanks. Except in biology, instead of training the model to predict words, we train it to predict the characters in a protein sequence, i.e. its sequence of amino acids.' value: text; text model_card: unknown monitoring: unknown name: ESM Cambrian nationality: USA organization: EvolutionaryScale prohibited_uses: unknown quality_control: ESM C was reviewed by a committee of scientific experts who concluded that the benefits of releasing the models greatly outweigh any potential risks. size: explanation: 'ESM C is trained at three scales: 300M, 600M, and 6B parameters.' value: 6B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.evolutionaryscale.ai/blog/esm-cambrian ================================================ FILE: assets/cognitive.yaml ================================================ - access: open analysis: none created_date: 2023-11-14 dependencies: - Dolphin - Yi description: Dolphin 2.2 Yi is an LLM based off Yi. 
feedback: https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b/discussions intended_uses: '' license: explanation: can be found at https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b/blob/main/LICENSE value: custom modality: text; text model_card: https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b monitoring: unknown name: Dolphin 2.2 Yi nationality: USA organization: Cognitive Computations prohibited_uses: '' quality_control: '' size: 34B parameters (dense) training_emissions: unknown training_hardware: 4 A100 GPUs training_time: 3 days type: model url: https://erichartford.com/dolphin - access: open analysis: Evaluated on OpenLLM leaderboard. created_date: explanation: release date is not published; estimated to be sometime in either May or June 2023. value: 2023-06-01 dependencies: - WizardLM description: WizardLM Uncensored is WizardLM trained with a subset of the dataset - responses that contained alignment / moralizing were removed. feedback: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored/discussions intended_uses: '' license: unknown modality: text; text model_card: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored monitoring: unknown name: WizardLM Uncensored nationality: USA organization: Cognitive Computations prohibited_uses: '' quality_control: '' size: 30B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored ================================================ FILE: assets/cohere.yaml ================================================ - access: explanation: The dataset isn't provided to the public. value: closed analysis: explanation: The analysis performed on the dataset is not released. 
value: unknown created_date: explanation: 'The date the Cohere API was announced on the news [[News Article]] (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). ' value: 2021-11-15 datasheet: https://docs.cohere.ai/data-statement dependencies: [] description: 'The family of datasets used to train Cohere models, which come in two forms: coheretext-filtered and coheretext-unfiltered. The former is used to train the Representation models, while the latter one is used to train the Generation models. ' excluded: 'Documents that are not in English are excluded. ' feedback: explanation: The internal feedback mechanisms for Cohere are unknown. value: unknown included: 'As stated in the datasheet, the dataset "includes the Google Books dataset, CommonCrawl, and text from the internet scraped by the Cohere infrastructure team." The top ten domains scraped were: wordpress.com, medium.com, stackexchange.com, tumblr.com, elsevier.com, genius.com, bbc.co.uk, libsyn.com, yahoo.com, nytimes.com [[Datasheet]](https://docs.cohere.ai/data-statement). ' intended_uses: 'The intended use of the dataset is to train Cohere''s language models. ' license: unknown modality: text; text monitoring: explanation: 'There is no information on how Cohere is internally monitoring the use of the dataset. ' value: unknown name: coheretext nationality: Canada organization: Cohere prohibited_uses: explanation: 'There are no known prohibited uses of the dataset, but the Cohere API is bound by the Cohere usage guidelines, which may also apply to this dataset [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' value: unknown quality_control: 'In the datasheet, it is implied that Cohere employs filtration methods for removing racist, biased and toxic content, but the details are not provided. These filtration methods take both the context and the language, as opposed to using a list of blockwords [[Datasheet]](https://docs.cohere.ai/data-statement). 
' sample: [] size: explanation: 'Size of the filtered dataset is 200 GB. The unfiltered dataset is ~3 TB. ' value: 200 GB type: dataset url: none - access: explanation: 'The model is available to the public through the Cohere Platform [[Cohere Platform]](https://os.cohere.ai/login). ' value: limited analysis: The model's performance was analyzed on Hellaswag and COPA, as well as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). created_date: explanation: 'The date the Cohere API was announced on the news [[News Article]] (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). ' value: 2021-11-15 dependencies: - coheretext description: 'The Generations model is a language model trained by Cohere for generation tasks. ' feedback: explanation: 'There are no specific feedback mechanisms for the Generation model, but a generic contact email is provided on Cohere website, which is support at cohere.ai. ' value: unknown intended_uses: 'On the model card, the intended uses are stated as "interactive autocomplete, augmenting human writing processes, summarization, text rephrasing, and other text-to-text tasks in non-sensitive domains" [[Model Card]](https://docs.cohere.ai/generation-card). ' license: unknown modality: text; text model_card: https://docs.cohere.ai/generation-card monitoring: 'The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card). ' name: Cohere Base nationality: Canada organization: Cohere prohibited_uses: 'The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive list of specific application violating these guidelines are: astroturfing, generation of misinformation and other harmful content, and "generation of text about people, places, or events without a human-in-the-loop" [[Model Card]](https://docs.cohere.ai/generation-card). 
' quality_control: explanation: 'The quality control measures taken are unknown, but it is implied that Cohere performed mitigation strategies for toxic degeneration [[Model Card]](https://docs.cohere.ai/generation-card). ' value: unknown size: explanation: 'The exact sizes of the generation models are unknown, but we know that they come in three sizes: small, medium, and large [[Model Card]](https://docs.cohere.ai/generation-card). ' value: unknown training_emissions: explanation: The emissions of the models are unknown. value: unknown training_hardware: explanation: 'The training hardware wasn''t explicitly announced, but it was reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). ' value: unknown training_time: explanation: The training time for the models are unknown. value: unknown type: model url: none - access: explanation: 'The model is available to the public through the Cohere Platform [[Cohere Platform]](https://os.cohere.ai/login). ' value: limited analysis: The model's performance was analyzed on Hellaswag and COPA, as well as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card). created_date: explanation: 'Cohere Command was in beta before this date ' value: 2023-01-01 dependencies: - Cohere Base description: 'This model is a generative model optimized to follow commands in the prompt. ' feedback: explanation: 'There are no specific feedback mechanisms for the Generation model, but a generic contact email is provided on Cohere website, which is support at cohere.ai. ' value: unknown intended_uses: 'On the model card, the intended uses are stated as "interactive autocomplete, augmenting human writing processes, summarization, text rephrasing, and other text-to-text tasks in non-sensitive domains" [[Model Card]](https://docs.cohere.ai/generation-card). 
' license: explanation: The model likely has a license specifically for Cohere's use. value: unknown modality: text; text model_card: https://docs.cohere.ai/generation-card monitoring: 'The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card). ' name: Cohere Command nationality: Canada organization: Cohere prohibited_uses: 'The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive list of specific application violating these guidelines are: astroturfing, generation of misinformation and other harmful content, and "generation of text about people, places, or events without a human-in-the-loop" [[Model Card]](https://docs.cohere.ai/generation-card). ' quality_control: explanation: unknown value: unknown size: unknown training_emissions: explanation: unknown value: unknown training_hardware: explanation: unknown value: unknown training_time: explanation: unknown value: unknown type: model url: https://docs.cohere.com/docs/command-beta - access: explanation: 'The model is available to the public through the Cohere Platform [[Cohere Platform]](https://cohere.ai/). ' value: limited analysis: 'The model''s performance was analyzed on several safety benchmarks [[Model Card]](https://docs.cohere.ai/representation-card). ' created_date: explanation: 'The date the Cohere API was announced on the news [[News Article]] (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). ' value: 2021-11-15 dependencies: [] description: 'The Embedding Large (English) model is a language model trained by Cohere for tasks requiring embeddings. ' feedback: explanation: 'There are no specific feedback mechanisms for the Generation model, but a generic contact email is provided on Cohere website, which is support at cohere.ai. 
' value: unknown intended_uses: 'The intended uses are stated as "estimating semantic similarity between two sentences, choosing a sentence which is most likely to follow another sentence, sentiment analysis, topic extraction, or categorizing user feedback" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card). ' license: explanation: The model likely has a license specifically for Cohere's use. value: unknown modality: text; text model_card: https://docs.cohere.ai/representation-card monitoring: 'The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card). ' name: Cohere Embed (English) nationality: Canada organization: Cohere prohibited_uses: 'The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive list of specific application violating these guidelines are: extraction of identity and demographic information, building purposefully opaque text classification systems, and "building downstream classifiers that serve as automated decision-making systems that have real-world consequences on people, where those decisions are made without a human-in-the-loop" [[Model Card]](https://docs.cohere.ai/representation-card). ' quality_control: unknown size: unknown training_emissions: explanation: The emissions of the models are unknown. value: unknown training_hardware: explanation: 'The training hardware wasn''t explicitly announced, but it was reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). ' value: unknown training_time: explanation: The training time for the models are unknown. value: unknown type: model url: none - access: explanation: 'The model is available to the public through the Cohere Platform [[Cohere Platform]](https://cohere.ai/). 
' value: limited analysis: 'The model''s performance was analyzed on several safety benchmarks [[Model Card]](https://docs.cohere.ai/representation-card). ' created_date: explanation: 'The launch of the multilingual embedding model was announced in this [[blog post]] (https://txt.cohere.com/multilingual/). ' value: 2022-12-12 dependencies: [] description: 'This model maps text from 100+ languages to a semantic vector space, positioning text with a similar meaning (regardless of language) in close proximity. ' feedback: explanation: 'There are no specific feedback mechanisms for the Generation model, but a generic contact email is provided on Cohere website, which is support at cohere.ai. ' value: unknown intended_uses: 'The intended uses are stated as "estimating semantic similarity between two sentences, choosing a sentence which is most likely to follow another sentence, sentiment analysis, topic extraction, or categorizing user feedback" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card). ' license: explanation: 'The asset isn''t released, and hence the license is unknown. ''' value: unknown modality: text; text model_card: https://docs.cohere.ai/representation-card monitoring: 'The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card). ' name: Cohere Embed (Multilingual) nationality: Canada organization: Cohere prohibited_uses: 'The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive list of specific application violating these guidelines are: extraction of identity and demographic information, building purposefully opaque text classification systems, and "building downstream classifiers that serve as automated decision-making systems that have real-world consequences on people, where those decisions are made without a human-in-the-loop" [[Model Card]](https://docs.cohere.ai/representation-card). ' quality_control: unknown size: unknown training_emissions: explanation: The emissions of the models are unknown. value: unknown training_hardware: explanation: 'The training hardware wasn''t explicitly announced, but it was reported that Google Cloud teamed up with Cohere on a TPU partnership [[TechCrunch Article]] (https://techcrunch.com/2021/11/17/google-cloud-teams-up-with-nlp-startup-cohere-on-multi-year-partnership/). ' value: unknown training_time: explanation: The training time for the models are unknown. value: unknown type: model url: https://txt.cohere.ai/multilingual/ - access: explanation: 'Users can access the Cohere API by signing up on the Cohere website [[Cohere Website]](https://cohere.ai/). ' value: limited adaptation: unknown created_date: explanation: 'The date the Cohere API was announced on the news [[News Article]] (https://venturebeat.com/2021/11/15/openai-rival-cohere-launches-language-model-api/). ' value: 2021-11-15 dependencies: - Cohere Chat Endpoint - Cohere Generate Endpoint (legacy) - Cohere Embed Endpoint - Cohere Classify Endpoint - Cohere Summarize Endpoint description: 'Cohere API allows users to access the cohere language models and utilize them in their applications. ' failures: unknown feedback: 'General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
' intended_uses: 'Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ' license: explanation: '"Subject to these Terms of Use, we grant you a non-exclusive, non-transferable, non-sublicensable and revocable licence to use the Cohere Solution in accordance with these Terms of Use" - excerpt from the Terms of Service document. ' value: custom monitoring: 'All applications developed using the Cohere API are subject to review by Cohere. ' monthly_active_users: unknown name: Cohere API nationality: Canada organization: Cohere output_space: generation and embeddings prohibited_uses: 'The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' quality_control: 'The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). ' terms_of_service: https://cohere.ai/terms-of-use type: application url: https://cohere.ai/ user_distribution: unknown - access: explanation: 'Users can access the Cohere API by signing up on the Cohere website [[Cohere Website]](https://cohere.ai/). ' value: limited adaptation: unknown created_date: explanation: The date the Chat API was announced in Cohere's blog [[Blog Post]](https://txt.cohere.com/chat-with-rag).
value: 2023-09-28 dependencies: - Cohere Command - Command R - Command R+ description: This endpoint generates a text response to a user message in the context of a conversation, optionally grounding its generation with source material from tools like search engines, functions or databases, and attributing its responses with citations. failures: unknown feedback: 'General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' intended_uses: 'Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ' license: 'Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ' monitoring: 'All applications developed using the Cohere API is subject to review by Cohere. ' monthly_active_users: unknown name: Cohere Chat Endpoint nationality: Canada organization: Cohere output_space: generation prohibited_uses: 'The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' quality_control: '' terms_of_service: https://cohere.ai/terms-of-use type: application url: https://docs.cohere.com/reference/chat user_distribution: unknown - access: explanation: 'Users can access the Cohere API by signing up on the Cohere website [[Cohere Website]](https://cohere.ai/). ' value: limited adaptation: unknown created_date: explanation: 'The launch of Generate was announced in this [[news article]] (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). 
' value: 2021-11-15 dependencies: - Cohere Base - Cohere Command description: 'This endpoint generates realistic text conditioned on a given input. ' failures: unknown feedback: 'General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' intended_uses: 'Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ' license: 'Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ' monitoring: 'All applications developed using the Cohere API is subject to review by Cohere. ' monthly_active_users: unknown name: Cohere Generate Endpoint (legacy) nationality: Canada organization: Cohere output_space: generation prohibited_uses: 'The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' quality_control: 'The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). ' terms_of_service: https://cohere.ai/terms-of-use type: application url: https://docs.cohere.ai/reference/generate user_distribution: unknown - access: explanation: 'Users can access the Cohere API by signing up on the Cohere website [[Cohere Website]](https://cohere.ai/). ' value: limited adaptation: unknown created_date: explanation: 'The launch of Embed was announced in this [[news article]] (https://venturebeat.com/uncategorized/openai-rival-cohere-launches-language-model-api/). 
' value: 2021-11-15 dependencies: - Cohere Embed (Multilingual) - Cohere Embed (English) description: 'This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents. ' failures: unknown feedback: 'General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' intended_uses: 'Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ' license: 'Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ' monitoring: 'All applications developed using the Cohere API is subject to review by Cohere. ' monthly_active_users: unknown name: Cohere Embed Endpoint nationality: Canada organization: Cohere output_space: embedding prohibited_uses: 'The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' quality_control: 'The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). ' terms_of_service: https://cohere.ai/terms-of-use type: application url: https://docs.cohere.ai/reference/embed user_distribution: unknown - access: explanation: 'Users can access the Cohere API by signing up on the Cohere website [[Cohere Website]](https://cohere.ai/). 
' value: limited adaptation: unknown created_date: explanation: 'The launch of Classify was announced in this [[blog post]] (https://txt.cohere.com/classification-is-now-available/). ' value: 2022-05-05 dependencies: - Cohere Embed (Multilingual) - Cohere Embed (English) description: 'This endpoint makes a prediction about which label best fits a specified text input. To make a prediction, Classify uses the provided examples of text + label pairs as a reference. ' failures: unknown feedback: 'General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' intended_uses: 'Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ' license: 'Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ' monitoring: 'All applications developed using the Cohere API is subject to review by Cohere. ' monthly_active_users: unknown name: Cohere Classify Endpoint nationality: Canada organization: Cohere output_space: embedding prohibited_uses: 'The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' quality_control: 'The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). 
' terms_of_service: https://cohere.ai/terms-of-use type: application url: https://docs.cohere.ai/reference/classify user_distribution: unknown - access: explanation: 'Users can access the Cohere API by signing up on the Cohere website [[Cohere Website]](https://cohere.ai/). ' value: limited adaptation: unknown created_date: explanation: 'The beta launch of summarize was announced in this [[blog post]] (https://txt.cohere.com/summarize-beta/). ' value: 2023-02-22 dependencies: [] description: 'This endpoint generates a succinct version of the original text that relays the most important information. ' failures: unknown feedback: 'General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' intended_uses: 'Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ' license: 'Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ' monitoring: 'All applications developed using the Cohere API is subject to review by Cohere. ' monthly_active_users: unknown name: Cohere Summarize Endpoint nationality: Canada organization: Cohere output_space: generation prohibited_uses: 'The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ' quality_control: 'The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). 
' terms_of_service: https://cohere.ai/terms-of-use type: application url: https://docs.cohere.ai/reference/summarize user_distribution: unknown - access: explanation: 'The model is available to the public through the Cohere Platform [[Cohere Platform]](https://cohere.ai/). ' value: limited analysis: Achieves SOTA performances on trusted MTEB and BEIR benchmarks. created_date: 2023-11-02 dependencies: [] description: As of release, Cohere Embedv3 is Cohere's latest and most advanced embeddings model. feedback: https://huggingface.co/Cohere/Cohere-embed-english-v3.0/discussions intended_uses: '' license: unknown modality: text; text model_card: https://huggingface.co/Cohere/Cohere-embed-english-v3.0 monitoring: none name: Cohere Embedv3 (English) nationality: Canada organization: Cohere prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://txt.cohere.com/introducing-embed-v3/ - access: open analysis: Evaluated on standard LLM and multilingual benchmarks in comparison to SotA models. created_date: 2024-02-12 dependencies: - mT5 - Aya Dataset description: Aya is a massively multilingual generative language model that follows instructions in 101 languages of which over 50% are considered as lower-resourced. feedback: https://huggingface.co/CohereForAI/aya-101/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/CohereForAI/aya-101 monitoring: unknown name: Aya nationality: unknown organization: Cohere for AI, Cohere, Brown University, Carnegie Mellon University, MIT prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2402.07827.pdf - access: explanation: Model weights are made available through the [Hugging Face Model Hub](https://huggingface.co/CohereForAI/c4ai-command-r-v01). 
value: open analysis: Command R was evaluated on standard English and multilingual (translated MMLU, FLoRES) benchmarks. It was also evaluated on KILT, HotpotQA and Bamboogle. created_date: explanation: The date that Command R was announced. value: 2024-03-11 dependencies: [] description: Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprise. It is an auto-regressive large language model that uses an optimized transformer architecture. After pretraining, supervised fine-tuning (SFT) and preference training were applied to align model behavior to human preferences for helpfulness and safety. Command R is part of a family of open weight releases from Cohere and Cohere For AI. Its larger companion model is [Command R+](https://huggingface.co/CohereForAI/c4ai-command-r-plus). feedback: '[https://huggingface.co/CohereForAI/c4ai-command-r-v01/discussions](https://huggingface.co/CohereForAI/c4ai-command-r-v01/discussions)' intended_uses: 'Command R is optimized to perform well up to 128K context length and in the following languages: English, French, Spanish, Italian, German, Brazilian Portuguese, Japanese, Korean, Simplified Chinese, and Arabic. It has been trained for conversational tool use and grounded generation capabilities. The open weights release of Command R is intended to make community-based research efforts more accessible.' license: CC BY NC 4.0 modality: text; text model_card: https://huggingface.co/CohereForAI/c4ai-command-r-v01 monitoring: unknown name: Command R nationality: Canada organization: Cohere prohibited_uses: 'Usage of this model requires adherence to [C4AI''s Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy). Usage via the Cohere Platform is subject to Cohere''s [Terms of Use](https://cohere.com/terms-of-use) and [Usage Guidelines](https://docs.cohere.com/docs/usage-guidelines). 
A non-exhaustive list of disallowed use cases include: - Violence and harm - Harassment and abuse - Sexual exploitation, harm, or abuse - Sensitive information - Fraud and deception - Synthetic data for commercial uses Misuse should be reported to Cohere at safety at cohere.ai.' quality_control: Comprehensive guidance for working with Command R's tool use and grounded generation prompt template can be found in Cohere's [documentation](https://docs.cohere.com/docs/prompting-command-r). size: 35B parameters (dense) terms_of_service: https://cohere.ai/terms-of-use training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://txt.cohere.com/command-r/ - access: open analysis: unknown created_date: 2024-02-09 datasheet: https://huggingface.co/datasets/CohereForAI/aya_dataset dependencies: [] description: The Aya Dataset is a dataset that consists of original human-curated prompt-completion pairs written by fluent speakers of 65 languages. excluded: '' feedback: https://huggingface.co/datasets/CohereForAI/aya_dataset/discussions included: '' intended_uses: '' license: Apache 2.0 modality: text monitoring: unknown name: Aya Dataset nationality: unknown organization: Cohere for AI, Beijing Academy of Artificial Intelligence, Cohere, Binghamton University prohibited_uses: '' quality_control: '' sample: [] size: 204k human-annotated prompt-completion pairs type: dataset url: https://arxiv.org/pdf/2402.06619.pdf - access: limited analysis: Evaluated on code retrieval and data retrieval capabilities, with improvements compared to the standard in both. created_date: 2024-04-11 dependencies: [] description: Rerank 3 is a new foundation model for efficient enterprise search and retrieval with 4k context length. feedback: none intended_uses: Efficient enterprise search and retrieval.
license: unknown modality: text; text model_card: none monitoring: unknown name: Rerank 3 nationality: Canada organization: Cohere prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://cohere.com/blog/rerank-3 - access: open analysis: Evaluated across 23 languages with the highest results in all tasks and languages compared to other multilingual language models. created_date: 2024-05-31 dependencies: [] description: Aya 23 is an open weights research release of an instruction fine-tuned model with multilingual capabilities. It focuses on pairing a highly performant pre-trained Command family of models with the recently released Aya Collection. This model supports 23 languages. feedback: https://huggingface.co/CohereForAI/aya-23-35B/discussions intended_uses: This model is designed to be used for multilingual tasks covering 23 languages. license: CC-BY-NC modality: text; text model_card: https://huggingface.co/CohereForAI/aya-23-35B monitoring: unknown name: Aya 23 nationality: Canada organization: Cohere prohibited_uses: unknown quality_control: unknown size: 35B parameters training_emissions: unknown training_hardware: TPUv4 chips with up to 128 pod slices training_time: unknown type: model url: https://arxiv.org/pdf/2405.15032 - access: explanation: Model weights are made available through the [Hugging Face Model Hub](https://huggingface.co/CohereForAI/c4ai-command-r-plus). value: open analysis: Command R+ was evaluated on standard English and multilingual (e.g. FLoRES, WMT23) benchmarks. It was also evaluated on multihop reasoning tasks (e.g. HotpotQA, Bamboogle, StrategyQA), and tool use (e.g. ToolTalk, Berkeley Function-Calling). created_date: explanation: The date that Command R+ was announced. value: 2024-04-03 dependencies: [] description: Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads. 
It is an auto-regressive large language model that uses an optimized transformer architecture. After pretraining, supervised fine-tuning (SFT) and preference training were applied to align model behavior to human preferences for helpfulness and safety. Command R+ is part of a family of open weight releases from Cohere and Cohere For AI. Its smaller companion model is [Command R](https://huggingface.co/CohereForAI/c4ai-command-r-v01). feedback: '[https://huggingface.co/CohereForAI/c4ai-command-r-plus/discussions](https://huggingface.co/CohereForAI/c4ai-command-r-plus/discussions)' intended_uses: 'Command R+ is optimized to perform well up to 128K context length and in the following languages: English, French, Spanish, Italian, German, Brazilian Portuguese, Japanese, Korean, Simplified Chinese, and Arabic. It has been trained for conversational tool use and grounded generation capabilities. The open weights release of Command R+ is intended to make community-based research efforts more accessible.' license: CC BY NC 4.0 modality: text; text model_card: https://huggingface.co/CohereForAI/c4ai-command-r-plus monitoring: unknown name: Command R+ nationality: Canada organization: Cohere prohibited_uses: 'Usage of this model requires adherence to [C4AI''s Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy). Usage via the Cohere Platform is subject to Cohere''s [Terms of Use](https://cohere.com/terms-of-use) and [Usage Guidelines](https://docs.cohere.com/docs/usage-guidelines). A non-exhaustive list of disallowed use cases include: - Violence and harm - Harassment and abuse - Sexual exploitation, harm, or abuse - Sensitive information - Fraud and deception - Synthetic data for commercial uses Misuse should be reported to Cohere at safety at cohere.ai.'
quality_control: Comprehensive guidance for working with Command R+'s tool use and grounded generation prompt template can be found in Cohere's [documentation](https://docs.cohere.com/docs/prompting-command-r). size: 104B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://txt.cohere.com/command-r-plus-microsoft-azure/ ================================================ FILE: assets/columbia.yaml ================================================ - access: open analysis: Evaluated on wide range of tasks using own evaluation benchmarks. created_date: 2022-11-20 dependencies: - AlphaFold2 - OpenProteinSet description: OpenFold is an open source recreation of AlphaFold2. feedback: '' intended_uses: '' license: CC BY 4.0 modality: amino acid sequence; protein structure model_card: '' monitoring: '' name: OpenFold nationality: USA organization: Columbia prohibited_uses: '' quality_control: '' size: '' training_emissions: unknown training_hardware: Single A100 NVIDIA GPU training_time: 50,000 GPU hours type: model url: https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2 - access: open analysis: Evaluated on the object hallucination benchmark and compared to GPT-4V. created_date: 2023-10-11 dependencies: - CLIP - Vicuna description: Ferret is a Multimodal Large Language Model (MLLM) capable of understanding spatial referring of any shape or granularity within an image and accurately grounding open-vocabulary descriptions. 
feedback: none intended_uses: '' license: explanation: License can be found at https://github.com/apple/ml-ferret/blob/main/LICENSE value: Apple modality: image, text; image, text model_card: none monitoring: none name: Ferret nationality: unknown organization: Columbia, Apple AI prohibited_uses: '' quality_control: '' size: 13B parameters training_emissions: unknown training_hardware: 8 A100 GPUs training_time: 2.5 to 5 days type: model url: https://arxiv.org/pdf/2310.07704.pdf ================================================ FILE: assets/compute_01_research.yaml ================================================ - access: explanation: to ensure AGI will be open-source, transparent, and accessible value: open analysis: '' created_date: 2024-10-11 dependencies: - DeepMind's Distributed Low-Communication (DiLoCo) description: INTELLECT-1 is a decentralized training model with 10 billion parameters, designed to be open-source and transparent, allowing global contributors to participate in its computation. feedback: unknown intended_uses: Training globally distributed AI models in decentralized environments. license: Apache 2.0 modality: text; text model_card: unknown monitoring: unknown name: INTELLECT-1 nationality: USA organization: PrimeIntellect prohibited_uses: '' quality_control: '' size: explanation: a 10-billion-parameter model value: 10B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.primeintellect.ai/blog/intellect-1 ================================================ FILE: assets/continue.yaml ================================================ - access: open adaptation: '' created_date: 2023-07-26 dependencies: - GPT-4 API - Code Llama - Claude API - WizardCoder - PaLM API description: "Continue is the open-source autopilot for software development. 
It\ \ is an IDE extension that brings the power of ChatGPT to VS Code and JetBrains.\ \ It\u2019s built to be deeply customizable and continuously learn from development\ \ data." failures: '' feedback: '' intended_uses: '' license: Apache 2.0 monitoring: '' monthly_active_users: '' name: Continue nationality: USA organization: Continue Dev, Inc. output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://github.com/continuedev/continue/blob/main/LICENSE type: application url: https://continue.dev user_distribution: '' ================================================ FILE: assets/cresta.yaml ================================================ - access: closed analysis: Outperforms GPT-4 in common sense and reasoning tasks on the basis of both efficiency and accuracy. created_date: 2023-06-20 dependencies: - GPT-4 - Claude - Falcon-40B description: Ocean-1 is the culmination of Cresta's experience in deploying generative AI systems for large enterprises and signifies their latest milestone in advancing the cutting edge AI technology for customer facing conversations. feedback: none intended_uses: Acting as a contact center chatbot agent. license: unknown modality: text; text model_card: none monitoring: unknown name: Ocean-1 nationality: USA organization: Cresta prohibited_uses: none quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://cresta.com/blog/introducing-ocean-1-worlds-first-contact-center-foundation-model/ ================================================ FILE: assets/databricks.yaml ================================================ - access: explanation: 'Model training code can be accessed at the official Dolly repository. Trained weights can be requested at hello-dolly@databricks.com. [[Dolly Repository]](https://github.com/databrickslabs/dolly). 
' value: open analysis: '"We evaluated Dolly on the instruction-following capabilities described in the InstructGPT paper that ChatGPT is based on and found that it exhibits many of the same qualitative capabilities, including text generation, brainstorming and open Q&A." [[Databricks Blog Post]] (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html). ' created_date: explanation: 'The date the model was announced in the [[Cerebras blog post]](https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html). ' value: 2023-03-24 dependencies: - GPT-J - Alpaca dataset description: "\"Databricks\u2019 Dolly, a large language model trained on the Databricks\n\ \ Machine Learning Platform, demonstrates that a two-years-old open source\n model\ \ (GPT-J) can, when subjected to just 30 minutes of fine tuning on a\n focused\ \ corpus of 50k records (Stanford Alpaca), exhibit surprisingly\n high quality\ \ instruction following behavior not characteristic of the\n foundation model\ \ on which it is based.\"\n [[Dolly Repository]](https://github.com/databrickslabs/dolly).\n" feedback: https://github.com/databrickslabs/dolly/issues intended_uses: '"Dolly is intended exclusively for research purposes and is not licensed for commercial use." [[Limitations]](https://github.com/databrickslabs/dolly#limitations). ' license: Apache 2.0 modality: text; text model_card: '' monitoring: none name: Dolly nationality: USA organization: Databricks prohibited_uses: 'Authors note the following limitations of the model: "The Dolly model family is under active development, and so any list of shortcomings is unlikely to be exhaustive, but we include known limitations and misfires here as a means to document and share our preliminary findings with the community. 
In particular, dolly-6b struggles with syntactically complex prompts, mathematical operations, factual errors, dates and times, open-ended question answering, hallucination, enumerating lists of specific length, and stylistic mimicry." [[Limitations]](https://github.com/databrickslabs/dolly#limitations). ' quality_control: none size: 6B parameters (dense) training_emissions: unknown training_hardware: explanation: 'According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) ' value: A single NDasrA100_v4 machine with 8x A100 40GB GPUs training_time: explanation: 'According to [[Model Overview]](https://github.com/databrickslabs/dolly#model-overview) ' value: 30 minutes type: model url: https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html - access: open analysis: DBRX outperforms established open-source and open-weight base models on the Databricks Model Gauntlet, the Hugging Face Open LLM Leaderboard, and HumanEval. Full evaluation details can be found in the corresponding technical blog post. created_date: 2024-03-27 dependencies: [] description: DBRX is a transformer-based decoder-only large language model (LLM) that was trained using next-token prediction by Databricks. It uses a fine-grained mixture-of-experts (MoE) architecture with 132B total parameters of which 36B parameters are active on any input. DBRX only accepts text-based inputs and produces text-based outputs. feedback: https://huggingface.co/databricks/dbrx-base/discussions intended_uses: DBRX models are open, general-purpose LLMs intended and licensed for both commercial and research applications. They can be further fine-tuned for various domain-specific natural language and coding tasks. 
license: Databricks Open Model License modality: text; text model_card: https://huggingface.co/databricks/dbrx-base monitoring: unknown name: DBRX nationality: USA organization: Databricks prohibited_uses: DBRX models are not intended to be used out-of-the-box in non-English languages, and do not support native code execution, function calling or any use that violates applicable laws or regulations or is otherwise prohibited by the Databricks Open Model License and Databricks Open Model Acceptable Use Policy. quality_control: Recommendations provided for retrieval augmented generation (RAG) in scenarios where accuracy and fidelity are important and additional testing around safety in the context of the specific application and domain is suggested. size: 132B parameters (sparse) training_emissions: unknown training_hardware: 3072 NVIDIA H100s connected by 3.2Tbps Infiniband training_time: 3 months type: model url: https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm ================================================ FILE: assets/deci.yaml ================================================ - access: open analysis: Evaluated on the OpenLLM benchmarks and, on release, outperforms all other 7B models on the OpenLLM Leaderboard. created_date: 2023-12-12 dependencies: [] description: DeciLM is a LLM that on release ranks as the fastest and most accurate model of its size. feedback: none intended_uses: This model is intended for commercial and research use in English and can be fine-tuned for use in other languages. 
license: Apache 2.0 modality: text; text model_card: https://deci.ai/model-zoo/decilm-7b/ monitoring: unknown name: DeciLM nationality: Israel organization: Deci prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: NVIDIA A10 GPUs training_time: unknown type: model url: https://deci.ai/blog/introducing-decilm-7b-the-fastest-and-most-accurate-7b-large-language-model-to-date ================================================ FILE: assets/deepmind.yaml ================================================ - access: explanation: 'The dataset access is limited to DeepMind researchers [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). ' value: closed analysis: 'MassiveText data was analyzed for toxicity, language distribution, URL breakdown, and tokenizer compression rates on the subsets [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2). ' created_date: explanation: 'The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). ' value: 2021-12-08 datasheet: https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5 dependencies: [] description: 'The MassiveText dataset was used to train the Gopher model. ' excluded: 'Documents that are not in English are excluded. ' feedback: explanation: 'The internal feedback mechanisms for WebText are unknown [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). ' value: unknown included: 'MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%), C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a web text corpus curated for MassiveText. ' intended_uses: 'Pre-training of language models by DeepMind researchers [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). 
' license: explanation: 'The model likely has a license specifically for DeepMind''s use, based on the information provided in the datasheet [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5). ' value: unknown modality: code, text monitoring: explanation: 'There is no information on how DeepMind is internally monitoring the use of the dataset. ' value: unknown name: MassiveText nationality: UK organization: Google Deepmind prohibited_uses: explanation: 'There are no known prohibited uses of the dataset, but the authors state that it should not be used for training models with multilingual capabilities as it only contains the English language [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). ' value: unknown quality_control: "The authors use simple heuristics for filtering low quality documents\ \ as opposed to relying on a classifier based on a \"gold\" set such as the English\ \ Wikipedia, which could \"inadvertently bias towards a certain demographic or\ \ erase certain dialects or sociolects from representation.\" MassiveWeb subset\ \ was filtered using Google\u2019s SafeSearch filter, preferring it over to word\ \ filters that \"disproportinately filter out inoffensive content associated with\ \ minority groups. MassiveWeb was filtered further for word or phrase repetitions.\ \ All the subsets were filtered for document deduplication and test set contamination\"\ \ [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A).\n" sample: [] size: 10.5 TB type: dataset url: https://arxiv.org/pdf/2112.11446.pdf - access: explanation: 'The dataset access is limited to DeepMind researchers [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). ' value: closed analysis: '' created_date: explanation: 'The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/pdf/2204.14198.pdf). 
' value: 2022-04-29 datasheet: https://arxiv.org/pdf/2204.14198.pdf#appendix.F dependencies: [] description: 'M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language models and was created by researchers and engineers. ' excluded: unknown feedback: explanation: 'No feedback mechanism is mentioned in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). ' value: none included: 'M3W has interleaved images (185M) and text (182GB) from the web. ' intended_uses: 'Pre-training of vision and language models by DeepMind researchers [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). ' license: explanation: 'The model likely has a license specifically for DeepMind''s use, based on the information provided in the datasheet [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). ' value: unknown modality: code, text monitoring: explanation: 'There is no information on how DeepMind is internally monitoring the use of the dataset. ' value: unknown name: M3W nationality: UK organization: Google Deepmind prohibited_uses: explanation: 'There are no known prohibited uses of the dataset [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). ' value: unknown quality_control: 'The authors provide a basic description of data processing and cleaning. ' sample: [] size: 182GB Text, 185M Images type: dataset url: https://arxiv.org/pdf/2204.14198.pdf - access: explanation: 'The full dataset is not directly provided by the authors, though some underlying data is public whereas others (e.g. MassiveText) is not. ' value: closed analysis: 'The Gato dataset compiles many datasets introduced in prior works, with associated analyses. ' created_date: explanation: 'The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent). ' value: 2022-05-12 datasheet: none dependencies: - MassiveText description: 'The Gato datasets are a collection of data used to train the Gato model. 
' excluded: explanation: 'No specific filtering is mentioned in the Gato paper. ' value: none feedback: explanation: 'There is no mention on feedback mechanisms either internally or externally. ' value: none included: 'The full composition of the dataset across individual sources can be found in the paper. ' intended_uses: explanation: 'There are no known intended uses of the dataset stated by authors beyond training Gato. ' value: unknown license: explanation: 'The datasets have individual licenses, but no overall license is mentioned by the authors. ' value: unknown modality: image, text, robotics trajectories, simulated control tasks monitoring: explanation: 'There is no mention on how DeepMind is internally monitoring the use of the dataset. ' value: none name: Gato dataset nationality: UK organization: Google Deepmind prohibited_uses: explanation: 'There are no known prohibited uses of the dataset stated by authors. ' value: unknown quality_control: unknown sample: [] size: 10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k robotics trajectories type: dataset url: https://www.deepmind.com/blog/a-generalist-agent - access: explanation: Models are available for download from the [[Github repository]](https://github.com/deepmind/alphafold) value: open analysis: '' created_date: explanation: The date the model paper was released value: 2021-07-15 dependencies: - Protein Data Bank description: AlphaFold2 is a protein language model trained on protein sequences feedback: '' intended_uses: '' license: explanation: The license is provided in the [[Github repository]](https://github.com/deepmind/alphafold) value: Apache 2.0 modality: amino acid sequence; protein structure model_card: none monitoring: '' name: AlphaFold2 nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 93M parameters (dense) training_emissions: '' training_hardware: explanation: Described in paper value: 128 TPUv3 cores training_time: 
explanation: Training takes "approximately 1 week" and finetuning takes "approximately 4 days" value: 11 days type: model url: https://www.nature.com/articles/s41586-021-03819-2 - access: explanation: 'The model has not been released and no discussion of release is stated in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). ' value: closed analysis: 'Model performance was evaluated on image and video datasets primarily, including dialogue. ' created_date: explanation: 'The date that Flamingo was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2204.14198). ' value: 2022-04-29 dependencies: - M3W - ALIGN - LTIP - VTP - Chinchilla description: 'Flamingo is a Visual Language Model using the Transformer architecture that is intended for few-shot learning. ' feedback: explanation: 'No contact information is provided for feedback in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). ' value: none intended_uses: 'The intended uses are stated in the model card: "The primary use is research on visual language models (VLM), including: research on VLM applications like classification, captioning or visual question answering, understanding how strong VLMs can contribute to AGI, advancing fairness and safety research in the area of multimodal research, and understanding limitations of current large VLMs." [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). ' license: explanation: 'No license is provided in the model card [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). ' value: unknown modality: image, text; text model_card: https://arxiv.org/pdf/2204.14198.pdf#appendix.E monitoring: explanation: 'There is no information on how DeepMind is internally monitoring the use of the model. 
' value: unknown name: Flamingo nationality: UK organization: Google Deepmind prohibited_uses: 'The model card lists the following as out of scope uses of the model: "Uses of the model for visually conditioned language generation in harmful or deceitful settings. Broadly speaking, the model should not be used for downstream applications without further safety and fairness mitigations specific to each application." [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). ' quality_control: explanation: 'Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ' value: none size: 80B parameters (dense) training_emissions: explanation: 'Authors do not report the training emissions. ' value: unknown training_hardware: explanation: 'Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf). ' value: TPU training_time: explanation: 'Reported in the paper checklist [[Checklist]](https://arxiv.org/pdf/2204.14198.pdf). ' value: 15 days on 1536 TPUs type: model url: https://arxiv.org/pdf/2204.14198.pdf - access: explanation: DeepMind does not provide access to AlphaCode to external researchers value: closed analysis: '' created_date: explanation: The date the model paper was released value: 2022-02-02 dependencies: [] description: AlphaCode is an autoregressive language model trained on code feedback: '' intended_uses: '' license: explanation: '' value: unknown modality: text; code model_card: '' monitoring: '' name: AlphaCode nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 41B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2203.07814 - access: explanation: 'The model access is limited to DeepMind researchers. The model won''t be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). 
' value: closed analysis: 'Model performance was evaluated and analyzed on 152 NLP tasks including: Language Modelling (20), Reading Comprehension (3), Fact Checking (3), Question Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4); on toxicity and bias datasets [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5); and on dialogue tasks [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6). ' created_date: explanation: 'The date that Gopher was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/language-modelling-at-scale-gopher-ethical-considerations-and-retrieval). ' value: 2021-12-08 dependencies: - MassiveText description: 'Gopher is an autoregressive language model based on the Transformer architecture with two modifications: using RMSNorm instead of LayerNorm and using relative positional encoding scheme instead of absolute positional encodings [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1). ' feedback: 'The feedback for the model can be provided at the email linked in the model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). ' intended_uses: 'The intended uses are stated in the Gopher model card: "The primary use is research on language models, including: research on NLP applications like machine translation and question answering, understanding how strong language models can contribute to AGI, advancing fairness and safety research, and understanding limitations of current LLMs" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). ' license: explanation: 'The model likely has a license specifically for DeepMind''s use, based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). 
' value: unknown modality: text; code model_card: https://arxiv.org/pdf/2112.11446.pdf#appendix.B monitoring: explanation: 'There is no information on how DeepMind is internally monitoring the use of the model. ' value: unknown name: Gopher nationality: UK organization: Google Deepmind prohibited_uses: 'The model card lists the following as out of scope uses of the model: "for language generation in harmful or deceitful settings. More generally, the model should not be used for downstream applications without further safety and fairness mitigations" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). ' quality_control: none size: 280B parameters (dense) training_emissions: explanation: 'The training emission estimate from the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F) ' value: 380 tCO2e training_hardware: explanation: 'Reported in the paper [[Section F]](https://arxiv.org/pdf/2112.11446.pdf#appendix.F). ' value: TPUv3 pods training_time: explanation: 'The authors reported the training petaflops for all of the 4 different sizes of the model. For the 280B parameter model, the petaflops was reported as 6.31E+08. We compute the Gopher''s training time in petaflop/s-day as 6.31E+08 / (60*60*24) = 7303.24 petaflop/s-day. ' value: 7303.24 petaflop/s-day type: model url: https://arxiv.org/pdf/2112.11446.pdf - access: explanation: 'The model access is limited to DeepMind researchers. The model won''t be released to the public [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). ' value: closed analysis: 'Model performance was evaluated and analyzed on many NLP tasks including language modeling, reading comprehension, question answering, commonsense-intensive tasks, and the BIG-Bench and MMLU meta-benchmarks. ' created_date: explanation: 'The date that Chinchilla was posted on arXiv [[arXiv]] (https://arxiv.org/abs/2203.15556). 
' value: 2022-03-29 dependencies: - MassiveText description: 'Chinchilla is an autoregressive language model based on the Transformer architecture with improved scaling laws. ' feedback: 'The feedback for the model can be provided at the email linked in the model card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). ' intended_uses: 'The intended uses are stated in the Chinchilla model card: "The primary use is research on language models, including: research on the scaling behaviour of language models along with those listed in Gopher paper" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). ' license: explanation: 'The model likely has a license specifically for DeepMind''s use, based on the information provided in the model card [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). ' value: unknown modality: text; code model_card: https://arxiv.org/pdf/2203.15556.pdf monitoring: explanation: 'There is no information on how DeepMind is internally monitoring the use of the model. ' value: Unknown name: Chinchilla nationality: UK organization: Google Deepmind prohibited_uses: 'The model card lists the following as out of scope uses of the model: "for language generation in harmful or deceitful settings. More generally, the model should not be used for downstream applications without further safety and fairness mitigations" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). ' quality_control: none size: 70B parameters (dense) training_emissions: explanation: 'Authors do not report the training emissions. ' value: Unknown training_hardware: explanation: 'Reported in the paper [[Section 4]](https://arxiv.org/pdf/2203.15556.pdf). ' value: TPUv3/TPUv4 pods training_time: explanation: 'The authors reported the training petaflops for all models, including hypothetical larger models. For the 70B parameter model, the petaflops was reported as 5.76E+08. 
We compute the Chinchilla''s training time in petaflop/s-day as 5.76E+08 / (60*60*24) = 6666.66 petaflop/s-day. ' value: 6666.66 petaflop/s-day type: model url: https://arxiv.org/pdf/2203.15556.pdf - access: explanation: 'The model access is limited to DeepMind researchers. The model won''t be released to the public [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ' value: closed analysis: 'Model performance was evaluated on simulated and robotics task primarily, including out-of-distribution and skill generalization. ' created_date: explanation: 'The date that Gato was announced [[DeepMind Blog Post]] (https://www.deepmind.com/blog/a-generalist-agent). ' value: 2022-05-12 dependencies: - Gato dataset description: 'Gato is a generalist agent based on sequence modeling using the Transformer architecture to implement multi-modal, multi-task, multi-embodiment generalist policy. ' feedback: 'The feedback for the model can be provided at the email linked in the model card, reedscot at google.com [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ' intended_uses: 'The intended uses are stated in the Gato model card: "Learn to accomplish a wide variety of tasks from expert demonstrations, such as playing video games, controlling simulated embodiments, and real world block stacking." [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ' license: unknown modality: image, text; image, text, robotics trajectories model_card: https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B monitoring: explanation: 'There is no information on how DeepMind is internally monitoring the use of the model. ' value: unknown name: Gato nationality: UK organization: Google Deepmind prohibited_uses: 'The model card lists the following as out of scope uses of the model: "Not intended for commercial or production use. Military uses are strictly prohibited." [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). 
' quality_control: explanation: 'Reported in the mitigations in the model card [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ' value: none size: 1.2B parameters (dense) training_emissions: explanation: 'Authors do not report the training emissions. ' value: unknown training_hardware: explanation: 'Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj). ' value: 16x16 TPU v3 slice training_time: explanation: 'Reported in the paper [[Section 2.3]](https://openreview.net/pdf?id=1ikK0kHjvj). ' value: 4 days on a 16x16 TPU v3 slice type: model url: https://www.deepmind.com/blog/a-generalist-agent - access: closed analysis: '' created_date: 2022-09-28 dependencies: - Chinchilla - Google Search - Sparrow Rule reward model - Sparrow Preference reward model description: '' feedback: '' intended_uses: '' license: explanation: 'The asset isn''t released, and hence the license is unknown. ''' value: unknown modality: text; text model_card: '' monitoring: '' name: Sparrow nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2209.14375 - access: closed analysis: '' created_date: 2021-12-08 dependencies: - MassiveText description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: RETRO nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 7.5B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2112.04426 - access: closed analysis: '' created_date: 2022-09-28 dependencies: - Chinchilla - Sparrow adversarial probing dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Sparrow Rule reward model nationality: UK organization: Google 
Deepmind prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2209.14375 - access: closed analysis: '' created_date: 2022-09-28 dependencies: - Chinchilla - Sparrow response preference dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Sparrow Preference reward model nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2209.14375 - access: closed analysis: '' created_date: 2022-09-28 datasheet: '' dependencies: - Chinchilla description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: Sparrow adversarial probing dataset nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' sample: [] size: 27k ratings type: dataset url: https://arxiv.org/abs/2209.14375 - access: closed analysis: '' created_date: 2022-09-28 datasheet: '' dependencies: - Chinchilla description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: Sparrow response preference dataset nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' sample: [] size: 72k comparisons type: dataset url: https://arxiv.org/abs/2209.14375 - access: closed analysis: '' created_date: 2022-03-16 dependencies: - Gopher - Google Search - GopherCite reward model description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: GopherCite nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 280B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: 
https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - access: closed analysis: '' created_date: 2022-03-16 dependencies: - Gopher - GopherCite Preference dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: GopherCite reward model nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - access: closed analysis: '' created_date: 2022-03-16 datasheet: '' dependencies: - Gopher - Google Search description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: GopherCite Preference dataset nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' sample: [] size: 33k response pairs type: dataset url: https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf - access: closed analysis: '' created_date: 2022-09-29 dependencies: - Chinchilla description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Dramatron nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2209.14958 - access: open analysis: Evaluated on evaluation trajectories and 
SoTA baselines using robotic data. created_date: 2023-07-28 dependencies: - PaLI-X - PaLM-E - RT-2 action tokens description: RT-2 is a vision-language-action model for robotic actions that incorporates chain of thought reasoning. feedback: '' intended_uses: '' license: unknown modality: text, video; text, robotics trajectories model_card: '' monitoring: '' name: RT-2 nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: '' size: 55B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2307.15818.pdf - access: closed analysis: unknown created_date: 2023-11-16 dependencies: [] description: Lyria is DeepMind's most advanced AI music generation model to date. feedback: '' intended_uses: '' license: unknown modality: text; music model_card: none monitoring: '' name: Lyria nationality: UK organization: Google Deepmind prohibited_uses: '' quality_control: worked with artists and music industry to ensure utility size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://deepmind.google/discover/blog/transforming-the-future-of-music-creation/ - access: closed analysis: Evaluated using only out-of-distribution image prompts for qualitative results. created_date: 2024-02-23 dependencies: [] description: "Genie is a foundation world model\_trained from Internet videos\_that\ \ can generate an endless variety of playable (action-controllable) worlds from\ \ synthetic images, photographs, and even sketches." 
feedback: none intended_uses: '' license: unknown modality: image; video model_card: none monitoring: '' name: Genie nationality: unknown organization: Google DeepMind prohibited_uses: '' quality_control: '' size: 11B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://sites.google.com/view/genie-2024 - access: limited analysis: The model was tested and evaluated on various prompts to assess its understanding of natural language, its ability to generate high-quality images in various formats and styles and generate fine details and complex textures. Red teaming and evaluations were conducted on topics including fairness, bias, and content safety. created_date: 2024-05-14 dependencies: [] description: Imagen 3 is a high-quality text-to-image model, capable of generating images with better detail, richer lighting, and fewer distracting artifacts compared to previous models. Improved understanding of prompts allows for a wide range of visual styles and captures small details from longer prompts. It also understands prompts written in natural, everyday language, making it easier to use. Imagen 3 is available in multiple versions, optimized for different types of tasks, from generating quick sketches to high-resolution images. feedback: unknown intended_uses: Generate high-quality images for various purposes, from photorealistic landscapes to textured oil paintings or whimsical claymation scenes. It is useful in situations where detailed visual representation is required based on the textual description. license: unknown modality: text; image model_card: none monitoring: Through digital watermarking tool SynthID embedded in pixels for detection and identification. name: Imagen 3 nationality: unknown organization: Google DeepMind prohibited_uses: unknown quality_control: Extensive filtering and data labeling were used to minimize harmful content in datasets and reduce the likelihood of harmful outputs. 
Privacy, safety, and security technologies were leveraged in deploying the model, including watermarking tool SynthID. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://deepmind.google/technologies/imagen-3/ - access: closed analysis: unknown created_date: 2024-05-14 dependencies: [] description: Veo is Google DeepMind's most capable video generation model to date. It generates high-quality, 1080p resolution videos that can go beyond a minute, in a wide range of cinematic and visual styles. It accurately captures the nuance and tone of a prompt, and provides an unprecedented level of creative control. The model is also capable of maintaining visual consistency in video frames, and supports masked editing. feedback: Feedback from leading creators and filmmakers is incorporated to improve Veo's generative video technologies. intended_uses: Veo is intended to help create tools that make video production accessible to everyone. It can be used by filmmakers, creators, or educators for storytelling, education and more. Some of its features will be also brought to products like YouTube Shorts. license: unknown modality: text; video model_card: none monitoring: unknown name: Veo nationality: unknown organization: Google DeepMind prohibited_uses: unknown quality_control: Videos created by Veo are watermarked using SynthID, DeepMinds tool for watermarking and identifying AI-generated content, and passed through safety filters and memorization checking processes to mitigate privacy, copyright and bias risks. 
size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://deepmind.google/technologies/veo/ - access: limited analysis: The model was evaluated on various benchmarks like General MMLU, Code Natural2Code, MATH, GPQA, Big-Bench, WMT23, MMMU, and MathVista providing performance across various domains like multilingual translation, image processing, and code generation. created_date: 2024-05-30 dependencies: [] description: Gemini Flash is a lightweight model, optimized for speed and efficiency. It features multimodal reasoning and a breakthrough long context window of up to one million tokens. It's designed to serve at scale and is efficient on cost, providing quality results at a fraction of the cost of larger models. feedback: none intended_uses: The model is intended for developer and enterprise use cases. It can process hours of video and audio, and hundreds of thousands of words or lines of code, making it beneficial for a wide range of tasks. license: Googles Terms and Conditions modality: audio, image, text, video; text model_card: none monitoring: unknown name: Gemini 1.5 Flash nationality: unknown organization: Google DeepMind prohibited_uses: '' quality_control: The research team is continually exploring new ideas at the frontier of AI and building innovative products for consistent progress. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://deepmind.google/technologies/gemini/flash/ ================================================ FILE: assets/deepnight.yaml ================================================ - access: open analysis: none created_date: 2023-11-04 dependencies: [] description: SaiLy is a series/collection of AI Models by Deepnight Research which are highly experimental and uncensored. 
feedback: https://huggingface.co/deepnight-research/saily_100b/discussions intended_uses: '' license: MIT modality: text; text model_card: https://huggingface.co/deepnight-research/saily_100b monitoring: unknown name: SaiLY nationality: International organization: Deepnight Research prohibited_uses: '' quality_control: '' size: 100B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/deepnight-research/saily_100b ================================================ FILE: assets/deepseek.yaml ================================================ - access: open analysis: Deepseek and baseline models (for comparison) evaluated on a series of representative benchmarks, both in English and Chinese. created_date: 2023-11-28 dependencies: [] description: Deepseek is a 67B parameter model with Grouped-Query Attention trained on 2 trillion tokens from scratch. feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base/discussions intended_uses: '' license: explanation: Model license can be found at https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL. Code license is under MIT value: custom modality: text; text model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base monitoring: unknown name: Deepseek nationality: USA organization: Deepseek AI prohibited_uses: none quality_control: Training dataset comprised of diverse data composition and pruned and deduplicated. size: 67B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/deepseek-ai/DeepSeek-LLM - access: open analysis: Deepseek and baseline models (for comparison) evaluated on a series of representative benchmarks, both in English and Chinese. created_date: 2023-11-29 dependencies: - Deepseek description: Deepseek Chat is a 67B parameter model initialized from Deepseek and fine-tuned on extra instruction data. 
feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/discussions intended_uses: '' license: explanation: Model license can be found at https://github.com/deepseek-ai/DeepSeek-LLM/blob/main/LICENSE-MODEL. Code license is under MIT value: custom modality: text; text model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat monitoring: unknown name: Deepseek Chat nationality: USA organization: Deepseek AI prohibited_uses: none quality_control: Training dataset comprised of diverse data composition and pruned and deduplicated. size: 67B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/deepseek-ai/DeepSeek-LLM - access: open analysis: Evaluated on code generation, code completion, cross-file code completion, and program-based math reasoning across standard benchmarks. created_date: 2023-11-03 dependencies: [] description: Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese. feedback: https://huggingface.co/deepseek-ai/deepseek-coder-33b-base/discussions intended_uses: '' license: explanation: Model license can be found at https://github.com/deepseek-ai/DeepSeek-Coder/blob/main/LICENSE-MODEL. 
Code license is under MIT value: custom modality: text; code model_card: https://huggingface.co/deepseek-ai/deepseek-coder-33b-base monitoring: unknown name: Deepseek Coder nationality: USA organization: Deepseek AI prohibited_uses: '' quality_control: '' size: 33B parameters (dense) training_emissions: unknown training_hardware: 8 NVIDIA A100 GPUs and 8 NVIDIA H800 GPUs training_time: unknown type: model url: https://github.com/deepseek-ai/DeepSeek-Coder ================================================ FILE: assets/dibt.yaml ================================================ - access: open analysis: none created_date: 2024-02-27 datasheet: https://huggingface.co/datasets/DIBT/10k_prompts_ranked dependencies: [] description: 10k_prompts_ranked is a dataset of prompts with quality rankings created by 314 members of the open-source ML community using Argilla, an open-source tool to label data. excluded: '' feedback: https://huggingface.co/datasets/DIBT/10k_prompts_ranked/discussions included: '' intended_uses: Training and evaluating language models on prompt ranking tasks and as a dataset that can be filtered only to include high-quality prompts. These can serve as seed data for generating synthetic prompts and generations. license: unknown modality: text monitoring: '' name: 10k_prompts_ranked nationality: USA organization: Data is Better Together prohibited_uses: This dataset only contains rankings for prompts, not prompt/response pairs so it is not suitable for direct use for supervised fine-tuning of language models. 
quality_control: '' sample: [] size: 10k examples type: dataset url: https://huggingface.co/blog/community-datasets ================================================ FILE: assets/duckduckgo.yaml ================================================ - access: open adaptation: '' created_date: 2023-03-08 dependencies: - Anthropic API description: The first Instant Answer in DuckDuckGo search results to use natural language technology to generate answers to search queries using Wikipedia and other related sources failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: DuckAssist nationality: USA organization: DuckDuckGo output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://spreadprivacy.com/duckassist-launch/ user_distribution: '' ================================================ FILE: assets/duolingo.yaml ================================================ - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - GPT-4 API description: Explain My Answer offers learners the chance to learn more about their response in a lesson (whether their answer was correct or incorrect!) By tapping a button after certain exercise types, learners can enter a chat with Duo to get a simple explanation on why their answer was right or wrong, and ask for examples or further clarification. failures: '' feedback: '' intended_uses: '' license: explanation: 'Users are granted a limited license per Duolingo Terms. 
' value: custom monitoring: '' monthly_active_users: '' name: Duolingo Explain My Answer nationality: USA organization: Duolingo output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.duolingo.com/terms type: application url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - Duolingo Role Play - Duolingo Explain My Answer description: Duolingo Max is a new subscription tier above Super Duolingo that gives learners access to two brand-new features and exercises - Explain My Answer and Roleplay. failures: '' feedback: '' intended_uses: '' license: explanation: 'Users are granted a limited license per Duolingo Terms. ' value: custom monitoring: '' monthly_active_users: '' name: Duolingo Max nationality: USA organization: Duolingo output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - GPT-4 API description: "Roleplay allows learners to practice real-world conversation skills\ \ with world characters in the app. These challenges, which earn XP, will live\ \ alongside the path as one of the \u201CSide Quests\u201D learners can access\ \ by tapping on the character. What will you talk about? We\u2019ll guide you\ \ through different scenarios! Learners might discuss future vacation plans with\ \ Lin, order coffee at a caf\xE9 in Paris, go furniture shopping with Eddy, or\ \ ask a friend to go for a hike." failures: '' feedback: '' intended_uses: '' license: explanation: '"Subject to the terms of these Terms and Conditions, Duolingo grants you a non-transferable, non-exclusive license to download, install, and use one copy of each App in object code form only on an interactive wireless device that you own or control" - excerpt from the Terms of Service document. 
' value: custom monitoring: '' monthly_active_users: '' name: Duolingo Role Play nationality: USA organization: Duolingo output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.duolingo.com/terms type: application url: https://blog.duolingo.com/duolingo-max/ user_distribution: '' ================================================ FILE: assets/eleutherai.yaml ================================================ - access: explanation: 'The dataset is freely available to the public and can be downloaded from The Eye [[The Pile]](https://mystic.the-eye.eu/public/AI/pile/). ' value: open analysis: 'Analyses of the data''s composition, document statistics, language/dialectal coverage, topical distribution, and biases are conducted in the paper [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf). ' created_date: 2021-01-01 datasheet: https://arxiv.org/pdf/2201.07311.pdf dependencies: [] description: 'A large language model training dataset, used to train GPT-NeoX-20B. ' excluded: 'Authors report that they have excluded some datasets "because they were too small to be worth spending time or because the English component of the data did not merit inclusion on its own. Three datasets were excluded for other reasons: (1) US Congressional Records were excluded because it "reflects the opinions and biases of the political class over the past 200 years, including segregationism and xenophobia." (2) Online Fanfiction resources amounting to Hundreds of GiB were excluded on logistical grounds. (3) Literotica, a platform where users can upload short-form erotic fiction, was excluded because the authors decided to exclude fanfiction, the corpus would require significant investigation, and corpus contain significant amount of stereotyping [[Appendix B]](https://arxiv.org/pdf/2101.00027.pdf). ' feedback: 'Feedback can be given by emailing the authors at contact at eleuther.ai.
' included: 'The Pile data come from 22 sources, with over half of the data being from Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3; 101GB), biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB). Refer to the paper for full decomposition [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2). ' intended_uses: 'The Pile was intended to be used as a high quality large text dataset for language modeling tasks, explained in more detail in the paper [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1). ' license: explanation: As indicated in the paper. value: MIT modality: code, text monitoring: none name: The Pile nationality: USA organization: EleutherAI prohibited_uses: none quality_control: 'In addition to the data inclusion and exclusion decisions, the quality was controlled through filtering for English (pycld2 language classifier), filtering for documents similar to OpenWebText2 (classifier on CommonCrawl), and several forms of deduplication as detailed in the paper [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C) [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D). ' sample: - "...pot trending topics and the coverage around them. First up, there\u2019s a\ \ bit of a visual redesign. Previously, clicking on a trending topic would highlight\ \ a story from one publication, and you\u2019d have to scroll down past a live\ \ video section to view related stories. Facebook is replacing that system with\ \ a simple carousel, which does a better job of showing you different coverage\ \ options. To be clear, the change doesn\u2019t affect how stories are sourced,\ \ according to Facebook. It\u2019s still the same algorithm pickin..." - Total knee arthroplasty (TKA) is a promising treatment for endstage osteoarthritis (OA) of the knee for alleviating pain and restoring the function of the knee. 
Some of the cases with bilateral TKA are symptomatic, necessitating revision arthroplasty in both the knees. A bilateral revision TKA can be done ei - On the converse, the set-valued map $\Phi:[0,3]\rightrightarrows [0,3]$ $$\Phi(x):=\left\{\begin{array}{ll} \{1\} & \mbox{ if } 0\leq x<1\\ {}[1,2] & \mbox{ if } 1\leq x\leq 2\\ \{2\} & - "This Court thus uses the same interpretation of V.R.C.P. 52(a) as it did *487\ \ under the previous statutory requirement found in 12 V.S.A. \xA7 2385. In essense,\ \ the defendants urge that this Court should reconsider the case of Green Mountain\ \ Marble Co. v. Highway Board, supra, and follow the Federal practice of looking\ \ to the evide" size: 825 GB type: dataset url: https://arxiv.org/pdf/2101.00027.pdf - access: explanation: 'The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/GPT-J-6B/step_383500.tar.zstd) ' value: open analysis: '' created_date: explanation: Date model blog post was published value: 2021-06-04 dependencies: - The Pile description: GPT-J is an open-source autoregressive language model. 
feedback: '' intended_uses: '' license: explanation: 'As indicated in the [[Github repository]](https://github.com/kingoflolz/mesh-transformer-jax) ' value: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: GPT-J nationality: USA organization: EleutherAI prohibited_uses: '' quality_control: '' size: 6B parameters (dense) training_emissions: '' training_hardware: 'TRC (Unspecified # of TPU v3-8s)' training_time: '' type: model url: https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/ - access: explanation: 'The model can be downloaded for free from [[The Eye]](https://mystic.the-eye.eu/public/AI/gptneo-release/GPT3_2-7B/) ' value: open analysis: '' created_date: explanation: Date Github repo was updated value: 2021-03-21 dependencies: - The Pile description: '' feedback: '' intended_uses: '' license: MIT modality: text; text model_card: '' monitoring: '' name: GPT-Neo nationality: USA organization: EleutherAI prohibited_uses: '' quality_control: '' size: 2.7B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://github.com/EleutherAI/gpt-neo - access: explanation: 'The model can be downloaded for free from The Eye [[GPT-NeoX-20B]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/). ' value: open analysis: 'The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI, HellaSwag, MMLU among others [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4). ' created_date: 2022-02-02 dependencies: - The Pile description: 'GPT-NeoX (20B) is an open-sourced autoregressive language model. ' feedback: 'Feedback can be provided using the # 20b channel in EleutherAI Discord group [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). Find the Discord link in the FAQ page [[FAQ]](https://www.eleuther.ai/faq/).
' intended_uses: 'As stated in the model card: "GPT-NeoX-20B learns an inner representation of the English language that can be used to extract features useful for downstream tasks. The model is best at what it was pretrained for however, which is generating text from a prompt. Due to the generality of the pretraining set, it has acquired the ability to generate completions across a wide range of tasks - from programming to fiction writing [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md)." ' license: explanation: 'As indicated in the accompanying blog post [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). ' value: Apache 2.0 modality: text; text, code model_card: https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md monitoring: none name: GPT-NeoX nationality: USA organization: EleutherAI prohibited_uses: none quality_control: none size: 20B parameters (dense) training_emissions: explanation: 'The amount of emission during the development and training of the model based on the author''s estimation [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). ' value: 31.73 tCO2e training_hardware: explanation: 'As outline by the authors [[Section 2.3]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.2.3) ' value: 12 x 8 A100 GPUs training_time: explanation: 'Training time was reported as 1830 hours reported by the authors, equaling 76.25 days. [[Section 6.4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#subsection.6.4). The authors report that 96 (12 * 8) A100 GPUs were used during the training. The A100 GPUs have a single precision performance of 0.0195 petaflops [[A100 Datasheet]](https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf). 
Assuming the estimated utilization is 33%, following [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum), the training time is 47.10 petaflop/s-day (76.25 * 96 * 0.0195 * 0.33). ' value: 47.10 petaflop/s-day type: model url: http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf - access: explanation: 'GooseAI API can be accessed by signing up on the goose.ai website. ' value: limited adaptation: unknown created_date: unknown dependencies: - GPT-NeoX description: 'GooseAI API is an API service providing access to NLP services. ' failures: unknown feedback: explanation: 'In the "Error Reporting and Feedback" section of the Goose.ai Terms of Service, GooseAI asks all the feedback to be sent to support at goose.ai [[GooseAI Terms of Service]](https://goose.ai/docs/tos). ' value: Email support intended_uses: 'Intended to be used as an NLP infrastructure. ' license: explanation: "\"Subject to Customer\u2019s strict compliance with this TOS, GooseAI\ \ grants Customer a limited, non-exclusive, non-transferable, non-sublicensable,\ \ revocable license to access and use the Platform as described in and subject\ \ to this TOS\" - excerpt from the Terms of Service document.\n" value: custom monitoring: explanation: 'In the "GooseAI Monitoring and Enforcement" section of GooseAI''s Acceptable Use Policy (AUP), it is stated that Goose.AI has the right to investigate any suspected violation of its AUP [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). ' value: At will monitoring by the provider monthly_active_users: unknown name: GooseAI API nationality: USA organization: GooseAI output_space: explanation: 'Question/Answer and Classification tasks are coming soon according to GooseAI [[Main Page]](goose.ai). ' value: Text Generation, Text Completion prohibited_uses: explanation: 'Prohibited uses are detailed in the Acceptable Use Policy [[GooseAI Acceptable Use Policy]](https://goose.ai/docs/aup). 
' value: 'Illegal or abusive activity, security violations, network abuse ' quality_control: unknown terms_of_service: https://goose.ai/docs/tos type: application url: goose.ai user_distribution: unknown - access: open analysis: Evaluated by human testers rating alignment of text input, image output pairs. created_date: 2022-09-04 dependencies: - VQGAN - CLIP description: VQGAN-CLIP is a model that better generates and edits images using a multimodal encoder to guide image generation. feedback: '' intended_uses: '' license: MIT modality: text; image model_card: '' monitoring: '' name: VQGAN-CLIP nationality: USA organization: EleutherAI prohibited_uses: '' quality_control: '' size: 227M parameters (dense) training_emissions: unknown training_hardware: 1 NVIDIA Tesla K80 GPU training_time: Less than 1 V100-hour type: model url: https://arxiv.org/pdf/2204.08583.pdf - access: open analysis: Evaluated on a variety of NLP benchmarks and found to perform similarly to OPT and BLOOM models. created_date: 2023-05-31 dependencies: - The Pile description: A suite of 16 LLMs all trained on public data seen in the exact same order and ranging in size from 70M to 12B parameters feedback: https://huggingface.co/EleutherAI/pythia-6.9b/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/EleutherAI/pythia-12b monitoring: '' name: Pythia nationality: unknown organization: Eleuther AI prohibited_uses: '' quality_control: '' size: 12B parameters (dense) training_emissions: '' training_hardware: 64 A100 GPUs training_time: '' type: model url: https://arxiv.org/pdf/2304.01373.pdf - access: open analysis: Evaluated on math benchmarks in comparison to general large language models. created_date: 2023-10-16 dependencies: - Proof Pile 2 - Code LLaMA description: Llemma is a large language model for mathematics. 
feedback: https://huggingface.co/EleutherAI/llemma_34b/discussions intended_uses: '' license: LLaMA 2 modality: text; text model_card: https://huggingface.co/EleutherAI/llemma_34b monitoring: none name: Llemma nationality: unknown organization: Princeton University, Eleuther AI prohibited_uses: '' quality_control: '' size: 34B parameters (dense) training_emissions: unknown training_hardware: 256 A100 40GB GPUs training_time: 47k A100 hours type: model url: https://arxiv.org/pdf/2310.10631.pdf - access: open analysis: '' created_date: 2023-10-16 datasheet: https://huggingface.co/datasets/EleutherAI/proof-pile-2 dependencies: - Common Crawl - OpenWebMath - Arxiv - RedPajama-Data description: Proof Pile 2 is a corpus for language modeling of mathematics. excluded: '' feedback: https://huggingface.co/datasets/EleutherAI/proof-pile-2/discussions included: scientific papers, web data containing mathematics, mathematical code intended_uses: '' license: MIT modality: text monitoring: none name: Proof Pile 2 nationality: unknown organization: Princeton University, Eleuther AI prohibited_uses: '' quality_control: '' sample: [] size: 55B tokens type: dataset url: https://arxiv.org/pdf/2310.10631.pdf - access: open analysis: The models were evaluated on SuperGLUE, CodeXGLUE, as well as MMLU and Bigbench Hard. Comparisons were made with T5v1.1 and found that Pile-T5 models performed better in most conditions. created_date: 2024-04-15 dependencies: - The Pile - T5x - LLaMA - umT5 description: Pile-T5 is a version of the broadly used T5 model, but improved to eliminate weaknesses such as the omission of crucial code-related tokens. It utilizes LLaMA tokenizer and is trained on the Pile, offering enhancements for finetuning on downstream tasks, particularly those involving code. feedback: unknown intended_uses: The model is aimed at downstream tasks that benefit from the encoder-decoder architecture. Particularly useful for tasks involving code. 
license: unknown modality: text; text model_card: none monitoring: unknown name: Pile-T5 nationality: USA organization: EleutherAI prohibited_uses: unknown quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: 2 million steps type: model url: https://blog.eleuther.ai/pile-t5/ ================================================ FILE: assets/ens.yaml ================================================ - access: explanation: Dataset, evaluation code and models are publicly available at the [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/). value: open analysis: explanation: See [[Experiments]](https://arxiv.org/pdf/1906.03327v2.pdf#section.5) value: Authors use the dataset to learn a joint text-video embedding by leveraging more than 130M video clip-caption pairs. They then evaluate the learned embeddings on the tasks of localizing steps in instructional videos of CrossTask and textbased video retrieval on YouCook2, MSR-VTT and LSMDC datasets. They show that their learned embedding can perform better compared to models trained on existing carefully annotated but smaller video description datasets. created_date: explanation: The date the [[paper]](https://arxiv.org/abs/1906.03327) was submitted. value: 2019-06-07 datasheet: https://arxiv.org/pdf/1906.03327.pdf dependencies: - YouTube description: HowTo100M is a large-scale dataset of narrated videos with an emphasis on instructional videos where content creators teach complex tasks with an explicit intention of explaining the visual content on screen. HowTo100M features a total of 136M video clips with captions sourced from 1.2M Youtube videos (15 years of video) and 23k activities from domains such as cooking, hand crafting, personal care, gardening or fitness. 
excluded: explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) value: Categories such as Relationships and Finance and Business, that may be more abstract, are excluded. Videos with less than 100 views are removed. Authors also ignore videos that have less than 100 words. Videos longer than 2,000 seconds are removed. As some videos may appear in several tasks, the videos are deduplicated based on YouTube IDs. feedback: '' included: explanation: See [[Data collection]](https://arxiv.org/pdf/1906.03327v2.pdf#subsection.3.1) value: The dataset features 1.22 million videos from YouTube with a primary focus on videos containing "visual tasks", that involve some interaction with the physical world (e.g. Making peanut butter, Pruning a tree) as compared to others that are more abstract (e.g. Ending a toxic relationship, Choosing a gift). To obtain predominantly visual tasks, the authors limit them to one of 12 categories (Food and Entertaining, Home and Garden, Hobbies and Crafts, Cars & Other Vehicles, Pets and Animals, Holidays and Traditions, Personal Care and Style, Sports and Fitness, Health, Education and Communications, Arts and Entertainment, Computers and Electronics). They also restrict to the top 200 YouTube search results, as the latter ones may not be related to the query task. intended_uses: '' license: Apache 2.0 modality: text, video monitoring: '' name: HowTo100M nationality: unknown organization: "\xC9cole Normale Sup\xE9rieure, Inria" prohibited_uses: explanation: See [[HowTo100M dataset webpage]](https://www.di.ens.fr/willow/research/howto100m/) value: 'No uses are explicitly prohibited by the authors. They note the following limitations of the dataset: "We note that the distribution of identities and activities in the HowTo100M dataset may not be representative of the global human population and the diversity in society. 
Please be careful of unintended societal, gender, racial and other biases when training or deploying models trained on this data." ' quality_control: '' sample: [] size: 136M video clips type: dataset url: https://arxiv.org/pdf/1906.03327.pdf ================================================ FILE: assets/epfl.yaml ================================================ - access: open analysis: Evaluated on TruthfulQA as main evaluation benchmark. created_date: 2023-11-27 dependencies: [] description: Meditron is a large-scale medical LLM that remains open-source. feedback: https://huggingface.co/epfl-llm/meditron-7b/discussions intended_uses: Medical exam question answering, supporting differential diagnosis, disease information. license: LLaMA 2 modality: text; text model_card: https://huggingface.co/epfl-llm/meditron-70b monitoring: none name: MediTron nationality: unknown organization: EPFL, Idiap Research Institute, OpenAssistant, Yale prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2311.16079.pdf ================================================ FILE: assets/evolutionaryscale.yaml ================================================ - access: open analysis: The model was tested in the generation of a new green fluorescent protein. Its effectiveness was compared to natural evolutionary processes, and it was deemed to simulate over 500 million years of evolution. created_date: 2024-06-25 dependencies: [] description: ESM3 is the first generative model for biology that simultaneously reasons over the sequence, structure, and function of proteins. It is trained across the natural diversity of Earth, reasoning over billions of proteins from diverse environments. It advances the ability to program and create with the code of life, simulating evolution, and making biology programmable. 
ESM3 is generative, and scientists can guide the model to create proteins for various applications. feedback: Unknown intended_uses: To engineer biology from first principles. It functions as a tool for scientists to create proteins for various applications, including medicine, biology research, and clean energy. license: Unknown modality: text; image, text model_card: unknown monitoring: Unknown though specific measures are not specified. name: ESM3 nationality: USA organization: EvolutionaryScale prohibited_uses: Unknown quality_control: The creators have put in place a responsible development framework to ensure transparency and accountability from the start. ESM3 was tested in the generation of a new protein, ensuring its quality and effectiveness. size: 98B parameters (Dense) training_emissions: Unknown training_hardware: unknown training_time: Unknown type: model url: https://www.evolutionaryscale.ai/blog/esm3-release ================================================ FILE: assets/faraday.yaml ================================================ - access: open adaptation: '' created_date: explanation: The date Faraday Lab's hugging face card was last updated. Date for ARES release is not published on site. value: 2023-04-26 dependencies: - Stable Diffusion description: ARES is a text-to-image generator based on Stable Diffusion. The goal is to provide a simple tool with a user interface allowing mainstream AI access for artists and creators. 
failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: ARES nationality: USA organization: Faraday Lab output_space: generated images prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://faradaylab.fr/ user_distribution: '' ================================================ FILE: assets/fuse.yaml ================================================ - access: open analysis: The FuseChat model was evaluated on MT-Bench which comprises 80 multi-turn dialogues spanning writing, roleplay, reasoning, math, coding, stem, and humanities domains. It yields an average performance of 66.52 with specific scores for individual domains available in the leaderboard results. created_date: 2024-02-26 dependencies: - Nous Hermes 2 - OpenChat 3.5 description: FuseChat is a powerful chat Large Language Model (LLM) that integrates multiple structure and scale-varied chat LLMs using a fuse-then-merge strategy. The fusion is done using two stages. feedback: https://huggingface.co/FuseAI/FuseChat-7B-VaRM/discussions intended_uses: FuseChat is intended to be used as a powerful chat bot that takes in text inputs and provides text-based responses. It can be utilized in a variety of domains including writing, roleplay, reasoning, math, coding, stem, and humanities. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/FuseAI/FuseChat-7B-VaRM monitoring: unknown name: FuseChat nationality: USA organization: FuseAI prohibited_uses: unknown quality_control: none size: 7B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/abs/2402.16107 ================================================ FILE: assets/genmo.yaml ================================================ - access: explanation: open state-of-the-art video generation model...
The weights and architecture for Mochi 1 are open value: open analysis: Mochi 1 sets a new best-in-class standard for open-source video generation. It also performs very competitively with the leading closed models... We benchmark prompt adherence with an automated metric using a vision language model as a judge following the protocol in OpenAI DALL-E 3. We evaluate generated videos using Gemini-1.5-Pro-002. created_date: 2025-01-14 dependencies: - DDPM - DreamFusion - Emu Video - T5-XXL description: Mochi 1 is an open-source video generation model designed to produce high-fidelity motion and strong prompt adherence in generated videos, setting a new standard for open video generation systems. feedback: unknown intended_uses: Advance the field of video generation and explore new methodologies. Build innovative applications in entertainment, advertising, education, and more. Empower artists and creators to bring their visions to life with AI-generated videos. Generate synthetic data for training AI models in robotics, autonomous vehicles and virtual environments. license: explanation: We're releasing the model under a permissive Apache 2.0 license. value: Apache 2.0 modality: explanation: Mochi 1 generates smooth videos... Measures how accurately generated videos follow the provided textual instructions value: text; video model_card: unknown monitoring: unknown name: Mochi 1 nationality: USA organization: Genmo prohibited_uses: unknown quality_control: robust safety moderation protocols in the playground to ensure that all video generations remain safe and aligned with ethical guidelines. 
size: explanation: featuring a 10 billion parameter diffusion model value: 10B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.genmo.ai/blog ================================================ FILE: assets/glm.yaml ================================================ - access: Open analysis: Evaluations show that GLM-4, 1) closely rivals or outperforms GPT-4 in terms of general metrics such as MMLU, GSM8K, MATH, BBH, GPQA, and HumanEval, 2) gets close to GPT-4-Turbo in instruction following as measured by IFEval, 3) matches GPT-4 Turbo (128K) and Claude 3 for long context tasks, and 4) outperforms GPT-4 in Chinese alignments as measured by AlignBench. created_date: 2023-07-02 dependencies: [] description: ChatGLM is an evolving family of large language models that have been developed over time. The GLM-4 language series, includes GLM-4, GLM-4-Air, and GLM-4-9B. They are pre-trained on ten trillions of tokens mostly in Chinese and English and are aligned primarily for Chinese and English usage. The high-quality alignment is achieved via a multi-stage post-training process, which involves supervised fine-tuning and learning from human feedback. GLM-4 All Tools model is further aligned to understand user intent and autonomously decide when and which tool(s) to use. feedback: Unknown intended_uses: General language modeling, complex tasks like accessing online information via web browsing and solving math problems using Python interpreter. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/THUDM/glm-4-9b monitoring: Unknown name: ChatGLM nationality: unknown organization: Team GLM, Zhipu AI, Tsinghua University prohibited_uses: Unknown quality_control: High-quality alignment is achieved via a multi-stage post-training process, which involves supervised fine-tuning and learning from human feedback. 
size: 9B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://arxiv.org/pdf/2406.12793 ================================================ FILE: assets/google.yaml ================================================ - access: explanation: https://huggingface.co/datasets/c4 value: open analysis: https://arxiv.org/abs/2104.08758 created_date: explanation: The date the T5 paper was made public. value: 2019-10-23 datasheet: https://huggingface.co/datasets/c4 dependencies: - CommonCrawl description: The Colossal Clean Crawled Corpus (C4) is a processed version of Common Crawl to facilitate transfer learning in NLP. excluded: 'Data was filtered for English using langdetect. Further, data was filtered to end in terminal punctuation, to remove short pages (less than 5 sentences), and to remove "Dirty, Naughty, Obscene or Otherwise Bad Words". ' feedback: https://huggingface.co/datasets/c4/discussions included: none intended_uses: To facilitate transfer learning research in NLP. license: ODC-By 1.0 modality: text monitoring: none name: C4 nationality: USA organization: Google prohibited_uses: none quality_control: 'Data filtering excluded obscene words from a block list as well as short documents and some deduplication was done based on string overlap. ' sample: - https://huggingface.co/datasets/c4/viewer/en/train size: 750GB type: dataset url: https://arxiv.org/abs/1910.10683 - access: closed analysis: unknown created_date: explanation: 'The date of the Google product update blog announcing that BERT models were used for ranking and featured snippets in Search. ' value: 2019-11-25 datasheet: none dependencies: [] description: 'The dataset used to train Internal Google BERT models. ' excluded: unknown feedback: unknown included: explanation: 'Although we don''t exactly know the contents of the Internal Google BERT dataset, it likely includes contents from web pages and search queries.
' value: Web pages, and search queries intended_uses: explanation: 'We don''t have an exhaustive list of the intended use cases for the Internal Google BERT dataset, but we know that BERT was used in Google Search. ' value: unknown license: unknown modality: text monitoring: unknown name: Internal Google BERT dataset nationality: USA organization: Google prohibited_uses: unknown quality_control: unknown sample: [] size: unknown type: dataset url: https://blog.google/products/search/search-language-understanding-bert/ - access: explanation: Dataset can be downloaded at [[Download]](https://ai.google.com/research/ConceptualCaptions/download) value: open analysis: explanation: See [[Experimental Results]](https://aclanthology.org/P18-1238.pdf#section.5) value: Authors evaluate the dataset on two image captioning models - RNN-based model and Transformer model, under two experimental conditions - using the training & development sets provided by the MS COCO dataset, versus training & development sets using the Conceptual dataset. They use three different test sets- the blind COCO-C40 test set, the Conceptual test set and the Flickr 1K test set. They present both Human and Automatic evaluation results. Human evaluations indicate that the Conceptual-based models are superior. Automatic models fail to corroborate the human evaluation results. This highlights the weakness of these automatic metrics. created_date: explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. value: 2018-07-01 datasheet: none dependencies: [] description: 'A dataset containing 3 million (image-URL, caption) pairs designed for the training and evaluation of machine learned image captioning systems. ' excluded: explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) value: "The following filtering steps are applied in the given order:\n1. 
Image-based\ \ Filtering - \"It only keeps JPEG images where both dimensions are greater\ \ than 400 pixels, and the ratio of larger to smaller dimension is no more than\ \ 2. It excludes images that trigger pornography or profanity detectors. These\ \ filters discard more than 65% of the candidates.\"\n2. Text-based Filtering\ \ - \"Candidates with no determiner, no noun, or no preposition are discarded;\ \ candidates with a high noun ratio are also discarded; candidates with a high\ \ rate of token repetition are discarded; candidates where the first word is\ \ not capitalized, or with too high capitalized-word ratio are discarded; we\ \ use a vocabulary VW of 1B token types, appearing at least 5 times in the English\ \ Wikipedia, and discard candidates that contain tokens that are not found in\ \ this vocabulary. candidates that score too high or too low on the polarity\ \ annotations, or trigger the pornography/profanity detectors, are discarded;\ \ predefined boiler-plate prefix/suffix sequences matching the text are cropped,\ \ e.g. \u201Cclick to enlarge picture\u201D, \u201Cstock photo\u201D; we also\ \ drop text which begins/ends in certain patterns, e.g. \u201Cembedded image\ \ permalink\u201D, \u201Cprofile photo\u201D. These filters only allow around\ \ 3% of the incoming candidates to pass to the later stages.\"\n3. Image&Text-based\ \ Filtering - \"We filter out candidates for which none of the text tokens can\ \ be mapped to the content of the image. This filter discards around 60% of\ \ the incoming candidates.\"\n4. 
Text Transformation with Hypernymization -\ \ \"Noun modifiers of certain types (proper nouns, numbers, units) are removed;\ \ dates, durations, and preposition-based locations (e.g., \"in Los Angeles\"\ ) are removed; named-entities are identified, matched against the KG entries,\ \ and substitute with their hypernym; resulting coordination noun-phrases with\ \ the same head (e.g., \"actor and actor\") are resolved into a single-head,\ \ pluralized form (e.g., \"actors\"). Around 20% of samples are discarded during\ \ this transformation. We then cluster all resolved entities (e.g., 2560 \"\ actor\", \"dog\", \"neighborhood\", etc.) and keep only the candidates for which\ \ all detected types have a count of over 100 (around 55% of the candidates).\"\ \n" feedback: Feedback can be provided by creating an issue in the [[Conceptual Captions GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) or by emailing at conceptual-captions at google.com included: '' intended_uses: '' license: explanation: '[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) ' value: Conceptual Captions License modality: image, text monitoring: unknown name: Conceptual Captions nationality: USA organization: Google prohibited_uses: unknown quality_control: explanation: See [[Conceptual Captions Dataset Creation]](https://aclanthology.org/P18-1238.pdf#section.3) value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. sample: [] size: 3.3M (image, text) pairs type: dataset url: https://aclanthology.org/P18-1238/ - access: explanation: Dataset is available at [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m). 
value: open analysis: explanation: See [[Evaluating Vision-and-Language PreTraining Data]](https://arxiv.org/pdf/2102.08981.pdf#section.3) value: 'The dataset is benchmarked against CC3M on two most fundamental V+L tasks: vision-to-language generation and vision-and-language matching, with an emphasis on long-tail visual recognition. The results illustrate the benefit of scaling up pre-training data for vision-and-language tasks, as indicated by the new state-of-the-art results on both the nocaps and Conceptual Captions benchmarks. ' created_date: explanation: 'The date the [[paper]](https://arxiv.org/abs/2102.08981) was submitted. ' value: 2021-02-17 datasheet: none dependencies: [] description: 'A dataset with 12 million image-text pairs specifically meant to be used for vision-and-language pre-training. ' excluded: explanation: See [[Relaxing filters for higher recall]](https://arxiv.org/pdf/2102.08981.pdf#subsection.2.2) value: 'Some of the filtering steps used in the preparation of Conceptual Captions dataset are relaxed to trade off high-recall for low-precision. The following steps are applied in the given order: 1. Image-based Filtering - Only keep JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2.5. Exclude images that trigger pornography or profanity detectors. 2. Text-based Filtering - Allow text between 3 and 256 words in the alt-text. Discard candidates with no noun or no determiner, but permit ones without prepositions. Set the maximum fraction of word repetition allowed to 0.2. Increase the threshold for counting a word type as rare from 5 to 20. 3. Image&Text-based Filtering - Filter out candidates for which none of the text tokens can be mapped to the content of the image. 
' feedback: Feedback can be provided by creating an issue in the [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) or by emailing at conceptual-captions at google.com included: '' intended_uses: '' license: explanation: '[[Conceptual Captions License]](https://github.com/google-research-datasets/conceptual-captions/blob/master/LICENSE) ' value: Conceptual Captions License modality: image, text monitoring: unknown name: Conceptual 12M nationality: USA organization: Google prohibited_uses: unknown quality_control: explanation: See [[Vision-and-Language Pre-Training Data]](https://arxiv.org/pdf/2102.08981.pdf#section.2) value: Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. Person-name substitutions are performed in the alt-texts to protect the privacy of individuals in the associated images. sample: [] size: 12M (image, text) pairs type: dataset url: https://arxiv.org/pdf/2102.08981.pdf - access: explanation: https://huggingface.co/t5-large value: open analysis: https://huggingface.co/t5-base#evaluation created_date: explanation: The date the T5 paper was made public. value: 2019-10-23 dependencies: - C4 description: Text-To-Text Transfer Transformer (T5) is a model that unifies all NLP tasks under the text-to-text format. feedback: https://huggingface.co/t5-large/discussions intended_uses: NLP tasks license: Apache 2.0 modality: text; text model_card: https://huggingface.co/t5-base monitoring: none name: T5 nationality: USA organization: Google prohibited_uses: unknown quality_control: The T5 paper documents many analyses/ablations that were considered before arriving at the final architecture/training procedure. 
size: 11B parameters (dense) training_emissions: unknown training_hardware: 1,024 TPU v3 chips (Cloud TPU Pods) training_time: unknown type: model url: https://arxiv.org/abs/1910.10683 - access: closed analysis: unknown created_date: explanation: 'The date of the Google product update blog announcing that BERT models were for ranking and featured snippets in Search. ' value: 2019-11-25 dependencies: - Internal Google BERT dataset description: 'Internal Google BERT model used to power Google Search products. ' feedback: unknown intended_uses: explanation: 'We don''t have an exhaustive list of the intended use cases for the Internal Google BERT model, but we know that Google Search was powered by a fine-tuned BERT. ' value: unknown license: unknown modality: text; text model_card: unknown monitoring: unknown name: Internal Google BERT nationality: USA organization: Google prohibited_uses: unknown quality_control: unknown size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://blog.google/products/search/search-language-understanding-bert/ - access: open adaptation: unknown created_date: explanation: 'The date of the Google product update blog announcing that BERT models were for ranking and featured snippets in Search. ' value: 2019-11-25 dependencies: - Internal Google BERT - MUM description: 'Google Search is Google''s search engine. ' failures: unknown feedback: 'Feedback can be sent to Google Feedback using the product interface [[Google Feedback]](https://www.google.com/tools/feedback). ' intended_uses: Searching the web using text, voice or image license: none monitoring: 'It is implied that Google scan uses of its products for spam, malware and illegal content in the [[Term of Service]](https://policies.google.com/terms). 
' monthly_active_users: unknown name: Google Search nationality: USA organization: Google output_space: web page ranking prohibited_uses: 'Prohibited use cases aren''t specifically spelled out for Google search, but several illegal and discouraged use cases are shared in the Respect Others section of the [[Term of Service]](https://policies.google.com/terms). ' quality_control: unknown terms_of_service: https://policies.google.com/terms type: application url: https://blog.google/products/search/search-language-understanding-bert/ user_distribution: unknown - access: closed analysis: unknown created_date: explanation: 'The date of the Google company news blog announcing LaMDA [[Google News Blog]](https://blog.google/technology/ai/lamda/). ' value: 2021-06-18 datasheet: none dependencies: [] description: 'Infiniset "is a combination of dialog data from public dialog data and other public web documents" [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). ' excluded: unknown feedback: explanation: 'Author contact information was not provided. ' value: none included: 'Included in the dataset are data from "public forums (0%); C4 data (12.5% ); code documents from sites related to programming like Q&A sites tutorials, etc (12.5%); Wikipedia (English) (12.5%); English web documents (6.25%); and Non-English web documents (6.25%)." ' intended_uses: explanation: 'Intended uses of the dataset wasn''t explicitly linked, but it is likely intended for training language models specialized in dialogue. ' value: unknown license: unknown modality: code, text monitoring: unknown name: Infiniset nationality: USA organization: Google prohibited_uses: 'The prohibited uses for Infiniset weren''t specifically listed, but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) advises avoiding harm, unjust impact and misinformation, among others. 
' quality_control: unknown sample: [] size: explanation: 'The size of the dataset is unclear, but it is reported that the dataset "consists of 2.97B documents and 1.12B dialogs with 13.39B utterances" [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). ' value: unknown type: dataset url: https://arxiv.org/pdf/2201.08239.pdf - access: closed analysis: 'The model performance was analyzed on sensibleness, specificity and interestingness. The model was also analyzed on safety, following metrics derived from Google AI Principles [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1). Finally, the model was analyzed on groundedness, testing its ability to produce responses that can be associated with "known sources whenever possible [[Section 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1)." ' created_date: explanation: 'The date of the Google company news blog announcing LaMDA [[Google News Blog]](https://blog.google/technology/ai/lamda/). ' value: 2021-06-18 dependencies: - Infiniset description: 'LaMDA stands for Language Models for Dialog Application. It is a transformer based language model trained on dialogue data. ' feedback: explanation: 'Author contact information was not provided. ' value: none intended_uses: 'LaMDA is a language model, so it can be used for regular langauge modelling tasks without fine-tuning, but its fine-tuned for dialogue tasks. ' license: unknown modality: text; text model_card: none monitoring: unknown name: LaMDA nationality: USA organization: Google prohibited_uses: 'The prohibited uses of LaMDA weren''t specifically listed, but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) advises avoiding harm, unjust impact and misinformation, among others. ' quality_control: 'LaMDA was fine-tuned to predict sensibleness, specificity and interestingness as well as safety. 
Then, the candidates were filtered out if the model safety predictions were below a certain threshold. The next candidates in the conversation were selected as a combination of these predictions. The model was also fine-tuned for groundedness. The results are shown in [[Figure 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23). ' size: 137B parameters (dense) training_emissions: explanation: "\"...total carbon footprint of LaMDA\u2019s pre-training of the\ \ largest model is approximately 25.2 tCO2e. The carbon footprint of pre-training\ \ of smaller models and fine-tuning of all models is approximately 0.7 tCO2e\ \ ... which brings the total footprint of LaMDA to approximately 26 tCO2e [[Section\ \ 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10)\"\n" value: 26 tCO2e training_hardware: explanation: 'Reported in [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10). ' value: 1024 TPU-V3 chips training_time: explanation: 'The total number of training flops of LaMDA was reported as 3.55E+23 (3.55E+8 petaflops) [[Section 10]](https://arxiv.org/pdf/2201.08239.pdf#section.10), which is equal to 4108.80 = 3.55E+8 / (60 * 60 * 24) petaflop/s-day. ' value: 4108.80 petaflop/s-day type: model url: https://arxiv.org/pdf/2201.08239.pdf - access: closed analysis: unknown created_date: explanation: 'The date of the Google AI blog announcing the details of PaLM [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). ' value: 2022-04-04 datasheet: https://arxiv.org/pdf/2204.02311.pdf#appendix.D dependencies: - Infiniset description: 'PaLM dataset "was created for pre-training language models" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). ' excluded: 'GitHub repositories with copyleft licenses were excluded. Programming languageges other than the most common 24 were excluded [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). 
' feedback: explanation: 'Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf). ' value: Contact the authors. included: 'The dataset is based on Infiniset. It included multilingual text containing text from over 100 languages. The breakdown of the data included is as follows: Social media conversations (multilingual) 50, Filtered webpages (multilingual) 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia (multilingual) 4%, and News (English) 1%. Code was collected from GitHub repositories with appropriate licenses, totalling 96GB of source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). ' intended_uses: explanation: 'As stated in [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). ' value: '"The dataset was created for pre-training language models by a team of researchers at Google". ' license: unknown modality: code, text monitoring: unknown name: PaLM dataset nationality: USA organization: Google prohibited_uses: '"... should not be used for any of the unacceptable language model use cases, e.g., generation of toxic speech" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). ' quality_control: 'In order to reduce low quality web pages, the web pages were sampled according to a "quality score" classifier. Code files were de-duplicated using Levenshtein distance [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). ' sample: [] size: explanation: 'Dataset size in GB is not reported, but the dataset is reported to have 780 billion tokens [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). The code portion of the dataset is reported to be 5% totaling a 196GB of source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). It is unclear whether the reported size is before or after de-duplication. Nonetheless, one can estimate the dataset size by multiplying 196GB with 20 = 3.92 TB. 
' value: 3.92 TB type: dataset url: https://arxiv.org/pdf/2204.02311.pdf - access: explanation: Weights can be downloaded from [Github](https://github.com/google-research/t5x/blob/main/docs/models.md) value: open analysis: Evaluated on a variety of standard language datasets. created_date: explanation: Date paper was released value: 2022-10-20 dependencies: - T5 - Muffin - P3 - NaturalInstructions-v2 - Flan CoT description: Flan-T5 is a version of the T5 language model fine-tuned on instruction data feedback: https://huggingface.co/google/flan-t5-xxl/discussions intended_uses: unknown license: explanation: License on the [[Github repository]](https://github.com/google-research/google-research) value: Apache 2.0 modality: text; text model_card: https://arxiv.org/pdf/2210.11416.pdf monitoring: none name: Flan-T5 nationality: USA organization: Google prohibited_uses: none quality_control: Across different multitask datasets, templates and formatting were maintained. For the chain-of-thoughts (CoT) data, specific exemplars were used. 
size: 11B parameters (dense) training_emissions: Unknown training_hardware: 512 v4 TPU Chips training_time: Unknown type: model url: https://arxiv.org/abs/2210.11416 - access: explanation: Model weights available for download in the [[Github repo]](https://github.com/google-research/google-research/tree/master/ul2) value: open analysis: '' created_date: explanation: Date model paper was released value: 2022-05-10 dependencies: - C4 description: UL2 is a language model trained with a new pretraining objective feedback: '' intended_uses: '' license: explanation: 20B checkpoints only for three different iteration steps value: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: UL2 nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: '' training_hardware: 128 TPUv4 training_time: '' type: model url: https://arxiv.org/abs/2205.05131 - access: explanation: Google does not provide access to Parti for external researchers. 
value: closed analysis: '' created_date: explanation: Date the model website was made public value: 2022-06-22 dependencies: - C4 - LAION-400M - FIT400M - JFT-4B description: Parti is a text-to-image diffusion model feedback: '' intended_uses: '' license: unknown modality: text; image model_card: '' monitoring: '' name: Parti nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: '' training_hardware: unknown training_time: unknown type: model url: https://parti.research.google/ - access: open analysis: '' created_date: explanation: Date the model website was made public value: 2022-05-23 dependencies: - LAION-400M - Google internal image-text dataset description: Imagen is a text-to-image diffusion model feedback: '' intended_uses: '' license: unknown modality: text; image model_card: '' monitoring: '' name: Imagen nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 14B parameters (dense) training_emissions: unknown training_hardware: 128 TPU-v4 training_time: unknown type: model url: https://imagen.research.google/ - access: explanation: Model checkpoints can be downloaded from the [[Github repository]](https://github.com/google-research/google-research/tree/master/vatt) value: open analysis: '' created_date: explanation: Date the model paper was made public value: 2022-04-22 dependencies: - AudioSet - HowTo100M description: VATT is a family of models trained on multimodal data feedback: '' intended_uses: '' license: explanation: License on the [[Github repository]](https://github.com/google-research/google-research) value: Apache 2.0 modality: text; audio, video model_card: '' monitoring: '' name: VATT nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 155M parameters (dense) training_emissions: unknown training_hardware: 256 TPU-v3 training_time: 3 days type: model url: https://arxiv.org/abs/2104.11178 - access: explanation: 
Made available through the PaLM API as of March 14, 2023. value: limited analysis: '"PaLM is evaluated on English Natural Language Processing (NLP) tasks, tasks from BIG-bench, reasoning tasks, code completion tasks, multilingual generation and question answering tasks, translation tasks, and bias and toxicity benchmarks" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). ' created_date: explanation: 'The date of the Google AI blog announcing the details of PaLM [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). ' value: 2022-04-04 dependencies: - PaLM dataset description: 'PaLM stands Pathways Language Model, "dense decoder-only Transformer model trained with the Pathways system" [[Google ai Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). ' feedback: explanation: 'Author contact information is shared in the paper [[Paper]](https://arxiv.org/pdf/2204.02311.pdf). ' value: Contact the authors. intended_uses: '"The primary use is research on language models, including: research on NLP applications like machine translation and question answering, advancing fairness and safety research, and understanding limitations of current LLMs. Within Google, PaLM is being used for research on a variety of open- ended text and code generation tasks, including reasoning [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3) and code synthesis and understanding [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). 
' license: unknown modality: text; text, code model_card: https://arxiv.org/pdf/2204.02311.pdf#appendix.E monitoring: unknown name: PaLM nationality: USA organization: Google prohibited_uses: 'The model "should not be used for downstream applications without further analysis on factors in the proposed downstream application [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)" ' quality_control: Unknown size: 540B parameters (dense) training_emissions: explanation: 'Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A) ' value: 271.43 tCO2 training_hardware: explanation: 'Reported in [[Section 4]](https://arxiv.org/pdf/2204.02311.pdf#section.4). ' value: 6144 TPU v4 chips training_time: explanation: 'Reported in [[Appendix B]](https://arxiv.org/pdf/2204.02311.pdf#appendix.A) ' value: 29600 petaflop/s-days type: model url: https://arxiv.org/pdf/2204.02311.pdf - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - PaLM description: "a new developer offering that makes it easy and safe to experiment\ \ with Google\u2019s language models." 
failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: PaLM API nationality: USA organization: Google output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html user_distribution: '' - access: closed analysis: '' created_date: 2022-12-26 dependencies: - Flan-PaLM - MultiMedQA description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Med-PaLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 540B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2212.13138 - access: closed analysis: Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated chest X-ray reports created_date: 2023-07-26 dependencies: - PaLM-E - MultiMedBench description: '' feedback: '' intended_uses: '' license: unknown modality: image, text, genome sequence; text model_card: '' monitoring: '' name: Med-PaLM Multimodal nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 562B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2307.14334.pdf - access: closed analysis: '' created_date: 2022-12-26 dependencies: - MedQA - MedMCQA - PubMedQA - MMLU - LiveQA - Medication QA - HealthSearchQA description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: MultiMedQA nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2212.13138 - access: closed analysis: '' created_date: 2022-10-20 dependencies: - PaLM - Muffin - P3 - NaturalInstructions-v2 description: '' 
feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Flan-PaLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 540B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2210.11416 - access: closed analysis: '' created_date: 2022-10-20 dependencies: - U-PaLM - Muffin - P3 - NaturalInstructions-v2 description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Flan-U-PaLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 540B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2210.11416 - access: open analysis: '' created_date: 2021-09-03 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: Apache 2.0 modality: text monitoring: '' name: Muffin nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 62 tasks type: dataset url: https://arxiv.org/abs/2109.01652 - access: closed analysis: '' created_date: 2022-10-20 dependencies: - PaLM - PaLM dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: U-PaLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 540B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2210.11399 - access: closed analysis: '' created_date: 2022-08-16 dependencies: - PaLM description: '' feedback: '' intended_uses: '' license: unknown (model weights), Apache 2.0 (SayCan code) modality: text; robotics trajectories model_card: '' monitoring: '' name: PaLM-SayCan nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 540B parameters (dense) 
training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2204.01691 - access: closed analysis: '' created_date: 2021-12-13 dependencies: - GLaM Web dataset - Wikipedia - GLaM Conversations dataset - GLaM Forums dataset - BooksCorpus - GLaM News dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: GLaM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 1.2T parameters (sparse) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2112.06905 - access: closed analysis: '' created_date: 2021-12-13 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: GLaM Web dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://arxiv.org/abs/2112.06905 - access: closed analysis: '' created_date: 2021-12-13 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: GLaM Conversations dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://arxiv.org/abs/2112.06905 - access: closed analysis: '' created_date: 2021-12-13 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: GLaM Forums dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://arxiv.org/abs/2112.06905 - access: closed analysis: '' created_date: 2021-12-13 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: 
text monitoring: '' name: GLaM News dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://arxiv.org/abs/2112.06905 - access: closed analysis: '' created_date: 2021-05-18 dependencies: - MUM dataset description: MUM (Multitask Unified Model) is a multimodal model that is specialized for more complex queries. feedback: '' intended_uses: '' license: unknown modality: image, text; text model_card: '' monitoring: '' name: MUM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://blog.google/products/search/introducing-mum/ - access: closed analysis: '' created_date: 2021-05-18 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: image, text monitoring: '' name: MUM dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://blog.google/products/search/introducing-mum/ - access: closed analysis: '' created_date: 2023-02-01 dependencies: - LAION-400M - Phenaki Video-Text Corpus description: '' feedback: '' intended_uses: '' license: unknown modality: text; video model_card: '' monitoring: '' name: Phenaki nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 1.8B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://openreview.net/pdf?id=vOEXS39nOF - access: closed analysis: '' created_date: 2023-02-01 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text, video monitoring: '' name: Phenaki Video-Text Corpus nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 15M text-video pairs at 8FPS type: dataset url: 
https://openreview.net/pdf?id=vOEXS39nOF - access: open analysis: '' created_date: 2023-03-02 dependencies: - UL2 - Flan Collection description: '' feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: Flan-UL2 nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2205.05131 - access: open analysis: '' created_date: 2023-01-31 datasheet: '' dependencies: - Flan dataset - P3 - NaturalInstructions-v2 description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: Apache 2.0 modality: text monitoring: '' name: Flan Collection nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 1836 tasks type: dataset url: https://arxiv.org/abs/2301.13688 - access: closed analysis: '' created_date: 2023-01-26 dependencies: - SoundStream - w2v-BERT - MuLan - MusicLM semantic model - MusicLM acoustic model description: '' feedback: '' intended_uses: '' license: unknown modality: text; audio model_card: '' monitoring: '' name: MusicLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 1.4B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2301.11325.pdf - access: closed analysis: '' created_date: 2023-01-26 dependencies: - Free Music Archive description: '' feedback: '' intended_uses: '' license: unknown modality: audio; audio model_card: '' monitoring: '' name: SoundStream nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2301.11325.pdf - access: closed analysis: '' created_date: 2023-01-26 dependencies: - Free Music Archive description: '' feedback: '' intended_uses: '' 
license: unknown modality: audio; audio model_card: '' monitoring: '' name: w2v-BERT nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 600M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2301.11325.pdf - access: closed analysis: '' created_date: 2022-08-26 dependencies: - AST - BERT - MuLan dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; audio model_card: '' monitoring: '' name: MuLan nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2208.12415 - access: closed analysis: '' created_date: 2022-08-26 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: audio, text monitoring: '' name: MuLan dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 370K hours audio type: dataset url: https://arxiv.org/abs/2208.12415 - access: closed analysis: '' created_date: 2023-01-26 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: audio monitoring: '' name: MusicLM dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 280K hours audio type: dataset url: https://arxiv.org/pdf/2301.11325.pdf - access: closed analysis: '' created_date: 2023-01-26 dependencies: - MusicLM dataset description: '' feedback: '' intended_uses: '' license: unknown modality: audio; audio model_card: '' monitoring: '' name: MusicLM semantic model nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 430M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2301.11325.pdf - 
access: closed analysis: '' created_date: 2023-01-26 dependencies: - MusicLM dataset description: '' feedback: '' intended_uses: '' license: unknown modality: audio; audio model_card: '' monitoring: '' name: MusicLM acoustic model nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 430M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2301.11325.pdf - access: closed analysis: '' created_date: 2023-02-08 dependencies: - Noise2Music pseudolabel dataset description: '' feedback: '' intended_uses: '' license: unknown modality: audio, text; audio model_card: '' monitoring: '' name: Noise2Music nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://google-research.github.io/noise2music/noise2music.pdf - access: closed analysis: '' created_date: 2023-02-08 datasheet: '' dependencies: - LaMDA description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: LaMDA-LF nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 150k songs type: dataset url: https://google-research.github.io/noise2music/noise2music.pdf - access: closed analysis: '' created_date: 2023-02-08 datasheet: '' dependencies: - MusicCaps description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: Rater-LF nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 10k captions type: dataset url: https://google-research.github.io/noise2music/noise2music.pdf - access: closed analysis: '' created_date: 2023-02-08 datasheet: '' dependencies: - MusicCaps description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: Rater-SF
nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 24k captions type: dataset url: https://google-research.github.io/noise2music/noise2music.pdf - access: closed analysis: '' created_date: 2023-02-08 dependencies: - MuLan - MuLaMCap - LaMDA-LF - Rater-LF - Rater-SF description: '' feedback: '' intended_uses: '' license: unknown modality: audio, text; audio model_card: '' monitoring: '' name: Noise2Music pseudolabeler nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://google-research.github.io/noise2music/noise2music.pdf - access: closed analysis: '' created_date: 2023-02-08 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: audio monitoring: '' name: Noise2Music audio dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 340k hours audio type: dataset url: https://google-research.github.io/noise2music/noise2music.pdf - access: closed analysis: '' created_date: 2023-02-08 datasheet: '' dependencies: - Noise2Music audio dataset - Noise2Music pseudolabeler description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: explanation: 'The asset isn''t released, and hence the license is unknown.' value: unknown modality: audio, text monitoring: '' name: Noise2Music pseudolabel dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 340k hours audio with pseudolabels type: dataset url: https://google-research.github.io/noise2music/noise2music.pdf - access: limited adaptation: '' created_date: 2022-08-25 dependencies: - LaMDA description: AI Test Kitchen provides a new way for people to learn about, experience, and give feedback on emerging AI technology, like LaMDA.
failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: AI Test Kitchen nationality: USA organization: Google output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/ user_distribution: '' - access: closed adaptation: '' created_date: 2023-02-06 dependencies: - LaMDA description: Conversational AI service, powered by LaMDA failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Bard nationality: USA organization: Google output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://blog.google/technology/ai/bard-google-ai-search-updates/ user_distribution: '' - access: closed analysis: '' created_date: 2022-06-29 dependencies: - PaLM - arXiv - PaLM dataset - Minerva Math Web Pages dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Minerva nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 540B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2206.14858 - access: closed analysis: '' created_date: 2022-06-29 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: Minerva Math Web Pages dataset nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 17.5B tokens type: dataset url: https://arxiv.org/abs/2206.14858 - access: limited analysis: '' created_date: 2023-03-06 dependencies: - YT-NLU-U - Pub-U - Web-NTL - YT-SUP+ - Pub-S description: Universal Speech Model (USM) is a family of state-of-the-art speech models with 2B parameters trained on 12 million hours of speech and 28 billion sentences of 
text, spanning 300+ languages. USM, which is for use in YouTube (e.g., for closed captions), can perform automatic speech recognition (ASR) on widely-spoken languages like English and Mandarin, but also languages like Punjabi, Assamese, Santhali, Balinese, Shona, Malagasy, Luganda, Luo, Bambara, Soga, Maninka, Xhosa, Akan, Lingala, Chichewa, Nkore, Nzema to name a few. Some of these languages are spoken by fewer than twenty million people, making it very hard to find the necessary training data. feedback: '' intended_uses: '' license: unknown modality: audio, text; text model_card: '' monitoring: '' name: USM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 2B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2303.01037 - access: open adaptation: '' created_date: 2005-02-14 dependencies: - USM description: YouTube is a global online video sharing and social media platform failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: YouTube nationality: USA organization: Google output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.youtube.com/ user_distribution: '' - access: closed analysis: '' created_date: 2023-03-06 dependencies: - PaLM - ViT-22B description: '' feedback: '' intended_uses: '' license: unknown modality: image, text; text model_card: '' monitoring: '' name: PaLM-E nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 562B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2303.03378 - access: closed analysis: '' created_date: 2023-02-10 dependencies: - JFT description: '' feedback: '' intended_uses: '' license: unknown modality: image; image model_card: '' monitoring: '' name: ViT-22B nationality: USA organization: Google prohibited_uses: ''
quality_control: '' size: 22B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2302.05442 - access: closed analysis: '' created_date: 2022-09-07 dependencies: - w2v-BERT - SoundStream description: '' feedback: '' intended_uses: '' license: unknown modality: audio, text; audio model_card: '' monitoring: '' name: AudioLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 1B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2209.03143 - access: closed analysis: '' created_date: 2022-09-14 dependencies: - mT5 - ViT-e - WebLI description: '' feedback: '' intended_uses: '' license: unknown modality: text; image model_card: '' monitoring: '' name: PaLI nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 17B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2209.06794 - access: closed analysis: '' created_date: 2022-09-14 dependencies: - JFT description: '' feedback: '' intended_uses: '' license: unknown modality: image; image model_card: '' monitoring: '' name: ViT-e nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 3.9B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2209.06794 - access: closed analysis: '' created_date: 2022-09-14 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: image, text monitoring: '' name: WebLI nationality: USA organization: Google prohibited_uses: '' quality_control: '' sample: [] size: 10B images, 12B alt-text type: dataset url: https://arxiv.org/abs/2209.06794 - access: open analysis: '' created_date: 2023-02-27 dependencies: - T5 - CLIP - YT-Temporal-1B description: '' feedback: '' 
intended_uses: '' license: Apache 2.0 modality: video; text model_card: '' monitoring: '' name: Vid2Seq nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 500M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2302.14115 - access: closed analysis: evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus on dialog state tracking. created_date: 2023-06-08 dependencies: - CTC blank-filtering - Speech2Text adapter description: Joint speech and language model using a Speech2Text adapter and using a CTC-based blank-filtering. feedback: '' intended_uses: '' license: unknown modality: audio; text model_card: '' monitoring: '' name: Google Joint SLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2306.07944.pdf - access: closed analysis: Reports results on standard code benchmarks across a variety of programming languages. created_date: 2023-05-10 dependencies: - PaLM 2 dataset description: PaLM 2 is a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives similar to UL2. feedback: Specific queries provided by annotators intended_uses: general use large language model that can be used for language, reasoning, and code tasks. license: unknown modality: text; text model_card: https://ai.google/static/documents/palm2techreport.pdf monitoring: Google internal monitoring name: PaLM 2 nationality: USA organization: Google prohibited_uses: becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment quality_control: Employed de-duplication, removal of sensitive-PII and filtering. 
Added control tokens marking toxicity of text. size: unknown training_emissions: '' training_hardware: TPU v4 (number unspecified) training_time: '' type: model url: https://blog.google/technology/ai/google-palm-2-ai-large-language-model/ - access: limited analysis: Assessed on medical benchmarks of professional medical exams, medical research, and consumer queries. created_date: 2023-12-13 dependencies: [] description: MedLM is a collection of foundation models tuned to follow natural language instructions for tasks in medicine, such as question answering and creating draft summaries. feedback: none intended_uses: to be used for question answering and creating draft summaries from existing documentation, to be reviewed, edited, and approved by the user before use. license: unknown modality: text; text model_card: https://cloud.google.com/static/vertex-ai/docs/generative-ai/medlm/MedLM-model-card.pdf monitoring: Google internal monitoring name: MedLM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://cloud.google.com/vertex-ai/docs/generative-ai/medlm/overview - access: closed analysis: Evaluated on standard general, reasoning, math, coding, and multimodal benchmarks with results that surpass GPT-4 on almost all. created_date: 2023-12-06 dependencies: [] description: As of release, Gemini is Google's most capable and flexible AI model, proficient in multimodal domains. feedback: none intended_uses: general use large language model that can be used for language, reasoning, and code tasks. 
license: unknown modality: text; image, text, video model_card: none monitoring: Google internal monitoring name: Gemini nationality: USA organization: Google prohibited_uses: becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment quality_control: '' size: explanation: Comes in sizes Ultra, Pro, and Nano. value: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://deepmind.google/technologies/gemini/#introduction - access: closed analysis: Evaluated on popular time-series benchmarks. created_date: 2024-02-02 dependencies: [] description: TimesFM is a single forecasting model pre-trained on a large time-series corpus of 100 billion real world time-points. feedback: none intended_uses: '' license: unknown modality: '' model_card: none monitoring: unknown name: TimesFM nationality: USA organization: Google prohibited_uses: '' quality_control: '' size: 200M parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://blog.research.google/2024/02/a-decoder-only-foundation-model-for.html - access: open analysis: Evaluation was conducted on standard LLM benchmarks and includes internal red-teaming testing of relevant content policies. created_date: 2024-02-21 dependencies: [] description: Gemma is a family of lightweight, state-of-the-art open models from Google, based on the Gemini models. They are text-to-text, decoder-only large language models, available in English. feedback: https://huggingface.co/google/gemma-7b/discussions intended_uses: Text generation tasks including question answering, summarization, and reasoning; content creation, communication, research, and education. license: explanation: License can be found at https://ai.google.dev/gemma/terms. 
value: custom modality: text; text model_card: https://huggingface.co/google/gemma-7b monitoring: '' name: Gemma nationality: USA organization: Google prohibited_uses: Prohibited uses are specified in the Gemma Prohibited Use Policy here https://ai.google.dev/gemma/prohibited_use_policy quality_control: Multiple evaluations and red-teaming conducted, with particular focus on ethics, bias, fair use cases, and safety. size: 7B parameters (dense) training_emissions: unknown training_hardware: TPUv5e training_time: unknown type: model url: https://blog.google/technology/developers/gemma-open-models/ - access: closed analysis: Evaluated Med-Gemini on 14 medical benchmarks spanning text, multimodal and long-context applications, establishing new state-of-the-art (SoTA) performance on 10 of them, and surpassing the GPT-4 model family on every benchmark where a direct comparison is viable. created_date: 2024-04-29 dependencies: - Gemini - MultiMedBench description: Med-Gemini is a family of highly capable multimodal models that are specialized in medicine with the ability to seamlessly integrate the use of web search, and that can be efficiently tailored to novel modalities using custom encoders. feedback: none intended_uses: To be used in areas of medical research including medical summarization, referral letter generation, and medical simplification tasks. license: unknown modality: image, text; text model_card: none monitoring: '' name: Med-Gemini nationality: USA organization: Google prohibited_uses: Unfit for real-world deployment in the safety-critical medical domain. quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2404.18416 - access: open analysis: Unknown created_date: 2024-09-05 dependencies: [] description: Imagen 3 is a high-quality text-to-image model capable of generating images with improved detail, richer lighting, and fewer distracting artifacts. 
It features improved prompt understanding and can be used to generate a wide array of visual styles from quick sketches to high-resolution images. The model is available in multiple versions, each optimized for particular types of tasks. Imagen 3 has been trained to capture nuances like specific camera angles or compositions in long, complex prompts, making it a versatile tool for image generation from textual inputs. feedback: Unknown intended_uses: Imagen 3 is intended to be used for generation of high-resolution images from textual prompts, from photorealistic landscapes to richly textured oil paintings or whimsical claymation scenes. It can also be used for stylized birthday cards, presentations, and more, due to its improved text rendering capabilities. license: Unknown modality: text; image model_card: unknown monitoring: Unknown name: Imagen 3 nationality: unknown organization: Google DeepMind prohibited_uses: Unknown quality_control: Unknown size: Unknown training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://deepmind.google/technologies/imagen-3/ - access: open analysis: The 27B Gemma 2 model outperforms other open models in its size category offering cutting-edge performance. Specific details can be found in the provided technical report. created_date: 2024-06-27 dependencies: - Gemma - CodeGemma - RecurrentGemma - PaliGemma description: Gemma 2 is an open model that offers best-in-class performance and runs at incredible speed across different hardware. It easily integrates with other AI tools. This model is built on a redesigned architecture engineered for exceptional performance and inference efficiency. It is available in both 9 billion (9B) and 27 billion (27B) parameter sizes. Gemma 2 is optimized to run at incredible speed across a range of hardware, from powerful gaming laptops and high-end desktops, to cloud-based setups. 
feedback: Unknown intended_uses: Gemma 2 is designed for developers and researchers for various AI tasks. It can be used via the integrations it offers with other AI tools/platforms and can additionally be deployed for more accessible and budget-friendly AI deployments. license: Gemma (commercially-friendly license given by Google DeepMind) modality: text; text model_card: unknown monitoring: Unknown name: Gemma 2 nationality: unknown organization: Google DeepMind prohibited_uses: Not specified quality_control: Google DeepMind implemented a refined architecture for Gemma 2. The model has improvements in safety and efficiency over the first generation. The deployment of Gemma 2 on Vertex AI, scheduled for the next month, will offer effortless management of the model. size: 27B parameters (dense) training_emissions: Unknown training_hardware: Google Cloud TPU host, NVIDIA A100 80GB Tensor Core GPU, NVIDIA H100 Tensor Core GPU training_time: Unknown type: model url: https://blog.google/technology/developers/google-gemma-2/ - access: open analysis: The model nearly matches the performance of the Gemini 1.5 Flash model across many benchmarks through developer feedback and testing. created_date: 2024-10-03 dependencies: [] description: Gemini 1.5 Flash-8B is a lightweight, speed-optimized variant of the Gemini 1.5 Flash model, designed for efficiency and lower latency especially for tasks such as chat, transcription, and long context language translation. The model is production-ready and offers the lowest cost per intelligence of any Gemini model with improved rate limits. feedback: Encourages developer feedback to inform model improvements and future updates. intended_uses: Designed for simple, high-volume tasks including multimodal use cases and long-context summarization. 
license: unknown modality: audio, image, text, video; text model_card: unknown monitoring: unknown name: Gemini 1.5 Flash-8B nationality: unknown organization: Google DeepMind prohibited_uses: unknown quality_control: Model improvement informed by developer feedback and extensive testing. size: 8B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://developers.googleblog.com/en/gemini-15-flash-8b-is-now-generally-available-for-use/ - access: explanation: The models are accessible via Vertex AI Model Garden [CXR, Derm, Path] and Hugging Face [CXR, Derm, Path]. value: open analysis: After giving the community time to use the models and explore different applications, we collected feedback. created_date: 2024-11-25 dependencies: - EfficientNet-L2 - BERT - CLIP - BLIP-2 - BiT ResNet-101x3 - ViT-S description: Health AI Developer Foundations (HAI-DEF) is a new suite of open weight models to help developers more easily build AI models for healthcare applications. feedback: unknown intended_uses: Building AI models for healthcare applications including imaging applications in radiology, dermatology, and pathology. license: unknown modality: explanation: "CXR Foundation\u2019s image encoding model takes DICOM images, and\ \ its text-encoder accepts textual strings..." value: text, image; vector model_card: unknown monitoring: After giving the community time to use the models and explore different applications, we collected feedback. name: Health AI Developer Foundations (HAI-DEF) nationality: USA organization: Google Research prohibited_uses: unknown quality_control: Learning from community experience with previous research endpoints...we collected feedback. 
size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: http://goo.gle/3AWVX33 - access: closed analysis: Unknown created_date: 2024-12-04 dependencies: - Imagen 3 description: Genie 2 is a foundation world model capable of generating an endless variety of action-controllable, playable 3D environments for training and evaluating embodied agents based on a single prompt image. feedback: Unknown intended_uses: Genie 2 can be used for generating diverse environments for training and evaluating AI agents, rapid prototyping interactive experiences, and experimenting with novel environments. license: Unknown modality: explanation: the model is prompted with a single image generated by Imagen 3 value: image; video model_card: unknown monitoring: Unknown name: Genie 2 nationality: unknown organization: Google DeepMind prohibited_uses: Unknown quality_control: Responsible development is emphasized, developing our technologies responsibly and building towards more general AI systems that can safely carry out tasks. size: Unknown training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://deepmind.google/discover/blog/genie-2-a-large-scale-foundation-world-model/ - access: limited analysis: Veo 2 outperforms other leading video generation models, based on human evaluations of its performance. created_date: 2024-12-16 dependencies: [] description: Veo 2 is a state-of-the-art video generation model that creates videos with realistic motion and high-quality output, up to 4K, with extensive camera controls. It simulates real-world physics and offers advanced motion capabilities with enhanced realism and fidelity. feedback: unknown intended_uses: Creating high-quality videos with realistic motion, different styles, camera controls, shot styles, angles, and movements. license: unknown modality: explanation: Our state-of-the-art video generation model ... 
text-to-image model Veo 2 value: text; video model_card: unknown monitoring: unknown name: Veo 2 nationality: unknown organization: Google DeepMind prohibited_uses: unknown quality_control: Veo 2 includes features that enhance realism, fidelity, detail, and artifact reduction to ensure high-quality output. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://deepmind.google/technologies/veo/veo-2/ - access: explanation: Gemini 2.0 Flash is available to developers and trusted testers, with wider availability planned for early next year. value: limited analysis: unknown created_date: 2024-12-11 dependencies: [] description: Google DeepMind introduces Gemini 2.0, a new AI model designed for the 'agentic era.' feedback: unknown intended_uses: Develop more agentic models, meaning they can understand more about the world around you, think multiple steps ahead, and take action on your behalf, with your supervision. license: unknown modality: explanation: The first model built to be natively multimodal, Gemini 1.0 and 1.5 drove big advances with multimodality and long context to understand information across text, video, images, audio and code... value: text, video, image, audio; image, text model_card: unknown monitoring: unknown name: Gemini 2.0 nationality: unknown organization: Google DeepMind prohibited_uses: unknown quality_control: Google is committed to building AI responsibly, with safety and security as key priorities. size: unknown training_emissions: unknown training_hardware: explanation: "It\u2019s built on custom hardware like Trillium, our sixth-generation\ \ TPUs." 
value: custom hardware like Trillium, our sixth-generation TPUs training_time: unknown type: model url: https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#ceo-message - access: explanation: 'Download models & code: Find the pre-trained models and code on Hugging Face and Kaggle' value: open analysis: Our research demonstrates leading performance on chemical formula recognition, music score recognition, spatial reasoning, and chest X-ray report generation, as detailed in the technical report. created_date: 2024-12-05 dependencies: - PaliGemma - Hugging Face - Kaggle - Hugging Face Transformers - Keras - PyTorch - JAX description: PaliGemma 2 builds upon the performant Gemma 2 models, adding the power of vision and making it easier than ever to fine-tune for exceptional performance. With PaliGemma 2, these models can see, understand, and interact with visual input, opening up a world of new possibilities. feedback: Your feedback and contributions are invaluable in shaping the future of these models and driving innovation in the field. intended_uses: fine-tuning for specific tasks and datasets straightforward, empowering you to tailor its capabilities to your precise needs. 
license: unknown modality: explanation: PaliGemma 2 generates detailed, contextually relevant captions for images value: image; text model_card: unknown monitoring: unknown name: PaliGemma 2 nationality: USA organization: Google prohibited_uses: unknown quality_control: unknown size: explanation: Optimize performance for any task with PaliGemma 2's multiple model sizes (3B, 10B, 28B parameters) value: 28B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://developers.googleblog.com/en/introducing-paligemma-2-powerful-vision-language-models-simple-fine-tuning/ ================================================ FILE: assets/greenbit.yaml ================================================ - access: open analysis: Evaluated on common LLM benchmarks. created_date: 2023-09-29 dependencies: - LLaMA description: GreenBit LLaMA is a series of fine-tuned LLaMA models. feedback: https://huggingface.co/GreenBitAI/LLaMA-30B-2bit-groupsize8/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/GreenBitAI/LLaMA-30B-2bit-groupsize8 monitoring: unknown name: GreenBit LLaMA nationality: USA organization: GreenBit AI prohibited_uses: '' quality_control: '' size: 30B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/GreenBitAI/low_bit_llama ================================================ FILE: assets/h2oai.yaml ================================================ - access: open analysis: Evaluated on EleutherAI evaluation harness. created_date: 2023-06-16 dependencies: - GPT-NeoX - H2O AI OpenAssistant - h2oGPT Repositories description: Series of models fine-tuned on well-known LLMs using the h2oGPT repositories. 
feedback: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b monitoring: '' name: h2oGPT nationality: USA organization: H2O AI prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: unknown training_hardware: unspecified number of 48GB A100 NVIDIA GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2306.08161.pdf - access: open analysis: Evaluated on common sense and world knowledge benchmarks. created_date: 2024-01-30 dependencies: [] description: H2O Danube is a language model trained on 1T tokens following the core principles of LLaMA 2 and Mistral. feedback: https://huggingface.co/h2oai/h2o-danube-1.8b-base/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/h2oai/h2o-danube-1.8b-base monitoring: unknown name: H2O Danube nationality: USA organization: H2O AI prohibited_uses: Users are encouraged to use the large language model responsibly and ethically. By using this model, you agree not to use it for purposes that promote hate speech, discrimination, harassment, or any form of illegal or harmful activities. 
quality_control: unknown size: 1.8B parameters (dense) training_emissions: unknown training_hardware: 8x H100 GPUs on a single node training_time: unknown type: model url: https://arxiv.org/pdf/2401.16818.pdf ================================================ FILE: assets/hubspot.yaml ================================================ - access: limited adaptation: '' created_date: 2023-01-31 dependencies: - ChatGPT API description: Give your sales, marketing, and customer service teams one of the most powerful AI tools available - ChatGPT priority access, no timeout limits, company wide access managed through a single account, incorporate into your existing processes without leaving HubSpot failures: '' feedback: '' intended_uses: '' license: explanation: 'We weren''t able to locate a license. ' value: unknown monitoring: '' monthly_active_users: '' name: ChatGPT powered by OBO nationality: USA organization: HubSpot output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072 user_distribution: '' ================================================ FILE: assets/huggingface.yaml ================================================ - access: open analysis: none created_date: explanation: The date the model was announced value: 2021-12-06 dependencies: [] description: CodeParrot is an autoregressive language model trained on code feedback: none intended_uses: none license: explanation: No license is explicitly provided for this model. 
value: none modality: text; code, text model_card: none monitoring: none name: CodeParrot nationality: USA organization: HuggingFace prohibited_uses: none quality_control: none size: 1B parameters (dense) training_emissions: unknown training_hardware: 16 x A100 (40GB) training_time: unknown type: model url: https://twitter.com/lvwerra/status/1467933794699259908 - access: open analysis: Evaluated on loss, rewards, logps, and logits rejected and chosen. created_date: 2023-10-11 dependencies: - Mistral description: Zephyr is a series of language models that are trained to act as helpful assistants. feedback: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha/discussions intended_uses: Educational and research purposes license: MIT modality: text; text model_card: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha monitoring: none name: Zephyr nationality: USA organization: HuggingFace prohibited_uses: none quality_control: none size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha - access: open analysis: Evaluated in comparison to Flamingo and OpenFlamingo on standard benchmarks. created_date: 2023-08-22 dependencies: - OBELICS - Wikipedia - LAION-5B - PMD description: IDEFICS is an open-access visual language model, based on Flamingo. 
feedback: https://huggingface.co/HuggingFaceM4/idefics-80b-instruct/discussions intended_uses: Educational and research purposes license: explanation: Can be found at https://huggingface.co/HuggingFaceM4/idefics-80b-instruct#license value: custom modality: image, text; text model_card: https://huggingface.co/HuggingFaceM4/idefics-80b-instruct monitoring: none name: IDEFICS nationality: USA organization: HuggingFace prohibited_uses: none quality_control: none size: 80B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/blog/idefics - access: open analysis: Subset of training dataset evaluated for bias using Data Measurements Tool. created_date: 2023-08-22 datasheet: https://huggingface.co/datasets/HuggingFaceM4/OBELICS dependencies: [] description: OBELICS is a dataset consisting of 141 million interleaved image-text documents scraped from the web and contains 353 million images. excluded: All images for which creators explicitly requested opt-out of AI training. feedback: https://huggingface.co/datasets/HuggingFaceM4/OBELICS/discussions included: '' intended_uses: '' license: CC-BY-4.0 modality: image, text monitoring: '' name: OBELICS nationality: USA organization: HuggingFace prohibited_uses: '' quality_control: Sexual and violent content still present in OBELICS even after filtering. sample: [] size: 115B tokens type: dataset url: https://huggingface.co/blog/idefics - access: open analysis: Evaluated on in-house benchmark, FIN-bench, adapted from BIG-bench for Finnish. created_date: 2023-11-03 dependencies: [] description: FinGPT is a series of Finnish LLMs trained from scratch. 
feedback: https://huggingface.co/TurkuNLP/gpt3-finnish-13B/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/TurkuNLP/gpt3-finnish-13B monitoring: unknown name: FinGPT nationality: unknown organization: University of Turku, HuggingFace, National Library of Finland prohibited_uses: '' quality_control: unknown size: 13B parameters (dense) training_emissions: unknown training_hardware: 192 nodes, each consisting of 4 AMD Instinct MI250X GPUs, a single 64-core AMD Trento CPU and 512GB of memory. training_time: unknown type: model url: https://arxiv.org/pdf/2311.05640.pdf - access: open analysis: Evaluated on in-house benchmark, FIN-bench, adapted from BIG-bench for Finnish. created_date: 2023-11-03 dependencies: - BLOOM description: BLUUMI is a multilingual fine-tuned version of BLOOM. feedback: https://huggingface.co/TurkuNLP/bloom-finnish-176b/discussions intended_uses: '' license: explanation: Model card indicates same as license for BLOOM. value: BigScience RAIL v1.0 modality: text; text model_card: https://huggingface.co/TurkuNLP/bloom-finnish-176b monitoring: unknown name: BLUUMI nationality: unknown organization: University of Turku, HuggingFace, National Library of Finland prohibited_uses: '' quality_control: unknown size: 176B parameters (dense) training_emissions: unknown training_hardware: 192 nodes, each consisting of 4 AMD Instinct MI250X GPUs, a single 64-core AMD Trento CPU and 512GB of memory. training_time: unknown type: model url: https://arxiv.org/pdf/2311.05640.pdf - access: open analysis: Some seed samples were used in different prompt styles and audiences. Less than 1% of files are duplicates after running MinHash deduplication. Contaminated samples were removed from each dataset split. 
created_date: 2024-02-22 datasheet: https://huggingface.co/datasets/HuggingFaceTB/cosmopedia dependencies: - Mixtral description: Cosmopedia is a dataset of synthetic textbooks, blogposts, stories, posts, and WikiHow articles generated by Mixtral-8x7B-Instruct-v0.1. The dataset contains over 30 million files and 25 billion tokens, making it the largest open synthetic dataset to date. It covers a variety of topics, mapping worldwide knowledge from Web datasets like RefinedWeb and RedPajama, to generate synthetic content. excluded: unknown feedback: https://huggingface.co/datasets/HuggingFaceTB/cosmopedia/discussions included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: Cosmopedia v0.1 nationality: unknown organization: Hugging Face prohibited_uses: unknown quality_control: Measures were taken to reduce redundancy and ensure diversity in generated content. A decontamination pipeline was implemented to avoid benchmark contamination. sample: [] size: 25B tokens type: dataset url: none - access: open analysis: The performance of Idefics2 has been evaluated on numerous benchmarks. It is top of its class size and competes with much larger models such as LLava-Next-34B and MM1-30B-chat. created_date: 2024-04-15 dependencies: - The Cauldron description: Idefics2 is a general multimodal model that takes as input arbitrary sequences of text and images, generating text responses. It has the capability to describe visual content, answer questions about images, perform basic arithmetic operations, create stories grounded in multiple images, and extract information from documents. feedback: https://huggingface.co/HuggingFaceM4/idefics2-8b/discussions intended_uses: The model can be used for answering questions about images, describing visual content, creating stories grounded in multiple images, extracting information from documents, and performing basic arithmetic operations. 
license: Apache 2.0 modality: image, text; text model_card: https://huggingface.co/HuggingFaceM4/idefics2-8b monitoring: unknown name: Idefics2 nationality: unknown organization: Hugging Face prohibited_uses: unknown quality_control: The quality of the model has been ensured by training it on a mixture of openly available datasets and enhancing its OCR capabilities. Further improvements include manipulating images in their native resolutions and aspect ratios, better pre-trained backbones, and allowing for sub-image splitting. size: 8B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/blog/idefics2 - access: open analysis: none created_date: 2024-04-15 datasheet: https://huggingface.co/datasets/HuggingFaceM4/the_cauldron dependencies: explanation: These are the datasets with the most tokens included; the full list of all 50 datasets can be found at https://huggingface.co/datasets/HuggingFaceM4/the_cauldron value: - LNarratives - Rendered Text - WebSight - DaTikz description: The Cauldron is an open compilation of 50 manually-curated datasets formatted for multi-turn conversations. excluded: '' feedback: https://huggingface.co/datasets/HuggingFaceM4/the_cauldron/discussions included: '' intended_uses: '' license: CC BY 4.0 modality: image, text monitoring: unknown name: The Cauldron nationality: unknown organization: Hugging Face prohibited_uses: '' quality_control: unknown sample: [] size: 50 vision-language datasets type: dataset url: https://huggingface.co/blog/idefics2 ================================================ FILE: assets/ibm.yaml ================================================ - access: open analysis: '' created_date: explanation: The date the model was announced.
value: 2023-08-03 dependencies: - NASA HLS data description: Prithvi is a first-of-its-kind temporal Vision transformer pre-trained by the IBM and NASA team on contiguous US Harmonised Landsat Sentinel 2 (HLS) data. The model adopts a self-supervised encoder developed with a ViT architecture and Masked AutoEncoder (MAE) learning strategy, with an MSE loss function. feedback: https://huggingface.co/ibm-nasa-geospatial/Prithvi-100M/discussions intended_uses: '' license: Apache 2.0 modality: explanation: video; text value: text, video; text, video model_card: https://huggingface.co/ibm-nasa-geospatial/Prithvi-100M monitoring: '' name: Prithvi nationality: USA organization: IBM prohibited_uses: '' quality_control: '' size: 100M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://github.com/NASA-IMPACT/hls-foundation-os - access: limited adaptation: '' created_date: 2023-09-07 dependencies: - Granite description: Watsonx.ai is part of the IBM watsonx platform that brings together new generative AI capabilities, powered by foundation models and traditional machine learning into a powerful studio spanning the AI lifecycle. failures: '' feedback: '' intended_uses: '' license: explanation: License information can be found at https://www.ibm.com/docs/en/watsonxdata/1.0.x?topic=planning-licenses-entitlements value: custom monitoring: '' monthly_active_users: '' name: Watsonx.ai nationality: USA organization: IBM output_space: deployed AI models prohibited_uses: '' quality_control: '' terms_of_service: https://www.ibm.com/docs/en/watsonx-as-a-service?topic=models-terms-use type: application url: https://www.ibm.com/products/watsonx-ai user_distribution: '' - access: limited analysis: unknown created_date: 2023-09-28 dependencies: [] description: Granite is a set of multi-size foundation models that apply generative AI to both language and code. 
feedback: '' intended_uses: '' license: '' modality: text; code, text model_card: none monitoring: '' name: Granite nationality: USA organization: IBM prohibited_uses: '' quality_control: Training data passed through IBM HAP detector, language model designed to remove harmful content. Data also deduplicated and filtered for document quality. size: 13B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.ibm.com/blog/building-ai-for-business-ibms-granite-foundation-models/ - access: explanation: "In keeping with IBM\u2019s strong historical commitment to open\ \ source , all Granite models are released under the permissive Apache 2.0 license" value: open analysis: Granite 3.0 8B Instruct matches leading similarly-sized open models on academic benchmarks while outperforming those peers on benchmarks for enterprise tasks and safety. created_date: 2024-10-21 dependencies: - "Hugging Face\u2019s OpenLLM Leaderboard v2" description: IBM Granite 3.0 models deliver state-of-the-art performance relative to model size while maximizing safety, speed and cost-efficiency for enterprise use cases. 
feedback: unknown intended_uses: classic natural language use cases including text generation, classification, summarization, entity extraction and customer service chatbots, programming language use cases such as code generation, code explanation and code editing, and for agentic use cases requiring tool calling license: explanation: "In keeping with IBM\u2019s strong historical commitment to open\ \ source , all Granite models are released under the permissive Apache 2.0 license" value: Apache 2.0 modality: explanation: IBM Granite 3.0 8B Instruct model for classic natural language use cases including text generation, classification, summarization, entity extraction and customer service chatbots value: text; text model_card: unknown monitoring: '' name: IBM Granite 3.0 nationality: USA organization: IBM prohibited_uses: unknown quality_control: The entire Granite family of models are trained on carefully curated enterprise datasets, filtered for objectionable content with critical concerns like governance, risk, privacy and bias mitigation in mind size: explanation: 'Dense, general purpose LLMs: Granite-3.0-8B-Instruct' value: 8B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.ibm.com/new/ibm-granite-3-0-open-state-of-the-art-enterprise-models ================================================ FILE: assets/idea.yaml ================================================ - access: open analysis: Evaluated on human and machine benchmarks in comparison to established image models as a baseline. created_date: 2024-01-26 dependencies: - CLIP - LAION-400M - Wukong - Stable Diffusion XL description: Taiyi Diffusion XL is a new Chinese and English bilingual text-to-image model which is developed by extending the capabilities of CLIP and Stable-DiffusionXL. 
feedback: https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-XL-3.5B/discussions intended_uses: '' license: Apache 2.0 modality: text; image model_card: https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-XL-3.5B monitoring: unknown name: Taiyi Diffusion XL nationality: unknown organization: International Digital Economy Academy, South China University of Technology, University of Science and Technology of China prohibited_uses: '' quality_control: unknown size: 3.5B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2401.14688.pdf ================================================ FILE: assets/ideogram.yaml ================================================ - access: limited analysis: "Compared to DALL\xB7E 3 based on a qualitative user comparison." created_date: 2024-02-28 dependencies: [] description: "Ideogram 1.0 is Ideogram\u2019s most advanced text-to-image model,\ \ as of release." feedback: none intended_uses: '' license: unknown modality: text; image model_card: none monitoring: '' name: Ideogram 1.0 nationality: USA organization: Ideogram AI prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://about.ideogram.ai/1.0 ================================================ FILE: assets/inflection.yaml ================================================ - access: limited analysis: Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot among others. created_date: 2023-06-22 dependencies: [] description: Inflection AI's first version of its in-house LLM. via Inflection AI's conversational API. 
feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Inflection-1 nationality: USA organization: Inflection AI prohibited_uses: '' quality_control: '' size: explanation: Designed to be roughly around Chat-GPT's size (175B parameters). value: unknown training_emissions: '' training_hardware: unknown training_time: '' type: model url: https://inflection.ai/inflection-1 - access: limited adaptation: '' created_date: 2023-05-02 dependencies: - Inflection-2.5 description: Personal AI chatbot designed to be conversational and specialized in emotional intelligence. failures: '' feedback: '' intended_uses: to be used as a personal assistant chatbot for everyday activities license: unknown monitoring: '' monthly_active_users: '' name: Pi nationality: USA organization: Inflection AI output_space: natural language text responses prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://inflection.ai/press user_distribution: '' - access: closed analysis: Evaluated against state of the art models on benchmarks, and found to be most performant model outside of GPT-4. created_date: 2023-11-22 dependencies: [] description: Inflection-2 is the best model in the world for its compute class and the second most capable LLM in the world, according to benchmark evaluation, as of its release. feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: Inflection-2 nationality: USA organization: Inflection AI prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: 5000 NVIDIA H100 GPUs training_time: unknown type: model url: https://inflection.ai/inflection-2 - access: limited analysis: Evaluated on standard LLM and technical benchmarks in comparison to Inflection-1 and GPT-4, along with advanced STEM examinations. 
created_date: 2024-03-07 dependencies: [] description: Inflection-2.5 is an upgraded in-house model that is competitive with all the world's leading LLMs, as of release, like GPT-4 and Gemini. feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: Inflection-2.5 nationality: USA organization: Inflection AI prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://inflection.ai/inflection-2-5 - access: explanation: "Developers can now access Inflection AI\u2019s Large Language Model\ \ through its new commercial API." value: open analysis: unknown created_date: 2024-10-07 dependencies: [] description: Inflection for Enterprise, powered by our industry-first, enterprise-grade AI system, Inflection 3.0. feedback: So please drop us a line. We want to keep hearing from enterprises about how we can help solve their challenges and make AI a reality for their business. intended_uses: unknown license: unknown modality: unknown model_card: unknown monitoring: unknown name: Inflection 3.0 nationality: USA organization: Inflection AI prohibited_uses: unknown quality_control: unknown size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://inflection.ai/blog/enterprise ================================================ FILE: assets/instacart.yaml ================================================ - access: limited adaptation: '' created_date: 2023-03-01 dependencies: - ChatGPT API description: "Instacart is augmenting the Instacart app to enable customers to ask\ \ about food and get inspirational, shoppable answers. 
This uses ChatGPT alongside\ \ Instacart\u2019s own AI and product data from their 75,000+ retail partner store\ \ locations to help customers discover ideas for open-ended shopping goals, such\ \ as \u201CHow do I make great fish tacos?\u201D or \u201CWhat\u2019s a healthy\ \ lunch for my kids?\u201D Instacart plans to launch \u201CAsk Instacart\u201D\ \ later this year." failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: Ask Instacart nationality: USA organization: Instacart output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' ================================================ FILE: assets/internlm.yaml ================================================ - access: open analysis: Evaluated on the dimensions proposed by OpenCompass in comparison to other LLMs. created_date: 2023-09-20 dependencies: [] description: InternLM is an LLM pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data. feedback: https://huggingface.co/internlm/internlm-20b/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/internlm/internlm-20b monitoring: unknown name: InternLM nationality: China organization: InternLM prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/InternLM/InternLM ================================================ FILE: assets/juni.yaml ================================================ - access: limited adaptation: '' created_date: unknown dependencies: - Anthropic API description: An online tutoring solution to help students achieve academic success. 
failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Juni Tutor Bot nationality: USA organization: Juni Learning output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://junilearning.com/ user_distribution: '' ================================================ FILE: assets/kaist.yaml ================================================ - access: open analysis: Evaluated in comparison to the MusicCaps dataset and with respect to n-gram, neural metrics. created_date: 2023-07-31 datasheet: none dependencies: - MusicCaps - Million Song Dataset - Magnatagtune description: LP-MusicCaps is an LLM-based pseudo music caption dataset. excluded: '' feedback: '' included: '' intended_uses: '' license: CC-BY-NC-4.0 modality: {} monitoring: '' name: LP-MusicCaps nationality: South Korea organization: KAIST Graduate School of Culture Technology prohibited_uses: '' quality_control: '' sample: [] size: 2.2M captions paired with 0.5M audio clips type: dataset url: https://arxiv.org/pdf/2307.16372.pdf ================================================ FILE: assets/kakaobrain.yaml ================================================ - access: explanation: You can download the dataset from [[COYO Huggingface page]](https://huggingface.co/datasets/kakaobrain/coyo-700m). For more information on downloading the image dataset, please refer to [[download/README.md]](https://github.com/kakaobrain/coyo-dataset/blob/main/download/README.md) on Github. value: open analysis: explanation: See [[Experiments]](https://github.com/kakaobrain/coyo-dataset#experiments) value: '"We empirically validated the quality of COYO dataset by re-implementing popular models such as ALIGN, unCLIP, and ViT. We trained these models on COYO-700M or its subsets from scratch, achieving competitive performance to the reported numbers or generated samples in the original papers."
' created_date: explanation: As mentioned in the [[Google Scholar page]](https://scholar.google.com/citations?view_op=view_citation&hl=en&user=8WXfStMAAAAJ&citation_for_view=8WXfStMAAAAJ:-f6ydRqryjwC). value: 2022-08-31 datasheet: none dependencies: - CommonCrawl description: 'COYO-700M is a large-scale dataset that contains 747M image-text pairs as well as many other meta-attributes to increase the usability to train various models. ' excluded: explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) value: Removed images less than 5KB image size. Removed images with an aspect ratio greater than 3.0. Removed images with min(width, height) < 200. Removed images with a score of OpenNSFW2 or GantMan/NSFW higher than 0.5. Removed all duplicate images based on the image pHash value from external public datasets. Removed texts with a length of 5 or less. Removed texts that do not have a noun form. Removed texts with less than 3 words or more than 256 words and texts over 1000 in length. Removed texts appearing more than 10 times. Removed texts containing NSFW words. Removed duplicated samples based on (image_phash, text). feedback: Feedback can be given by emailing at coyo at kakaobrain.com included: explanation: See [[Data Filtering]](https://github.com/kakaobrain/coyo-dataset#data-filtering) value: Included all image formats that Pillow library can decode. Collected only English text using cld3. intended_uses: explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) value: '"It is strongly recommended that this dataset be used only for research, keeping this in mind when using the dataset, and Kakao Brain does not recommend using this dataset as it is without special processing to clear inappropriate data to create commercial products." 
' license: CC-BY-4.0 modality: image, text monitoring: none name: COYO-700M nationality: South Korea organization: Kakao Brain prohibited_uses: explanation: See [[Disclaimer & Content Warning]](https://github.com/kakaobrain/coyo-dataset#disclaimer--content-warning) value: 'Authors note the following limitations: "Kakao Brain tried to construct a "Safe" dataset when building the COYO dataset. However, despite these efforts, this large-scale dataset was not hand-picked by humans to avoid the risk due to its very large size (over 700M). Keep in mind that the unscreened nature of the dataset means that the collected images can lead to strongly discomforting and disturbing content for humans. The COYO dataset may contain some inappropriate data, and any problems resulting from such data are the full responsibility of the user who used it." ' quality_control: '' sample: [] size: 747M image-text pairs type: dataset url: https://github.com/kakaobrain/coyo-dataset ================================================ FILE: assets/khan.yaml ================================================ - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - GPT-4 API description: An AI-powered assistant that functions as both a virtual tutor for students and a classroom assistant for teachers. failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Khanmigo nationality: USA organization: Khan Academy output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.khanacademy.org/khan-labs#khanmigo user_distribution: '' ================================================ FILE: assets/konan.yaml ================================================ - access: limited analysis: none created_date: 2023-09-17 dependencies: [] description: Konan LLM is a Large Language Model developed in-house by Konan Technology. 
Optimized for super-large AI training, it leverages high-quality, large-scale data and over 20 years of expertise in natural language processing. feedback: none intended_uses: Document generation, document review, Q&A, customer response scenarios. license: unknown modality: text; text model_card: none monitoring: '' name: Konan LLM nationality: South Korea organization: Konan prohibited_uses: '' quality_control: '' size: 13B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://en.konantech.com/en/llm/konanllm ================================================ FILE: assets/kotoba.yaml ================================================ - access: open analysis: unknown created_date: 2024-03-13 dependencies: [] description: Kotoba-Speech is a Transformer-based speech generative model that supports fluent text-to-speech generation in Japanese and one-shot voice cloning through speech prompt. feedback: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1/discussions intended_uses: '' license: Apache 2.0 modality: text; audio model_card: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1 monitoring: unknown name: Kotoba Speech nationality: Japan organization: Kotoba Tech prohibited_uses: '' quality_control: '' size: 1.2B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/kotoba-tech/kotoba-speech-v0.1 ================================================ FILE: assets/ktai.yaml ================================================ - access: open analysis: unknown created_date: 2023-10-31 dependencies: - AI-HUB dataset - National Institute of Korean Language dataset description: Midm is a pre-trained Korean-English language model developed by KT. It takes text as input and creates text. The model is based on Transformer architecture for an auto-regressive language model.
feedback: https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1/discussions intended_uses: It is expected to be used for various research purposes. license: CC-BY-NC 4.0 modality: text; text model_card: https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1 monitoring: unknown name: Midm nationality: South Korea organization: KT Corporation prohibited_uses: It cannot be used for commercial purposes. quality_control: KT tried to remove unethical expressions such as profanity, slang, prejudice, and discrimination from training data. size: 7B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1 ================================================ FILE: assets/kunlun.yaml ================================================ - access: open analysis: Evaluated on several popular benchmarks and performance in different fields. created_date: 2023-10-30 dependencies: - SkyPile description: The Skywork series is a family of large language models (LLMs) trained on a corpus of over 3.2 trillion tokens drawn from both English and Chinese texts. feedback: https://huggingface.co/Skywork/Skywork-13B-base/discussions intended_uses: '' license: explanation: can be found at https://github.com/SkyworkAI/Skywork/blob/main/LICENSE value: custom modality: text; text model_card: https://huggingface.co/Skywork/Skywork-13B-base monitoring: none name: Skywork nationality: China organization: Kunlun Inc. 
prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: 512 A800-80GB GPUs training_time: 39 days type: model url: https://arxiv.org/pdf/2310.19341.pdf ================================================ FILE: assets/laion.yaml ================================================ - access: explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) value: open analysis: '' created_date: explanation: The date the [[blog post]](https://laion.ai/blog/laion-400-open-dataset/) was released. value: 2021-08-20 datasheet: https://laion.ai/blog/laion-400-open-dataset/ dependencies: - CLIP - CommonCrawl description: LAION-400M is a dataset with CLIP-filtered 400 million image-text pairs, their CLIP embeddings and kNN indices that allow efficient similarity search. This dataset is entirely openly, freely accessible. excluded: explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) value: 'Authors apply the following filtering conditions on the WAT files downloaded from Common Crawl: "All samples with less than 5 character alt-text length or less than 5 KB image size are dropped. Duplicate removal is performed with bloom filter based on URL and alt-text. We use CLIP to compute embeddings of the image and alt-text. Then we compute the cosine similarity of both embeddings and drop all samples with cosine similarity below 0.3. This threshold was selected based on human inspections. We use the CLIP embeddings of images and texts to filter out illegal contents." ' feedback: '' included: '' intended_uses: The authors recommend using the dataset "for research purposes" and warn that "this large-scale dataset is non-curated. It was built for research purposes to enable testing model training on larger scale for broad researcher and other interested communities, and is not meant for any real-world production or application." 
license: explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-400-open-dataset/) value: CC BY 4.0 modality: image, text monitoring: '' name: LAION-400M nationality: Germany organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-400M for any real-world production or application. quality_control: explanation: See [[Dataset and Methods]](https://arxiv.org/pdf/2111.02114.pdf#section.2) value: The authors use CLIP embeddings of images and texts to filter out illegal contents. They also use CLIP to tag image-text pairs as NSFW. They note that less than 1% of images were detected as NSFW, which can be filtered out by a user with NSFW tag. sample: [] size: 400M image-text pairs type: dataset url: https://laion.ai/blog/laion-400-open-dataset/ - access: explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: open analysis: '' created_date: explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B value: 2022-12-12 datasheet: https://laion.ai/blog/laion-5b/ dependencies: - CLIP - mCLIP - CommonCrawl description: LAION is a dataset of 5 billion image-text pairs from the Internet excluded: explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal is performed with a bloom filter based on URL. Future runs would include more variate deduplication rules, such as URL + language for the multilanguage dataset. We use CLIP respectively MCLIP to compute embeddings of the image and alt-text.
Then we compute the cosine similarity of both embeddings and drop all samples with cosine similarity below 0.28 for the English language ( with CLIP B/32) and 0.26 for the multilingual dataset (MCLIP). These thresholds were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. feedback: '' included: '' intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" license: explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: CC BY 4.0 modality: image, text monitoring: '' name: LAION-5B nationality: Germany organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-5B for non-research purposes. quality_control: '' sample: [] size: 5B image-text pairs type: dataset url: https://laion.ai/blog/laion-5b/ - access: explanation: The dataset index is available from [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: open analysis: '' created_date: explanation: The date the [[blog post]](https://laion.ai/blog/laion-5b/) was released. Note that the dataset was already compiled earlier, e.g. the Stable Diffusion model released earlier was trained on a subset of LAION-5B value: 2022-12-12 datasheet: https://laion.ai/blog/laion-5b/ dependencies: - CLIP - LAION-5B description: LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 billion English image-text pairs. excluded: explanation: See [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. 
All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal is performed with a bloom filter based on URL. Future runs would include more variate deduplication rules, such as URL + language for the multilanguage dataset. We use CLIP respectively MCLIP to compute embeddings of the image and alt-text. Then we compute the cosine similarity of both embeddings and drop all samples with cosine similarity below 0.28 for the English language ( with CLIP B/32) and 0.26 for the multilingual dataset (MCLIP). These thresholds were selected based on human inspection of the test results. We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content. feedback: '' included: '' intended_uses: The authors recommend using the dataset "for research purposes" and "do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress" license: explanation: The license is listed on the [[dataset blog post]](https://laion.ai/blog/laion-5b/) value: CC BY 4.0 modality: image, text monitoring: '' name: LAION-2B-en nationality: Germany organization: LAION prohibited_uses: No uses are explicitly prohibited by the license. Users are warned from using LAION-2B-en for non-research purposes. quality_control: '' sample: [] size: 2.32B image-text pairs type: dataset url: https://arxiv.org/pdf/2210.08402.pdf - access: open analysis: Evaluated on COCO captioning and VQAv2 vision-language tasks. created_date: 2023-03-28 dependencies: - LLaMA - CLIP description: An open-source reproduction of DeepMind's Flamingo model. At its core, OpenFlamingo is a framework that enables training and evaluation of large multimodal models (LMMs). 
feedback: '' intended_uses: academic research purposes license: MIT modality: image, text; text model_card: https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md monitoring: '' name: OpenFlamingo nationality: Germany organization: LAION prohibited_uses: commercial use quality_control: '' size: 9B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://laion.ai/blog/open-flamingo/ ================================================ FILE: assets/laion_ev.yaml ================================================ - access: open analysis: Re-LAION-5B aims to fix the issues as reported by Stanford Internet Observatory for the original LAION-5B. It is available for download in two versions, research and research-safe. In total, 2236 links that potentially led to inappropriate content were removed. created_date: 2024-08-30 dependencies: - LAION-5B description: Re-LAION-5B is an updated version of LAION-5B, the first web-scale, text-link to images pair dataset to be thoroughly cleaned of known links to suspected CSAM. It is an open dataset for fully reproducible research on language-vision learning. This model was developed in response to issues identified by the Stanford Internet Observatory in December 2023. The updates were made in collaboration with multiple organizations like the Internet Watch Foundation (IWF), the Canadian Center for Child Protection (C3P), and Stanford Internet Observatory. feedback: Problems with the dataset should be reported to the LAION organization. They have open lines for communication with their partners and the broader research community. intended_uses: Re-LAION-5B is designed for research on language-vision learning. It can also be used by third parties to clean existing derivatives of LAION-5B by generating diffs and removing all matched content from their versions. 
license: Apache 2.0 modality: text; image model_card: unknown monitoring: unknown name: Re-LAION-5B nationality: Germany organization: LAION e.V. prohibited_uses: The dataset should not be utilized for purposes that breach legal parameters or ethical standards, such as dealing with illegal content. quality_control: The model utilized lists of link and image hashes provided by partner organizations. These were used to remove inappropriate links from the original LAION-5B dataset to create Re-LAION-5B. size: 5.5B (text, image) pairs training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://laion.ai/blog/relaion-5b/ ================================================ FILE: assets/latitude.yaml ================================================ - access: explanation: 'The game is available to public upon sign up. ' value: limited adaptation: '' created_date: explanation: 'Release date of the game on iOS and Android platforms [[Wikipedia]](https://en.wikipedia.org/wiki/AI_Dungeon). ' value: 2019-12-17 dependencies: - OpenAI API description: 'AI Dungeon is a single-player text adventure game that uses AI to generate content. ' failures: '' feedback: '' intended_uses: '' license: explanation: '"Subject to your compliance with these Terms, the Company hereby grants to you, a personal, worldwide, royalty-free, non-assignable, non-sublicensable, non-transferrable, and non-exclusive license to use the software provided to you as part of the Services (and to download a single copy of the App onto the equipment or device specified by us)" - excerpt from the Terms of Service document.
' value: custom monitoring: '' monthly_active_users: '' name: AI Dungeon nationality: USA organization: Latitude output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://play.aidungeon.io/main/termsOfService type: application url: https://play.aidungeon.io user_distribution: '' ================================================ FILE: assets/lehigh.yaml ================================================ - access: open analysis: outperforms majority of preceding state-of-the-art models over 15 unique biomedical modalities. created_date: 2023-05-26 dependencies: - GPT-style autoregressive decoder - BiomedGPT biomedical datasets description: BiomedGPT leverages self-supervision on large and diverse datasets to accept multi-modal inputs and perform a range of downstream tasks. feedback: '' intended_uses: furthering research in developing unified and generalist models for biomedicine. license: Apache 2.0 modality: image, text; text model_card: '' monitoring: '' name: BiomedGPT nationality: USA organization: Lehigh University prohibited_uses: '' quality_control: No specific quality control is mentioned in model training, though details on data processing and how the model was trained are provided in the paper. size: 472M parameters (dense) training_emissions: unknown training_hardware: 10 NVIDIA A5000 GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2305.17100.pdf ================================================ FILE: assets/lg.yaml ================================================ - access: closed analysis: none created_date: 2023-07-19 dependencies: [] description: EXAONE 2.0 is a multimodal artificial intelligence that can be used to help develop new materials and medicines. 
feedback: none intended_uses: '' license: unknown modality: image, text; image, text model_card: none monitoring: '' name: EXAONE 2.0 nationality: South Korea organization: LG AI Research prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.lgresearch.ai/exaone ================================================ FILE: assets/lg_ai_research.yaml ================================================ - access: open analysis: The model was evaluated extensively across a wide range of public and in-house benchmarks. The comparative analysis showed that the performance of EXAONE 3.0 was competitive in English and excellent in Korean compared to other large language models of a similar size. created_date: 2024-09-08 dependencies: - MeCab description: EXAONE 3.0 is an instruction-tuned large language model developed by LG AI Research. It demonstrates notably robust performance across a range of tasks and benchmarks. It has been fine-tuned to be capable of complex reasoning and has a particular proficiency in Korean. The released 7.8B parameter model is designed to promote open research and innovation. feedback: Unknown intended_uses: The model was intended for non-commercial and research purposes. The capabilities of the model allow for use cases that involve advanced AI and language processing tasks, particularly in fields requiring proficiency in English and Korean. license: Unknown modality: text; text model_card: unknown monitoring: Unknown name: EXAONE 3.0 Instruction Tuned Language Model nationality: South Korea organization: LG AI Research prohibited_uses: Commercial use is not intended for this model. Its intended use is for non-commercial research and innovation. quality_control: Extensive pre-training on a diverse dataset, and advanced post-training techniques were employed to enhance instruction-following capabilities. 
The model was also trained to fully comply with data handling standards. size: 7.8B parameters (dense) training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://arxiv.org/pdf/2408.03541 ================================================ FILE: assets/linkedin.yaml ================================================ - access: open adaptation: '' created_date: unknown dependencies: - Azure Cognitive Services for Vision description: More than 40 percent of LinkedIn's feed posts include at least one image. We want every member to have equal access to opportunity and are committed to ensuring that we make images accessible to our members who are blind or who have low vision so they can be a part of the online conversation. With Azure Cognitive Service for Vision, we can provide auto-captioning to edit and support alt. text descriptions. failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: LinkedIn nationality: USA organization: LinkedIn output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.linkedin.com/ user_distribution: '' ================================================ FILE: assets/llm360.yaml ================================================ - access: open analysis: Evaluated on several benchmark LLM tasks created_date: 2023-12-12 dependencies: - Arxiv - Books - C4 - RefinedWeb - StarCoder - StackExchange - Wikipedia description: Amber is the first model in the LLM360 family, an initiative for comprehensive and fully open-sourced LLMs, where all training details, model checkpoints, intermediate results, and additional analyses are made available to the community. feedback: https://huggingface.co/LLM360/Amber/discussions intended_uses: to support open and collaborative AI research by making the full LLM training process transparent. 
license: Apache 2.0 modality: text; text model_card: https://huggingface.co/LLM360/Amber monitoring: unknown name: Amber nationality: International organization: LLM360 prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: 56 DGX A100 nodes, each equipped with 4 80GB A100 GPUs training_time: unknown type: model url: https://www.llm360.ai/ - access: open analysis: Evaluated on English and coding tasks and benchmarks, and outperforms LLaMA 2 in some. created_date: 2023-12-12 dependencies: - SlimPajama - StarCoder description: CrystalCoder is a language model with a balance of code and text data that follows the initiative under LLM360 of its training process being fully transparent. feedback: https://huggingface.co/LLM360/CrystalCoder/discussions intended_uses: to support open and collaborative AI research by making the full LLM training process transparent. license: Apache 2.0 modality: text; code, text model_card: https://huggingface.co/LLM360/CrystalCoder monitoring: unknown name: CrystalCoder nationality: International organization: LLM360 prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: Trained on the Cerebras Condor Galaxy 1 (CG-1), a 4 exaFLOPS, 54 million core, 64-node cloud AI supercomputer. training_time: unknown type: model url: https://www.llm360.ai/ - access: open analysis: Evaluated on the LLM360 Performance and Evaluation Collection that checks standard best practice benchmarks, medical, math, and coding knowledge. created_date: 2024-05-29 dependencies: [] description: K2 is a 65 billion parameter large language model inspired by the Llama 2 65B model. The model is also supported with a suite of research tools, tutorials and step-by-step guides for learning pre-training and fine-tuning techniques. 
feedback: https://huggingface.co/LLM360/K2/discussions intended_uses: The model is intended for learning pre-training techniques or enhancing research capabilities in large language models. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/LLM360/K2 monitoring: unknown name: K2 nationality: International organization: LLM360 prohibited_uses: unknown quality_control: unknown size: 65B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.llm360.ai/paper2.pdf ================================================ FILE: assets/lmsys.yaml ================================================ - access: open analysis: Evaluated against similar LLMs using GPT-4 as a judge. created_date: 2023-03-30 dependencies: - LLaMA - ShareGPT conversations data description: An open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT. feedback: https://huggingface.co/lmsys/vicuna-13b-delta-v0/discussions intended_uses: research on LLMs and chatbots license: Apache 2.0 modality: text; text model_card: https://huggingface.co/lmsys/vicuna-13b-delta-v0 monitoring: '' name: Vicuna nationality: USA organization: LMSYS prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: '' training_hardware: 8 A100 GPUs training_time: 1 day type: model url: https://lmsys.org/blog/2023-03-30-vicuna/ ================================================ FILE: assets/mathai.yaml ================================================ - access: open analysis: Mistral model fine-tuned on AutoMathText and evaluated on the MATH dataset. created_date: 2024-02-12 datasheet: https://huggingface.co/datasets/math-ai/AutoMathText dependencies: - OpenWebMath - RedPajama-Data - Algebraic Stack - Qwen description: AutoMathText is an extensive and carefully curated dataset encompassing around 200 GB of mathematical texts.
excluded: '' feedback: https://huggingface.co/datasets/math-ai/AutoMathText/discussions included: '' intended_uses: '' license: CC BY-SA 4.0 modality: text monitoring: unknown name: AutoMathText nationality: USA organization: Math AI prohibited_uses: '' quality_control: '' sample: [] size: 200 GB type: dataset url: https://github.com/yifanzhang-pro/AutoMathText ================================================ FILE: assets/maya.yaml ================================================ - access: open analysis: Evaluated on the OpenLLM leaderboard, releasing at rank number 4 on the leaderboard. created_date: 2023-08-11 dependencies: - LLaMA 2 - Guanaco LLaMA dataset description: GodziLLa 2 is an experimental combination of various proprietary LoRAs from Maya Philippines and Guanaco LLaMA 2 1K dataset, with LLaMA 2. feedback: none intended_uses: '' license: LLaMA 2 modality: text; text model_card: https://huggingface.co/MayaPH/GodziLLa2-70B monitoring: unknown name: GodziLLa 2 nationality: Philippines organization: Maya Philippines prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/MayaPH/GodziLLa2-70B - access: explanation: The first 200 million tokens are free. value: limited analysis: We evaluated voyage-code-3 using an enhanced suite of evaluation datasets designed to address the shortcomings of existing benchmarks and deliver practical, robust results. created_date: 2024-12-04 dependencies: [] description: Introducing voyage-code-3, our next-generation embedding model optimized for code retrieval. feedback: "If you\u2019re also interested in fine-tuned embedding models, we\u2019\ d love to hear from you\u2014please email us at contact@voyageai.com." 
intended_uses: optimized for code retrieval license: unknown modality: unknown model_card: unknown monitoring: unknown name: voyage-code-3 nationality: USA organization: Voyage AI prohibited_uses: unknown quality_control: unknown size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://blog.voyageai.com/2024/12/04/voyage-code-3/ ================================================ FILE: assets/meta.yaml ================================================ - access: explanation: Models are available for download from [[GitHub repository]](https://github.com/facebookresearch/esm#available-models) value: open analysis: '' created_date: explanation: The date the [[model paper]](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html) was released value: 2022-10-31 dependencies: - UniRef50 - UniRef90 description: ESM-2 is a series of protein language models trained on protein sequences feedback: '' intended_uses: '' license: explanation: 'The license is provided in the [[Github repository]](https://github.com/facebookresearch/esm#available-models) ' value: MIT modality: text; protein sequence model_card: none monitoring: '' name: ESM-2 nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 15B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html - access: closed analysis: none created_date: explanation: The date the model paper was released value: 2021-12-08 datasheet: none dependencies: - COCO - YFCC100M - SBU Captions - Localized Narratives - Visual Genome - Wikipedia - Conceptual Captions - Red Caps description: PMD (Public Multimodal Datasets) is a collection of image-text datasets introduced in the FLAVA work. excluded: YFCC100M is filtered for non-English captions and very short (< 2 word) captions. 
feedback: none included: none intended_uses: unknown license: unknown modality: image, text monitoring: none name: PMD nationality: USA organization: Meta prohibited_uses: unknown quality_control: Beyond filtering mentioned in excluded, nothing further is done. sample: [] size: 70M type: dataset url: https://arxiv.org/abs/2112.04482 - access: explanation: 'Model checkpoints are available for download from the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full) ' value: open analysis: FLAVA is benchmarked on a range of vision-only (e.g. CIFAR-10), language-only (e.g. GLUE), and multimodal (e.g. Hateful Memes) standard evaluations. created_date: explanation: The date the model paper was released value: 2021-12-08 dependencies: - PMD description: FLAVA is a multimodal model composed of an image encoder, text encoder, and multimodal encoder. feedback: https://huggingface.co/facebook/flava-full/discussions intended_uses: 'Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), "The model is intended to serve as a reproducible research artifact for research communities in the light of models whose exact reproduction details are never released such as CLIP and SimVLM." ' license: explanation: 'The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full) ' value: BSD-3-Clause modality: image, text model_card: https://huggingface.co/facebook/flava-full monitoring: none name: FLAVA nationality: USA organization: Meta prohibited_uses: 'Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), "Any deployed use case of the model - whether commercial or not" - is currently out of scope. ' quality_control: FLAVA introduces a variety of new modeling techniques, specifically with an interest in improved text-image alignment through contrastive objectives. 
size: explanation: '110M (Language encoder) + 86M (Vision encoder) + 110M (mul encoder) ' value: 306M training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/abs/2112.04482 - access: closed analysis: none created_date: explanation: 'The date the Galactica paper was released ' value: 2022-11-15 datasheet: none dependencies: - CommonCrawl - Wikipedia - arXiv description: The Galactica Corpus is a collection of scientific datasets introduced in the Galactica work. excluded: '' feedback: none included: Prompts and reasoning data is explicitly included to improve model capabilities derived from this data. intended_uses: unknown license: unknown modality: text monitoring: none name: The Galactica Corpus nationality: USA organization: Meta prohibited_uses: unknown quality_control: '' sample: [] size: 106B tokens type: dataset url: https://galactica.org/static/paper.pdf - access: explanation: Model checkpoints freely available at https://github.com/paperswithcode/galai value: open analysis: '' created_date: explanation: 'The date the Galactica paper was released ' value: 2022-11-15 dependencies: - The Galactica Corpus description: Galactica is a family of autoregressive language models. feedback: '' intended_uses: '' license: explanation: https://github.com/paperswithcode/galai/blob/main/LICENSE-MODEL.md value: CC BY-NC 4.0 modality: code, text; code, text model_card: https://huggingface.co/facebook/galactica-6.7b monitoring: '' name: Galactica nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 120B parameters (dense) training_emissions: unknown training_hardware: Meta AI Cluster. 
Trained on 1024 80GB A100 GPUs (128 8xA100 80GB nodes) training_time: unknown type: model url: https://galactica.org/static/paper.pdf - access: explanation: Model weights are available via the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B) value: open analysis: none created_date: explanation: The date the model paper was released value: 2022-04-12 dependencies: [] description: InCoder is a language model trained on code with a causal masking objective feedback: '' intended_uses: '' license: explanation: The license is provided in the [[HuggingFace repository]](https://huggingface.co/facebook/incoder-6B?text=My+name+is+Lewis+and+I+like+to) value: CC BY-NC 4.0 modality: text; code model_card: none monitoring: '' name: InCoder nationality: unknown organization: Meta, CMU, TTI-Chicago, UC Berkeley, University of Washington prohibited_uses: '' quality_control: unknown size: 6B parameters (dense) training_emissions: Unknown training_hardware: 248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) training_time: 24 days, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf) type: model url: https://arxiv.org/abs/2204.05999 - access: explanation: The 175B model requires manual approval from Meta to access. Other models are available through HuggingFace. value: limited analysis: '' created_date: explanation: 'The date the OPT paper was submitted to Arxiv ' value: 2022-05-01 dependencies: - RoBERTa dataset - The Pile - PushShift.io Reddit description: OPT is a family of autoregressive language models. 
feedback: '' intended_uses: '' license: explanation: 'All released with the [[OPT-175B License]](https://github.com/facebookresearch/metaseq/blob/main/projects/OPT/MODEL_LICENSE.md), except 66B (TBD) and 17B (requires manual approval) ' value: OPT-175B License modality: text; text model_card: https://arxiv.org/pdf/2205.01068.pdf monitoring: '' name: OPT nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 175B parameters (dense) training_emissions: explanation: 'Estimate by authors for the OPT-175B model only. Not including ablations and baselines. ' value: 75 tCO2e training_hardware: Meta AI cluster. Trained on 992 80GB A100 GPUs training_time: '' type: model url: https://arxiv.org/abs/2205.01068 - access: explanation: 'The datasets involved are public, but the full dataset is not directly available, nor are filtering scripts. ' value: limited analysis: '' created_date: explanation: 'The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792). ' value: 2022-09-29 datasheet: none dependencies: - LAION-5B - WebVid-10M - HD-VILA-100M description: 'The Make-A-Video dataset is the dataset used to train Make-A-Video, which includes both image-text and video-only datasets with specific and significant filtering. ' excluded: 'The LAION-5B dataset is filtered to 2.3B by removing NSFW images using [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), toxic words in text, and images with watermark probability > 0.5. The HD-VILA-100M is randomly subsampled to 10M video clips. ' feedback: explanation: 'No feedback mechanism is mentioned by the authors. ' value: none included: explanation: 'Data from the three underlying datasets is filtered, but nothing is included beyond this. ' value: none intended_uses: unknown license: explanation: 'No license was found, though the underlying datasets are public and have licenses. 
' value: none modality: image, text, video monitoring: explanation: 'There is no information on how Meta is internally monitoring the use of the dataset. ' value: unknown name: Make-A-Video dataset nationality: USA organization: Meta prohibited_uses: unknown quality_control: 'The authors exclude NSFW, toxic, and likely watermarked data from LAION-5B. ' sample: [] size: 20M video clips, 2.3B image-text pairs type: dataset url: https://arxiv.org/pdf/2209.14792.pdf - access: explanation: 'The model has not been released; a form existed to potentially acquire access but is now closed as of 2022-12-07 [[Access Form]](https://docs.google.com/forms/u/0/d/e/1FAIpQLSfMjC57wcXWUDV0UbS2Tn6VhjLEiCXaHvWZuWgWRa-Zx8-Few/closedform). ' value: closed analysis: 'Model performance was evaluated using automated (Frechet Video Distance; Frechet Inception Distance) and human evaluation on two datasets (UCF-101, MSR-VTT) in the zero-shot setting. ' created_date: explanation: 'The date that Make-A-Video was posted to arXiv [[arXiv]] (https://arxiv.org/abs/2209.14792). ' value: 2022-09-29 dependencies: - Make-A-Video dataset description: 'Make-A-Video is a model for Text-to-Video Generation without Text-Video Data. ' feedback: none intended_uses: unknown license: none modality: text; video model_card: none monitoring: unknown name: Make-A-Video nationality: USA organization: Meta prohibited_uses: unknown quality_control: explanation: 'Authors do not report specific quality control steps taken in modeling, though filtering is done in producing the Make-A-Video dataset. 
' value: none size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2209.14792.pdf - access: open analysis: '' created_date: 2023-02-24 dependencies: - CommonCrawl - C4 - Github - Wikipedia - BooksCorpus - arXiv - StackExchange description: LLaMA is a collection of foundation language models ranging from 7B to 65B parameters trained on trillions of tokens. The LLaMA models show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets. feedback: '' intended_uses: '' license: LLaMa License (model weights), GPLv3 (code) modality: text; text model_card: '' monitoring: '' name: LLaMA nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 65B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2302.13971 - access: open analysis: Evaluated on standard academic benchmarks and internal Meta libraries. created_date: 2023-07-18 dependencies: [] description: Llama 2 is an updated version of LLaMA trained on a new mix of publicly available data. feedback: '' intended_uses: Llama 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks. license: explanation: The license can be found at https://ai.meta.com/resources/models-and-libraries/llama-downloads/ value: custom modality: text; text model_card: Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/ monitoring: '' name: Llama 2 nationality: USA organization: Meta prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English.
Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Llama 2. quality_control: '' size: 70B parameters (dense) training_emissions: 539 tCO2eq training_hardware: NVIDIA A100-80GB GPUs (TDP of 350-400W) training_time: '' type: model url: https://ai.meta.com/resources/models-and-libraries/llama/ - access: open analysis: '' created_date: 2022-12-22 dependencies: - OPT - OPT-IML Bench description: '' feedback: '' intended_uses: '' license: OPT-IML 175B License modality: text; text model_card: '' monitoring: '' name: OPT-IML nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 175B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2212.12017 - access: explanation: 'The full dataset can be downloaded at [[SA-1B Download]](https://ai.facebook.com/datasets/segment-anything-downloads/). A 50k image preview of the full dataset is available [[here]](https://segment-anything.com/dataset/index.html). ' value: open analysis: '' created_date: explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) was released. value: 2023-04-05 datasheet: explanation: Datasheet can be found in the Appendix section of the Segment Anything paper. value: https://arxiv.org/pdf/2304.02643.pdf#page=25 dependencies: [] description: 'SA-1B (Segment Anything 1 Billion) is a dataset designed for training general-purpose object segmentation models from open world images. It consists of 11M diverse, high-resolution, privacy protecting images and 1.1B high-quality segmentation masks. ' excluded: explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) value: '"We withheld ~2k randomly selected images for testing purposes." "Each image is accompanied by a short caption that describes the content and place of the photo in a free form text. 
Per our agreement with the photo provider we are not allowed to release these captions." ' feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) or by emailing at segment-anything at meta.com. included: explanation: According to section [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) of the paper and [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/). value: "SA-1B consists of 11M diverse, high-resolution (averaging 1500\xD72250\ \ pixels), and privacy protecting images collected and licensed from a third\ \ party photo company. The images are photos taken from a camera, i.e. not artwork.\ \ The images vary in subject matter. Common themes of the images include: locations,\ \ objects, scenes. The dataset includes 1.1B high-quality segmentation masks\ \ collected with the Segment Anything Data Engine. SA-1B only includes automatically\ \ generated masks (99.1%), as the authors conclude after experiments that the\ \ automatic masks are high quality and effective for training models. The masks\ \ range from large scale objects such as buildings to fine grained details such\ \ as door handles. Masks are provided in the COCO run-length encoding (RLE)\ \ annotation format.\n" intended_uses: explanation: See [[SA-1B website]](https://ai.facebook.com/datasets/segment-anything/) value: SA-1B is intended to be used for research purposes only. It allows access to a privacy protecting and copyright friendly large-scale image dataset. Researchers can use it to train and evaluate generic object segmentation models. license: explanation: SA-1B is released under a favorable license agreement for certain research uses and with protections for researchers. See [[SA-1B Dataset Research License]](https://ai.facebook.com/datasets/segment-anything-downloads/). 
value: SA-1B Dataset Research License modality: image monitoring: explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) value: 'The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything and maintained by Meta AI. "If a user observes objectionable image(s) in the dataset, we invite them to report the image(s) at segment-anything at meta.com for removal" "To aid reproducibility of research using SA-1B, the only updates (to the dataset) will be to remove reported images." "We encourage users to gather further annotations for SA-1B. Any users who generate annotations will be liable for hosting and distributing their annotations." ' name: SA-1B nationality: USA organization: Meta prohibited_uses: explanation: See [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) value: "Authors note the following limitations of the dataset:\n The masks are\ \ generated by a segmentation model, so there may be errors\nor inconsistencies\ \ in the masks.\n While no two images are the same, there are instances of\ \ images of the same\nsubject taken close together in time.\n The dataset contains\ \ scenes of protests, or other gatherings that may suggest\nreligious beliefs,\ \ political opinions or union memberships that may be offensive.\n" quality_control: explanation: According to sections [[Segment Anything Dataset]](https://arxiv.org/pdf/2304.02643.pdf#section.5) and [[Datasheet]](https://arxiv.org/pdf/2304.02643.pdf#page=25) of the paper. value: "- Dataset quality:\n Due to potential accessibility and storage challenges,\ \ the original high-resolution images (averaging 3300\xD74950 pixels) were downsampled\ \ to an average resolution of 1500\xD72250 pixels. 
Authors note that despite\ \ the downsampling, the images remain significantly higher in resolution than\ \ those in many existing vision datasets, such as COCO, where images are typically\ \ around 480\xD7640 pixels.\n The images were processed to blur faces and license\ \ plates to protect the identities of those in the image.\n To estimate the\ \ quality of the masks in the images, a random sample of 500 images (\u223C\ 50k masks) was taken and professional annotators were asked to improve the quality\ \ of all masks in those images.\n- Safety measures:\n Authors implemented two\ \ safety measures to prevent objectionable content:\n (1) Photos are licensed\ \ from a photo provider and had to meet the terms of service of the photo provider.\ \ Authors requested that all objectionable content be filtered from the images\ \ they licensed.\n (2) Users who observe objectionable images in the dataset\ \ are invited to report them for removal at segment-anything@meta.com.\n Despite\ \ these measures, they observed that a small portion of images contain scenes\ \ of protests or other gatherings that focus on a diverse spectrum of religious\ \ beliefs or political opinions that may be considered offensive. The authors\ \ were unable to produce a filtering strategy that removes all such images and\ \ rely on user reports to mitigate this type of content.\n" sample: [] size: 11M images, 1.1B mask annotations type: dataset url: https://ai.facebook.com/datasets/segment-anything/ - access: explanation: 'Inference code and model checkpoints are available on the model''s [[GitHub repository]](https://github.com/facebookresearch/segment-anything). Its training dataset SA-1B can be used for research purposes and is available for download [here](https://ai.facebook.com/datasets/segment-anything-downloads/). ' value: open analysis: explanation: See [[Zero-Shot Transfer Experiments]](https://arxiv.org/pdf/2304.02643.pdf#section.7) for more details. 
value: '"We extensively evaluate SAM. First, using a diverse new suite of 23 segmentation datasets, we find that SAM produces high-quality masks from a single foreground point, often only slightly below that of the manually annotated ground truth. Second, we find consistently strong quantitative and qualitative results on a variety of downstream tasks under a zero-shot transfer protocol using prompt engineering, including edge detection, object proposal generation, instance segmentation, and a preliminary exploration of text-to-mask prediction." ' created_date: explanation: The date the [[Meta blog post]](https://ai.facebook.com/blog/segment-anything-foundation-model-image-segmentation/) was released. value: 2023-04-05 dependencies: - SA-1B description: SAM (Segment Anything Model) is a foundation model for image segmentation. The model is designed and trained to be promptable, and supports flexible prompts (point, box, mask and free-form text) to compute masks in real-time to allow interactive use. feedback: Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) or by emailing at segment-anything at meta.com. intended_uses: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: '"SAM is intended to be used for any prompt-based segmentation task. We explored its use in segmenting objects from a point, edge detection, segmenting all objects, and segmenting detected objects. We explored how SAM can integrate with other vision models to segment objects from text." ' license: explanation: See [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE) value: Apache 2.0 modality: image, text; image model_card: explanation: Model card can be found in the Appendix section of the paper. 
value: https://arxiv.org/pdf/2304.02643.pdf#page=28 monitoring: '' name: SAM nationality: USA organization: Meta prohibited_uses: explanation: See [[Discussion]](https://arxiv.org/pdf/2304.02643.pdf#section.8) value: "For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE).\ \ Authors also discuss the following limitations of the model: \"While SAM performs\ \ well in general, it is not perfect. It can miss fine structures, hallucinates\ \ small disconnected components at times, and does not produce boundaries as\ \ crisply as more computationally intensive methods that \u201Czoom-in\u201D\ , e.g. [18]. In general, we expect dedicated interactive segmentation methods\ \ to outperform SAM when many points are provided, e.g. [67]. Unlike these methods,\ \ SAM is designed for generality and breadth of use rather than high IoU interactive\ \ segmentation. Moreover, SAM can process prompts in real-time, but nevertheless\ \ SAM's overall performance is not real-time when using a heavy image encoder.\ \ Our foray into the text-to-mask task is exploratory and not entirely robust,\ \ although we believe it can be improved with more effort. While SAM can perform\ \ many tasks, it is unclear how to design simple prompts that implement semantic\ \ and panoptic segmentation. Finally, there are domain-specific tools, such\ \ as [7], that we expect to outperform SAM in their respective domains.\"\n" quality_control: explanation: See [[Segment Anything RAI Analysis]](https://arxiv.org/pdf/2304.02643.pdf#section.6) for more details. value: '"We perform a Responsible AI (RAI) analysis of our work by investigating potential fairness concerns and biases when using SA-1B and SAM. We focus on the geographic and income distribution of SA-1B and fairness of SAM across protected attributes of people." 
' size: unknown training_emissions: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: 2.8 metric tons of carbon dioxide training_hardware: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: 256 A100 GPUs training_time: explanation: See [[Model card]](https://arxiv.org/pdf/2304.02643.pdf#page=28) value: 68 hours type: model url: https://arxiv.org/pdf/2304.02643.pdf - access: closed analysis: Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming the current state-of-the-art English model VALL-E. created_date: 2023-06-16 dependencies: [] description: Voicebox is the first generative AI model for speech to generalize across tasks with state-of-the-art performance. feedback: '' intended_uses: '' license: '' modality: audio; text model_card: '' monitoring: '' name: Voicebox nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 330M parameters (dense) training_emissions: unknown training_hardware: 32 GPUs of unspecified type training_time: 750,000 iterations type: model url: https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/ - access: open analysis: PEER is evaluated on core research questions intended to gauge language understanding, proper use of citations, instruction following, and iterative use. created_date: 2022-08-24 dependencies: [] description: PEER is a collaborative language model that is trained to imitate the entire writing process itself. PEER can write drafts, add suggestions, propose edits and provide explanations for its actions. feedback: '' intended_uses: adapting LLMs to work with collaborative writing and updating. license: '' modality: text; text model_card: '' monitoring: '' name: PEER nationality: USA organization: Meta prohibited_uses: '' quality_control: Heuristics and edit filtering was used on data set, which consisted mostly of Wikipedia pages. 
size: 3B parameters (dense) training_emissions: '' training_hardware: 64 GPUs training_time: '' type: model url: https://arxiv.org/pdf/2208.11663.pdf - access: open analysis: MusicGen was evaluated on standard music benchmarks of Frechet Audio Distance, Kullback-Leibler Divergence, and its CLAP score. created_date: 2023-08-02 dependencies: - Meta Music Initative Sound Collection - Shutterstock music collection - Pond5 music collection description: MusicGen is a simple and controllable model for music generation that doesn't require self-supervised semantic representation feedback: https://huggingface.co/spaces/facebook/MusicGen/discussions intended_uses: The primary use of MusicGen is research on AI-based music generation license: MIT modality: explanation: text; audio value: audio, text; audio, text model_card: https://github.com/facebookresearch/audiocraft/blob/main/model_cards/MUSICGEN_MODEL_CARD.md monitoring: '' name: MusicGen nationality: USA organization: Meta prohibited_uses: The model should not be used on downstream applications without further risk evaluation and mitigation. The model should not be used to intentionally create or disseminate music pieces that create hostile or alienating environments for people. This includes generating music that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes. quality_control: '' size: 3.3B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://huggingface.co/spaces/facebook/MusicGen/tree/main - access: open analysis: Evaluated on Frechet Audio Distance and Kullback-Leibler Divergence as well as qualitative studies with human participants. 
created_date: 2023-08-02 dependencies: - AudioSet - BBC sound effects - AudioCaps - Clotho v2 - VGG-Sound - FSD50K - Free To Use Sounds - Sonniss Game Effects - WeSoundEffects - Paramount Motion - Odeon Cinematic Sound Effects description: AudioGen is an auto-regressive generative model that generates audio samples conditioned on text inputs feedback: https://huggingface.co/facebook/audiogen-medium/discussions intended_uses: The primary use of AudioGen is research on AI-based audio generation. license: MIT modality: explanation: text; audio value: audio, text; audio, text model_card: https://github.com/facebookresearch/audiocraft/blob/main/model_cards/AUDIOGEN_MODEL_CARD.md monitoring: '' name: AudioGen nationality: USA organization: Meta prohibited_uses: The model should not be used on downstream applications without further risk evaluation and mitigation. The model should not be used to intentionally create or disseminate audio pieces that create hostile or alienating environments for people. This includes generating audio that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes. quality_control: '' size: 1.5B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://felixkreuk.github.io/audiogen/paper.pdf - access: closed analysis: Emu significantly outperforms a publicly available state-of-the-art model SDXLv1.0 on visual appeal when compared on standard benchmarks. created_date: 2023-09-27 dependencies: - CLIP - T5 description: Emu is a pre-trained latent diffusion model on 1.1 billion image-text pairs and fine-tuned with only a few thousand carefully selected high-quality images. 
feedback: '' intended_uses: '' license: unknown modality: text; image model_card: none monitoring: '' name: Emu nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 1.5B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://ai.meta.com/research/publications/emu-enhancing-image-generation-models-using-photogenic-needles-in-a-haystack/ - access: open analysis: Evaluated on several code benchmarks like HumanEval and MBPP. created_date: 2023-08-24 dependencies: - Llama 2 description: Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters. feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions intended_uses: Code Llama and its variants is intended for commercial and research use in English and relevant programming languages. license: Llama 2 modality: text; code, text model_card: https://huggingface.co/codellama/CodeLlama-34b-hf monitoring: '' name: Code LLaMA nationality: USA organization: Meta prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants. quality_control: '' size: 34B parameters (dense) training_emissions: 65.3 tCO2eq training_hardware: A100-80GB GPUs training_time: 400K GPU hours type: model url: https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/ - access: closed analysis: Analyzed against nearest neighbor model baseline and by extending the video length. 
created_date: 2023-11-16 dependencies: - Emu - CLIP - T5 description: Emu Video is a text-to-video generation model that factorizes the generation into two steps, first generating an image conditioned on the text, and then generating a video conditioned on the text and the generated image. feedback: none intended_uses: '' license: unknown modality: text; video model_card: none monitoring: '' name: Emu Video nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: 6B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://emu-video.metademolab.com/ - access: closed analysis: Evaluated on test set of actions in comparison to SoTA image editing models. created_date: 2023-11-16 dependencies: - Emu - CLIP - T5 description: Emu Edit is a multi-task image editing model which sets state-of-the-art results in instruction-based image editing. feedback: none intended_uses: '' license: unknown modality: text; image model_card: none monitoring: '' name: Emu Edit nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://emu-edit.metademolab.com/ - access: open analysis: Evaluated in comparison to CLIP. created_date: 2023-10-02 dependencies: - Common Crawl description: MetaCLIP is a more transparent rendition of CLIP that aims to reveal CLIP's training data curation methods. 
feedback: none intended_uses: '' license: CC-BY-NC-4.0 modality: text; text model_card: https://huggingface.co/facebook/metaclip-b32-400m monitoring: none name: MetaCLIP nationality: USA organization: Meta prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2103.00020.pdf - access: open analysis: The models were evaluated based on their performance on standard benchmarks and real-world scenarios. These evaluations were performed using a high-quality human evaluation set containing 1,800 prompts covering multiple use cases. The models also went through red-teaming for safety, where human experts and automated methods were used to generate adversarial prompts to test for problematic responses. created_date: 2024-04-18 dependencies: [] description: Llama 3 is the third generation of Meta AI's open-source large language model. It comes with pretrained and instruction-fine-tuned language models with 8B and 70B parameters that can support a broad range of use cases. feedback: Feedback is encouraged from users to improve the model, but the feedback mechanism is not explicitly described. intended_uses: Llama 3 is intended for a broad range of use cases, including AI assistance, content creation, learning, and analysis. license: explanation: Can be found at https://github.com/meta-llama/llama3/blob/main/LICENSE value: Llama 3 modality: text; text model_card: https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md monitoring: Extensive internal and external performance evaluation and red-teaming approach for safety testing. name: Llama 3 nationality: USA organization: Meta prohibited_uses: unknown quality_control: Extensive internal and external testing for safety, and design of new trust and safety tools. 
size: 70B parameters training_emissions: unknown training_hardware: 2 custom-built Meta 24K GPU clusters training_time: unknown type: model url: https://llama.meta.com/llama3/ - access: open analysis: Evaluated on a comprehensive range of tasks, including visual question answering, image captioning, text generation, image generation, and long-form mixed modal generation. Chameleon demonstrates broad and general capabilities, including state-of-the-art performance in image captioning tasks, outperforms Llama-2 in text-only tasks while being competitive with models such as Mixtral 8x7B and Gemini-Pro. created_date: 2024-05-17 dependencies: [] description: Chameleon is a family of early-fusion token-based mixed-modal models capable of understanding and generating images and text in any arbitrary sequence. feedback: none intended_uses: '' license: unknown modality: image, text; image, text model_card: none monitoring: '' name: Chameleon nationality: USA organization: Meta FAIR prohibited_uses: '' quality_control: '' size: 34B parameters training_emissions: unknown training_hardware: Meta's Research Super Cluster (powered by NVIDIA A100 80GB GPUs) training_time: unknown type: model url: https://arxiv.org/pdf/2405.09818 - access: open analysis: The model was evaluated on over 150 benchmark datasets that span a wide range of languages. An experimental evaluation suggests that the model is competitive with leading foundation models across a range of tasks. Also, smaller models of Llama 3.1 405B are competitive with closed and open models that have a similar number of parameters. created_date: 2024-07-23 dependencies: - Unknown description: Llama 3.1 405B is the first openly available model that rivals the top AI models when it comes to state-of-the-art capabilities in general knowledge, steerability, math, tool use, and multilingual translation. 
With the release of the 405B model, the Llama versions support advanced use cases, such as long-form text summarization, multilingual conversational agents, and coding assistants. It is the largest and most capable openly available foundation model. feedback: Unknown intended_uses: For advanced use cases, such as long-form text summarization, multilingual conversational agents, and coding assistants. May also be useful in the development of custom offerings and systems by developers. license: Unknown modality: text; text model_card: https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md monitoring: Unknown name: Llama 3.1 405B nationality: USA organization: Meta AI prohibited_uses: Unknown quality_control: The development process was focused on keeping the model scalable and straightforward. It adopted an iterative post-training procedure, where each round uses supervised fine-tuning and direct preference optimization. The model also underwent quality assurance and filtering for pre-and post-training data. size: 405B parameters (dense) training_emissions: Unknown training_hardware: Over 16 thousand H100 GPUs training_time: Unknown type: model url: https://ai.meta.com/blog/meta-llama-3-1/ - access: explanation: Future versions of the tuned models will be released as we improve model safety with community feedback. value: open analysis: Unknown created_date: 2024-12-06 dependencies: [] description: The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out). feedback: Instructions on how to provide feedback or comments on the model can be found in the model README. intended_uses: Intended for commercial and research use in multiple languages. Instruction tuned text only models are intended for assistant-like chat. 
license: explanation: A custom commercial license, the Llama 3.3 Community License Agreement value: Llama 3.3 Community License Agreement modality: explanation: The Llama 3.3 instruction tuned text only model is optimized for multilingual dialogue use cases. value: text; text model_card: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct monitoring: Unknown name: Llama 3.3 nationality: USA organization: Meta prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.3 Community License. quality_control: Used "supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety." size: explanation: The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out). value: 70B parameters training_emissions: explanation: Training Greenhouse Gas Emissions Estimated total location-based greenhouse gas emissions were 11,390 tons CO2eq for training. value: 11,390 tons CO2eq training_hardware: explanation: Training utilized a cumulative of 39.3M GPU hours of computation on H100-80GB (TDP of 700W) type hardware. value: H100-80GB (TDP of 700W) type hardware training_time: explanation: Training utilized a cumulative of 39.3M GPU hours of computation on H100-80GB (TDP of 700W) type hardware. value: 39.3M GPU hours type: model url: https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct ================================================ FILE: assets/microsoft.yaml ================================================ - access: explanation: Microsoft does not provide VLMo to external researchers. One author commented that code would be pushed "soon" in [[November of 2021]](https://github.com/microsoft/unilm/issues/532), but the repository does not contain relevant changes. 
value: closed analysis: '' created_date: explanation: The date the model paper was released value: 2021-11-03 dependencies: - Conceptual Captions - SBU Captions - COCO - Visual Genome - Wikipedia - BooksCorpus description: VLMo is a model for text-to-image generation feedback: '' intended_uses: '' license: none modality: text; image model_card: none monitoring: '' name: VLMo nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 562M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2111.02358 - access: explanation: Manual approval through early access request form required. value: limited analysis: '' created_date: explanation: Date model blog post was released value: 2022-09-28 dependencies: [] description: T-ULRv5 is a language model trained with two unique training objectives feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: T-ULRv5 nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 2.2B parameters (dense) training_emissions: '' training_hardware: 256 A100 training_time: Less than two weeks type: model url: https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/ - access: explanation: Manual approval through early access request form required. 
value: limited analysis: '' created_date: explanation: Date model blog post was released value: 2021-12-02 dependencies: [] description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Turing NLR-v5 nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 5B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw - access: explanation: Megatron-Turing NLG can be accessed through the [[Turing Academic Program]](https://www.microsoft.com/en-us/research/collaboration/microsoft-turing-academic-program/) value: limited analysis: '' created_date: explanation: 'The date of the Microsoft Research blog announcing MT-NLG [[Microsoft Research Blog]](https://www.microsoft.com/en-us/research/blog/using-deepspeed-and-megatron-to-train-megatron-turing-nlg-530b-the-worlds-largest-and-most-powerful-generative-language-model/). ' value: 2022-01-28 dependencies: - The Pile description: 'Megatron-Turing NLG is a 530B parameter autoregressive language model. 
' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Megatron-Turing NLG nationality: USA organization: Microsoft, NVIDIA prohibited_uses: '' quality_control: '' size: 530B parameters (dense) training_emissions: '' training_hardware: 4480 A100s (560 x 8) training_time: '' type: model url: https://arxiv.org/abs/2201.11990 - access: explanation: Microsoft does not provide public access to VALL-E value: closed analysis: '' created_date: explanation: The date the [[model paper]](https://arxiv.org/abs/2301.02111) was released value: 2023-01-05 dependencies: [] description: Vall-E is a neural code model for text-to-speech synthesis feedback: '' intended_uses: '' license: unknown modality: text; audio model_card: none monitoring: '' name: VALL-E nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: 16 V100 32GB GPUs training_time: '' type: model url: https://valle-demo.github.io/ - access: explanation: 'The feature is available to developers in a restricted technical preview [[GitHub CoPilot]](https://copilot.github.com/). ' value: limited adaptation: unknown created_date: explanation: 'Date of the blog post introducing CoPilot [[GitHub Blog Post]] (https://github.blog/2021-06-29-introducing-github-copilot-ai-pair-programmer/). ' value: 2021-06-29 dependencies: - Codex description: 'GitHub CoPilot is a coding pair programmer assisting programmers as they write code. ' failures: unknown feedback: 'Feedback can be provided in the CoPilot feedback project [[CoPilot feedback]] (https://github.com/github/feedback/discussions/categories/copilot-feedback). ' intended_uses: 'GitHub CoPilot is intended to be used as a coding assistant. 
' license: unknown monitoring: "value: unknown explanation: >\n There may be internal monitoring mechanisms\ \ unknown to the public.\n" monthly_active_users: 'GitHub Copilot reportedly has over 1 million sign-ups [[Tweet Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg). ' name: GitHub CoPilot nationality: USA organization: Microsoft output_space: Code completions prohibited_uses: 'Access to GPT-3 is governed by GitHub Acceptable Use Policies and Terms of Service, both of which list a set of prohibited uses [[Use Policies]] (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies) [[Terms of Service]] (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service). ' quality_control: 'GitHub is working on a filter to detect and suppress code generations that are verbatim from the training set [[GitHub Research Recitation]] (https://docs.github.com/en/github/copilot/research-recitation). According to the FAQ, GitHub implemented a simple filter that blocks emails in standard formats to protect personally identifiable data that may be present in the training data [[GitHub CoPilot]](https://copilot.github.com/). 
' terms_of_service: 'https://docs.github.com/en/site-policy/github-terms/github-terms-of-service ' type: application url: https://copilot.github.com/ user_distribution: unknown - access: open analysis: '' created_date: 2022-09-24 dependencies: - PubMed description: '' feedback: '' intended_uses: '' license: MIT modality: text; text model_card: '' monitoring: '' name: BioGPT nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 1.5B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true - access: limited adaptation: unknown created_date: 2023-02-07 dependencies: - ChatGPT API description: AI-powered Bing search engine and Edge browser, available in preview now at Bing.com, to deliver better search, more complete answers, a new chat experience and the ability to generate content. We think of these tools as an AI copilot for the web. failures: '' feedback: 'Feedback can be submitted at [bing.com](bing.com). ' intended_uses: Search engine license: explanation: 'Software license as described in the Terms of Service document. 
' value: custom monitoring: '' monthly_active_users: '' name: Bing Search nationality: USA organization: Microsoft output_space: Search results prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ user_distribution: '' - access: closed analysis: '' created_date: 2023-03-01 dependencies: - The Pile - CommonCrawl - LAION-2B-en - LAION-400M - COYO-700M - Conceptual Captions description: KOSMOS-1 is a multimodal language model that is capable of perceiving multimodal input, following instructions, and performing in-context learning for not only language tasks but also multimodal tasks. feedback: '' intended_uses: '' license: MIT modality: image, text; image, text model_card: '' monitoring: '' name: KOSMOS-1 nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 1.6B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2302.14045.pdf - access: closed analysis: '' created_date: 2023-02-07 dependencies: [] description: In the context of Bing, we have developed a proprietary way of working with the OpenAI model that allows us to best leverage its power. We call this collection of capabilities and techniques the Prometheus model. This combination gives you more relevant, timely and targeted results, with improved safety. 
feedback: '' intended_uses: '' license: unknown modality: unknown model_card: '' monitoring: '' name: Prometheus nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/ - access: closed analysis: '' created_date: 2022-11-23 dependencies: - FLD-900M description: '' feedback: '' intended_uses: '' license: unknown modality: text; image model_card: '' monitoring: '' name: Florence nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 900M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2111.11432 - access: closed analysis: '' created_date: 2022-11-23 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: image, text monitoring: '' name: FLD-900M nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' sample: [] size: 900M image-text pairs type: dataset url: https://arxiv.org/abs/2111.11432 - access: limited adaptation: '' created_date: 2023-03-07 dependencies: - Florence description: Cost-effective, production-ready computer vision services in Azure Cognitive Service for Vision. The improved Vision Services enables developers to create cutting-edge, market-ready, responsible computer vision applications across various industries. failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license in the Microsoft Terms of Use govern the license rules for Azure services as outlined in the Terms of Service document. 
' value: custom monitoring: '' monthly_active_users: '' name: Azure Cognitive Services for Vision nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://azure.microsoft.com/en-us/support/legal/ type: application url: https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer user_distribution: '' - access: closed analysis: '' created_date: 2023-03-08 dependencies: - OpenAI API description: '' feedback: '' intended_uses: '' license: none modality: text; image, text model_card: '' monitoring: '' name: VisualChatGPT nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2303.04671.pdf - access: limited adaptation: '' created_date: 2023-03-16 dependencies: - GPT-4 API description: It combines the power of language models with your data in the Microsoft Graph and the Microsoft 365 apps to turn your words into the most powerful productivity tool on the planet. failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. 
' value: custom monitoring: '' monthly_active_users: '' name: Microsoft 365 Copilot nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/ user_distribution: '' - access: limited adaptation: '' created_date: 2023-03-16 dependencies: - Microsoft 365 Copilot description: "Business Chat works across the langugae model, the Microsoft 365 apps,\ \ and your data \u2014 your calendar, emails, chats, documents, meetings and contacts\ \ \u2014 to do things you\u2019ve never been able to do before. You can give it\ \ natural language prompts like \u201CTell my team how we updated the product\ \ strategy,\u201D and it will generate a status update based on the morning\u2019\ s meetings, emails and chat threads." failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. ' value: custom monitoring: '' monthly_active_users: '' name: Microsoft Business Chat nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/ user_distribution: '' - access: open adaptation: '' created_date: unknown dependencies: - Microsoft 365 Copilot description: Microsoft Excel is the industry leading spreadsheet software program, a powerful data visualization and analysis tool. failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. 
' value: custom monitoring: '' monthly_active_users: '' name: Microsoft Excel nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://www.microsoft.com/en-us/microsoft-365/excel user_distribution: '' - access: open adaptation: '' created_date: unknown dependencies: - Microsoft 365 Copilot description: Microsoft Outlook is a personal information manager software system from Microsoft, available as a part of the Microsoft Office and Microsoft 365 software suites. failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. ' value: custom monitoring: '' monthly_active_users: '' name: Microsoft Outlook nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook user_distribution: '' - access: limited adaptation: '' created_date: unknown dependencies: - Microsoft 365 Copilot description: Microsoft Power Platform is a line of business intelligence, app development, and app connectivity software applications. failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. 
' value: custom monitoring: '' monthly_active_users: '' name: Microsoft Power Platform nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://powerplatform.microsoft.com/en-us/ user_distribution: '' - access: open adaptation: '' created_date: unknown dependencies: - Microsoft 365 Copilot description: Microsoft PowerPoint empowers you to create clean slideshow presentations and intricate pitch decks and gives you a powerful presentation maker. failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. ' value: custom monitoring: '' monthly_active_users: '' name: Microsoft PowerPoint nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://www.microsoft.com/en-us/microsoft-365/powerpoint user_distribution: '' - access: open adaptation: '' created_date: unknown dependencies: - Microsoft 365 Copilot - Microsoft Business Chat description: Microsoft Teams is a proprietary business communication platform developed by Microsoft, as part of the Microsoft 365 family of products. failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. 
' value: custom monitoring: '' monthly_active_users: '' name: Microsoft Teams nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://www.microsoft.com/en-us/microsoft-teams/group-chat-software user_distribution: '' - access: open adaptation: '' created_date: unknown dependencies: - Microsoft 365 Copilot description: Microsoft Word is a word processing software developed by Microsoft failures: '' feedback: '' intended_uses: '' license: explanation: 'Software license as described in the Terms of Service document. ' value: custom monitoring: '' monthly_active_users: '' name: Microsoft Word nationality: USA organization: Microsoft output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://www.microsoft.com/en-us/microsoft-365/word user_distribution: '' - access: limited adaptation: unknown created_date: unknown dependencies: [] description: 'Inside look is a Microsoft Office feature, composing document insights highlighting key points, expected time to read, and popularity among others. ' failures: unknown feedback: unknown intended_uses: Providing document insights to users. license: explanation: 'Software license as described in the Terms of Service document. ' value: custom monitoring: unknown monthly_active_users: unknown name: Microsoft Inside Look nationality: USA organization: Microsoft output_space: Document level insights for users. 
prohibited_uses: unknown quality_control: unknown terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026 user_distribution: unknown - access: limited adaptation: unknown created_date: unknown dependencies: [] description: 'Suggested replies is a Microsoft Outlook feature that suggests responses to emails, available in: English, Spanish, Italian, French, German, Portuguese Chinese Simplified, Chinese Traditional, Swedish, Russian, Korean, Czech, Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, Norwegian, Danish, and Polish. ' failures: unknown feedback: unknown intended_uses: Suggesting email replies. license: explanation: 'Software license as described in the Terms of Service document. ' value: custom monitoring: unknown monthly_active_users: unknown name: Microsoft Suggested Replies nationality: USA organization: Microsoft output_space: Suggested emails. prohibited_uses: unknown quality_control: unknown terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379 user_distribution: unknown - access: limited adaptation: explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot combines OpenAI's GPT-4 generative AI with a security-specific model from Microsoft. This security-specific model in turn incorporates a growing set of security-specific skills and is informed by Microsoft's unique global threat intelligence and more than 65 trillion daily signals. 
created_date: explanation: The date Security Copilot was announced in the [[Microsoft blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: 2023-03-28 dependencies: - GPT-4 - Microsoft security-specific model description: 'Microsoft Security Copilot is an AI-powered security analysis tool that enables analysts to respond to threats quickly, process signals at machine speed, and assess risk exposure in minutes. ' failures: unknown feedback: unknown intended_uses: Security Copilot is designed to enhance the capabilities of cybersecurity professionals. It leverages machine speed and scale to accelerate response to security incidents, discover and process threat signals, and assess risk exposure within minutes. license: explanation: 'Software license as described in the Terms of Service document. ' value: custom monitoring: '' monthly_active_users: unknown name: Microsoft Security Copilot nationality: USA organization: Microsoft output_space: explanation: See [[product demo]](https://www.microsoft.com/en-us/security/business/ai-machine-learning/microsoft-security-copilot). value: Actionable responses to security-related questions (text and image). Security event, incident or threat reports (PowerPoint slide). prohibited_uses: unknown quality_control: explanation: See [[blog post]](https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/). value: Security Copilot employs a closed-loop learning system that learns from user interactions and feedback, enabling it to provide more coherent, relevant, and useful answers that continually improve over time. Security Copilot is committed to delivering safe, secure, and responsible AI solutions, ensuring that customers' data and AI models are protected with enterprise compliance and security controls. 
Customer data is owned and controlled by them, and not used to train AI models for anyone outside their organization. terms_of_service: https://www.microsoft.com/legal/terms-of-use type: application url: https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/ user_distribution: unknown - access: open analysis: Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks. created_date: 2019-10-01 dependencies: [] description: UniLM is a unified language model that can be fine-tuned for both natural language understanding and generation tasks. feedback: '' intended_uses: '' license: MIT modality: text; text model_card: '' monitoring: '' name: UniLM nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 340M parameters (dense) training_emissions: unknown training_hardware: 8 NVIDIA Tesla V100 32GB GPUs training_time: 10,000 steps in 7 hours type: model url: https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf - access: limited analysis: '' created_date: 2021-04-12 dependencies: [] description: Docugami is a LLM focused on writing business documents and data using generative AI. feedback: '' intended_uses: analyzing, writing, and connecting business documents and data license: '' modality: text; text model_card: '' monitoring: '' name: Docugami nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: unknown training_hardware: '' training_time: unknown type: model url: https://www.docugami.com/generative-ai - access: open analysis: Evaluated on a range of standardized vision benchmarks, and achieves state of the art performance on all experimentally. created_date: 2022-08-31 dependencies: - Multiway Transformer network description: BEiT-3 is a general-purpose multimodal foundation model for vision and vision-language tasks. 
feedback: '' intended_uses: '' license: '' modality: image, text; image, text model_card: '' monitoring: '' name: BEiT-3 nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 1.9B parameters (dense) training_emissions: unknown training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2208.10442.pdf - access: open analysis: Reports results on standard LLM benchmarks in comparison to other LLMs and test sets. created_date: 2023-04-24 dependencies: - LLaMA - Evol-Instruct - Alpaca dataset description: Starting with an initial set of instructions, we use our proposed Evol-Instruct to rewrite them step by step into more complex instructions. Then, we mix all generated instruction data to fine-tune LLaMA. We call the resulting model WizardLM. feedback: https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions intended_uses: Creating large amounts of instruction data, particularly with high complexity license: Apache 2.0 modality: text; text model_card: https://huggingface.co/WizardLM/WizardLM-13B-1.0 monitoring: '' name: WizardLM nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: '' training_hardware: 8 V100 GPUs training_time: 70 hours on 3 epochs type: model url: https://arxiv.org/pdf/2304.12244v1.pdf - access: open analysis: Evaluated on four prominent code generation benchmarks HumanEval, HumanEval+, MBPP, and DS100. created_date: 2023-08-26 dependencies: - Evol-Instruct - Alpaca dataset - StarCoder description: explanation: WizardCoder refers to both a set of models fine-tuned on StarCoder and a set of models fined-tuned on Code LLaMA. This node refers to the set fine-tuned on StarCoder. value: WizardCoder empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code. 
feedback: https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0/discussions intended_uses: '' license: explanation: Model license can be found at https://github.com/nlpxucan/WizardLM/blob/main/WizardCoder/MODEL_WEIGHTS_LICENSE. Code license is under Apache 2.0 value: BigCode Open Rail-M modality: explanation: text; text value: text; text model_card: https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0 monitoring: '' name: WizardCoder nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 34B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2306.08568.pdf - access: closed analysis: Evaluated on standard image processing benchmarks created_date: 2023-11-10 dependencies: - FLD-5B description: WizardCoder empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code. feedback: none intended_uses: '' license: unknown modality: image, text; text model_card: none monitoring: '' name: Florence-2 nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 771M parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2311.06242.pdf - access: closed analysis: FLD-5B evaluated in comparison to datasets that power other large-scale image models on standard image benchmarks. created_date: 2023-11-10 datasheet: '' dependencies: [] description: FLD-5B is the dataset that powers Florence-2 excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: image, text monitoring: '' name: FLD-5B nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' sample: [] size: 1.3B image-text annotations type: dataset url: https://arxiv.org/pdf/2311.06242.pdf - access: open analysis: Models trained on OpenOrca compared to GPT-series on language benchmarks. 
created_date: 2023-06-05 datasheet: https://huggingface.co/datasets/Open-Orca/OpenOrca dependencies: - GPT-3.5 - GPT-4 - Flan Collection description: The OpenOrca dataset is a collection of augmented FLAN Collection data. Currently ~1M GPT-4 completions, and ~3.2M GPT-3.5 completions. It is tabularized in alignment with the distributions presented in the ORCA paper and currently represents a partial completion of the full intended dataset, with ongoing generation to expand its scope. excluded: '' feedback: none included: '' intended_uses: training and evaluation in the field of natural language processing. license: MIT modality: text monitoring: '' name: OpenOrca nationality: USA organization: Microsoft prohibited_uses: none quality_control: '' sample: [] size: 4.5M text queries type: dataset url: https://huggingface.co/datasets/Open-Orca/OpenOrca - access: open analysis: LlongOrca evaluated on BigBench-Hard and AGIEval results. created_date: 2023-08-01 dependencies: - OpenOrca - LLongMA-2 description: LlongOrca is an attempt to make OpenOrca able to function in a Llong context. feedback: https://huggingface.co/Open-Orca/LlongOrca-7B-16k/discussions intended_uses: training and evaluation in the field of natural language processing. license: LLaMA 2 modality: text; text model_card: https://huggingface.co/Open-Orca/LlongOrca-7B-16k monitoring: '' name: LlongOrca nationality: USA organization: Microsoft prohibited_uses: none quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: 8x A6000-48GB (first-gen) GPUs training_time: 37 hours type: model url: https://huggingface.co/Open-Orca/LlongOrca-7B-16k - access: open analysis: Evaluated on common sense reasoning, language understanding, and multi-step reasoning compared to other SOTA language models. created_date: 2023-09-11 dependencies: - phi-1 description: Phi-1.5 is a large language transformer model. 
feedback: https://huggingface.co/microsoft/phi-1_5/discussions intended_uses: Phi-1.5 is best suited for answering prompts using the QA format, the chat format, and the code format. license: explanation: can be found via the license tab at top of https://huggingface.co/microsoft/phi-1_5 value: MIT modality: text; text model_card: https://huggingface.co/microsoft/phi-1_5 monitoring: none name: Phi-1.5 nationality: USA organization: Microsoft prohibited_uses: '' quality_control: generic web-crawl data is removed from dataset. size: 1.3B parameters (dense) training_emissions: unknown training_hardware: 32 A100-40G GPUs training_time: 8 days type: model url: https://arxiv.org/pdf/2309.05463.pdf - access: open analysis: Orca 2 has been evaluated on a large number of tasks ranging from reasoning to grounding and safety. created_date: 2023-11-21 dependencies: - LLaMA 2 description: Orca 2 is a finetuned version of LLAMA-2 for research purposes. feedback: https://huggingface.co/microsoft/Orca-2-13b/discussions intended_uses: Orca 2 is built for research purposes only. The main purpose is to allow the research community to assess its abilities and to provide a foundation for building better frontier models. license: explanation: can be found at https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE value: custom modality: text; text model_card: https://huggingface.co/microsoft/Orca-2-13b monitoring: unknown name: Orca 2 nationality: USA organization: Microsoft prohibited_uses: Any purposes other than research. quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: 32 NVIDIA A100 80GB GPUs training_time: 80 hours type: model url: https://arxiv.org/pdf/2311.11045.pdf - access: open analysis: The model has been evaluated against benchmarks that test common sense, language understanding, mathematics, coding, long-term context, and logical reasoning. 
The Phi-3 Medium-128K-Instruct demonstrated robust and state-of-the-art performance. created_date: 2024-05-21 dependencies: [] description: Phi-3 is a 14 billion-parameter, lightweight, state-of-the-art open model trained using the Phi-3 datasets. feedback: https://huggingface.co/microsoft/Phi-3-medium-128k-instruct/discussions intended_uses: The model's primary use cases are for commercial and research purposes that require capable reasoning in memory or compute constrained environments and latency-bound scenarios. It can also serve as a building block for generative AI-powered features. license: MIT modality: text; text model_card: https://huggingface.co/microsoft/Phi-3-medium-128k-instruct monitoring: Issues like allocation, high-risk scenarios, misinformation, generation of harmful content and misuse should be monitored and addressed. name: Phi-3 nationality: USA organization: Microsoft prohibited_uses: The model should not be used for high-risk scenarios without adequate evaluation and mitigation techniques for accuracy, safety, and fairness. quality_control: The model underwent post-training processes viz. supervised fine-tuning and direct preference optimization to increase its capability in following instructions and aligning to safety measures. size: 14B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/abs/2404.14219 - access: closed analysis: Evaluated by comparing climate predictions to actual happened events. created_date: 2024-05-28 dependencies: [] description: Aurora is a large-scale foundation model of the atmosphere trained on over a million hours of diverse weather and climate data. 
feedback: none intended_uses: '' license: unknown modality: text; climate forecasts model_card: none monitoring: '' name: Aurora nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: 1.3B parameters training_emissions: unknown training_hardware: 32 A100 GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2405.13063 - access: closed analysis: Evaluated on a digital pathology benchmark comprising 9 cancer subtyping tasks and 17 pathomics tasks, with Prov-GigaPath demonstrating SoTA performance in 25 out of 26 tasks. created_date: 2024-05-22 dependencies: - GigaPath description: "Prov-GigaPath is a whole-slide pathology foundation model pretrained\ \ on 1.3 billion 256\u2009\xD7\u2009256 pathology image tiles." feedback: none intended_uses: '' license: unknown modality: image; embeddings model_card: none monitoring: '' name: Prov-GigaPath nationality: USA organization: Microsoft prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: 4 80GB A100 GPUs training_time: 2 days type: model url: https://www.nature.com/articles/s41586-024-07441-w - access: open analysis: The model was evaluated across a variety of public benchmarks, comparing with a set of models including Mistral-Nemo-12B-instruct-2407, Llama-3.1-8B-instruct, Gemma-2-9b-It, Gemini-1.5-Flash, and GPT-4o-mini-2024-07-18. It achieved a similar level of language understanding and math as much larger models. It also displayed superior performance in reasoning capability, even with only 6.6B active parameters. It was also evaluated for multilingual tasks. created_date: 2024-09-08 dependencies: - Phi-3 dataset description: Phi-3.5-MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents, with a focus on very high-quality, reasoning dense data. It supports multilingual and has a 128K context length in tokens. 
The model underwent a rigorous enhancement process, incorporating supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure instruction adherence and robust safety measures. feedback: Unknown intended_uses: The model is intended for commercial and research use in multiple languages. It is designed to accelerate research on language and multimodal models, and for use as a building block for generative AI powered features. It is suitable for general purpose AI systems and applications which require memory/computed constrained environments, latency bound scenarios, and strong reasoning. license: MIT modality: text; text model_card: https://huggingface.co/microsoft/Phi-3.5-MoE-instruct monitoring: Unknown name: Phi-3.5-MoE nationality: USA organization: Microsoft prohibited_uses: The model should not be used for downstream purposes it was not specifically designed or evaluated for. Developers should evaluate and mitigate for accuracy, safety, and fariness before using within a specific downstream use case, particularly for high risk scenarios. quality_control: The model was enhanced through supervised fine-tuning, proximal policy optimization, and direct preference optimization processes for safety measures. size: 61B parameters (sparse); 6.6B active parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://huggingface.co/microsoft/Phi-3.5-MoE-instruct - access: explanation: Phi-4 is available on Azure AI Foundry and on Hugging Face. value: open analysis: Phi-4 outperforms comparable and larger models on math related reasoning. created_date: 2024-12-13 dependencies: [] description: the latest small language model in Phi family, that offers high quality results at a small size (14B parameters). feedback: unknown intended_uses: Specialized in complex reasoning, particularly good at math problems and high-quality language processing. 
license: unknown modality: explanation: Today we are introducing Phi-4 , our 14B parameter state-of-the-art small language model (SLM) that excels at complex reasoning in areas such as math, in addition to conventional language processing. value: text; text model_card: unknown monitoring: Azure AI evaluations in AI Foundry enable developers to iteratively assess the quality and safety of models and applications using built-in and custom metrics to inform mitigations. name: Phi-4 nationality: USA organization: Microsoft prohibited_uses: unknown quality_control: Building AI solutions responsibly is at the core of AI development at Microsoft. We have made our robust responsible AI capabilities available to customers building with Phi models. size: explanation: a small size (14B parameters). value: 14B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://techcommunity.microsoft.com/blog/aiplatformblog/introducing-phi-4-microsoft%E2%80%99s-newest-small-language-model-specializing-in-comple/4357090 ================================================ FILE: assets/mila.yaml ================================================ - access: open analysis: Models of size 150k parameters trained on ToyMix and compared to models trained on its dependencies across GNN baselines. created_date: 2023-10-09 datasheet: none dependencies: - QM9 - TOX21 - ZINC12K description: ToyMix is the smallest dataset of three extensive and meticulously curated multi-label datasets that cover nearly 100 million molecules and over 3000 sparsely defined tasks. excluded: '' feedback: none included: '' intended_uses: The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. Further, the ToyMix dataset is intended to be used in a multi-task setting, meaning that a single model should be trained to predict them simultaneously. 
license: CC BY-NC-SA 4.0 modality: molecules, tasks monitoring: none name: ToyMix nationality: Canada organization: Mila-Quebec AI Institute prohibited_uses: none quality_control: '' sample: [] size: 13B labels of quantum and biological nature. type: dataset url: https://arxiv.org/pdf/2310.04292.pdf - access: open analysis: Models of size between 4M and 6M parameters trained for 200 epochs on LargeMix and compared to models trained on its dependencies across GNN baselines. created_date: 2023-10-09 datasheet: none dependencies: - L1000 VCAP - L1000 MCF7 - PCBA1328 - PCQM4M_G25_N4 description: LargeMix is the middle-sized dataset of three extensive and meticulously curated multi-label datasets that cover nearly 100 million molecules and over 3000 sparsely defined tasks. excluded: '' feedback: none included: '' intended_uses: The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. Further, the LargeMix dataset is intended to be used in a multi-task setting, meaning that a single model should be trained to predict them simultaneously. license: CC BY-NC-SA 4.0 modality: molecules, tasks monitoring: none name: LargeMix nationality: Canada organization: Mila-Quebec AI Institute prohibited_uses: none quality_control: '' sample: [] size: 13B labels of quantum and biological nature. type: dataset url: https://arxiv.org/pdf/2310.04292.pdf - access: open analysis: Models of size between 4M and 6M parameters trained for 50 epochs on UltraLarge and compared to models trained on its dependencies across GNN baselines. created_date: 2023-10-09 datasheet: none dependencies: - PM6_83M description: UltraLarge is the largest dataset of three extensive and meticulously curated multi-label datasets that cover nearly 100 million molecules and over 3000 sparsely defined tasks. 
excluded: '' feedback: none included: '' intended_uses: The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. license: CC BY-NC-SA 4.0 modality: molecules, tasks monitoring: none name: UltraLarge nationality: Canada organization: Mila-Quebec AI Institute prohibited_uses: none quality_control: '' sample: [] size: 13B labels of quantum and biological nature. type: dataset url: https://arxiv.org/pdf/2310.04292.pdf - access: open analysis: Evaluated on previously unseen time series datasets. created_date: 2024-02-08 dependencies: [] description: Lag-LLaMA is a general-purpose foundation model for univariate probabilistic time series forecasting based on a decoder-only transformer architecture that uses lags as covariates. feedback: https://huggingface.co/time-series-foundation-models/Lag-Llama/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/time-series-foundation-models/Lag-Llama monitoring: unknown name: Lag-LLaMA nationality: unknown organization: Morgan Stanley, ServiceNow Research, University of Montreal, Mila-Quebec AI Institute prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: A single NVIDIA Tesla-P100 GPU training_time: unknown type: model url: https://time-series-foundation-models.github.io/lag-llama.pdf ================================================ FILE: assets/mistral.yaml ================================================ - access: open analysis: Evaluated in comparison to LLaMA series models on standard language benchmarks. created_date: 2023-09-27 dependencies: [] description: Mistral is a compact language model. 
feedback: https://huggingface.co/mistralai/Mistral-7B-v0.1/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/mistralai/Mistral-7B-v0.1 monitoring: none name: Mistral nationality: USA organization: Mistral AI prohibited_uses: '' quality_control: '' size: 7.3B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://mistral.ai/news/announcing-mistral-7b/ - access: limited analysis: Evaluated on commonly used benchmarks in comparison to the current LLM leaders. created_date: 2024-02-26 dependencies: [] description: "Mistral Large is Mistral AI\u2019s new cutting-edge text generation\ \ model." feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: Mistral Large nationality: USA organization: Mistral AI prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://mistral.ai/news/mistral-large/ - access: limited adaptation: '' created_date: 2024-02-26 dependencies: - Mistral - Mistral Large description: Le Chat is a first demonstration of what can be built with Mistral models and what can be deployed in the business environment. failures: unknown feedback: none intended_uses: '' license: unknown monitoring: '' monthly_active_users: unknown name: Le Chat nationality: USA organization: Mistral AI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://mistral.ai/terms/#terms-of-use type: application url: https://mistral.ai/news/le-chat-mistral/ user_distribution: unknown - access: open analysis: Performance of Codestral is evaluated in Python, SQL, and additional languages, C++, bash, Java, PHP, Typescript, and C#. Fill-in-the-middle performance is assessed using HumanEval pass@1 in Python, JavaScript, and Java.
created_date: 2024-05-29 dependencies: [] description: Codestral is an open-weight generative AI model explicitly designed for code generation tasks. It helps developers write and interact with code through a shared instruction and completion API endpoint. Mastering code and English, it can be used to design advanced AI applications for software developers. It is fluent in 80+ programming languages. feedback: none intended_uses: Helps developers write and interact with code, design advanced AI applications for software developers, integrated into LlamaIndex and LangChain for building applications, integrated in VSCode and JetBrains environments for code generation and interactive conversation. license: Mistral AI Non-Production License modality: text; code model_card: none monitoring: unknown name: Codestral nationality: USA organization: Mistral AI prohibited_uses: unknown quality_control: '' size: 22B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://mistral.ai/news/codestral/ - access: open analysis: The model underwent an advanced fine-tuning and alignment phase. Its performance was evaluated using GPT4o as a judge on official references. It was compared to recent open-source pre-trained models Gemma 2 9B, Llama 3 8B regarding multilingual performance and coding accuracy. Tekken tokenizer's compression ability was compared with previous tokenizers like SentencePiece and the Llama 3 tokenizer. created_date: 2024-07-18 dependencies: [] description: The Mistral NeMo model is a state-of-the-art 12B model built in collaboration with NVIDIA, offering a large context window of up to 128k tokens. The model is suitable for multilingual applications and exhibits excellent reasoning, world knowledge, and coding accuracy. It's easy to use and a drop-in replacement in a system that uses Mistral 7B. The model uses a new tokenizer, Tekken, based on Tiktoken, which is trained on over 100 languages. 
It compresses natural language text and source code more efficiently than previously used tokenizers. feedback: Problems should be reported to the Mistral AI team, though the specific method of reporting is unknown. intended_uses: The model can be used for multilingual applications, understanding and generating natural language as well as source code, handling multi-turn conversations, and providing more precise instruction following. license: Apache 2.0 modality: text; text model_card: unknown monitoring: Unknown name: Mistral NeMo nationality: USA organization: Mistral AI, NVIDIA prohibited_uses: Unknown quality_control: The model underwent an advanced fine-tuning and alignment phase. Various measures such as accuracy comparisons with other models and instruction-tuning were implemented to ensure its quality. size: 12B parameters training_emissions: Unknown training_hardware: NVIDIA hardware, specifics unknown training_time: Unknown type: model url: https://mistral.ai/news/mistral-nemo/ - access: open analysis: The model has been tested for in-context retrieval capabilities up to 256k tokens. It has been created with advanced code and reasoning capabilities, which enables it to perform on par with SOTA transformer-based models. created_date: 2024-07-16 dependencies: [] description: Codestral Mamba is a Mamba2 language model that is specialized in code generation. It has a theoretical ability to model sequences of infinite length and offers linear time inference. This makes it effective for extensive user engagement and is especially practical for code productivity use cases. Codestral Mamba can be deployed using the mistral-inference SDK or through TensorRT-LLM, and users can download the raw weights from HuggingFace. feedback: Problems with the model can be reported through the organization's website. intended_uses: The model is intended for code generation and can be utilized as a local code assistant. 
license: Apache 2.0 modality: text; text model_card: unknown monitoring: Unknown name: Codestral Mamba nationality: USA organization: Mistral AI prohibited_uses: Unknown quality_control: Unknown size: 7.3B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://mistral.ai/news/codestral-mamba/ - access: open analysis: The model's performance has been evaluated on the MATH and MMLU industry-standard benchmarks. It scored notably higher on both these tests than the base model Mistral 7B. created_date: 2024-07-16 dependencies: - Mistral 7B description: "Math\u03A3tral is a 7B model designed for math reasoning and scientific\ \ discovery. It achieves state-of-the-art reasoning capacities in its size category\ \ across various industry-standard benchmarks. This model stands on the shoulders\ \ of Mistral 7B and specializes in STEM subjects. It is designed to assist efforts\ \ in advanced mathematical problems requiring complex, multi-step logical reasoning.\ \ It particularly achieves 56.6% on MATH and 63.47% on MMLU." feedback: Feedback is likely expected to be given through the HuggingFace platform where the model's weights are hosted or directly to the Mistral AI team. intended_uses: The model is intended for use in solving advanced mathematical problems requiring complex, multi-step logical reasoning or any math-related STEM subjects challenges. license: Apache 2.0 modality: text; text model_card: unknown monitoring: Unknown name: "Math\u03A3tral" nationality: USA organization: Mistral AI prohibited_uses: Unknown quality_control: This model has been fine-tuned from a base model and its inference and performance have been tested on several industry benchmarks. 
size: 7B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://mistral.ai/news/mathstral/ - access: explanation: The model is available under the Mistral Research License (MRL) for research and educational use; and the Mistral Commercial License for experimentation, testing, and production for commercial purposes. value: open analysis: We evaluate Pixtral Large against frontier models on a set of standard multimodal benchmarks, through a common testing harness. created_date: 2024-11-18 dependencies: - Mistral Large 2 description: Pixtral Large is the second model in our multimodal family and demonstrates frontier-level image understanding. Particularly, the model is able to understand documents, charts and natural images, while maintaining the leading text-only understanding of Mistral Large 2. feedback: unknown intended_uses: RAG and agentic workflows, making it a suitable choice for enterprise use cases such as knowledge exploration and sharing, semantic understanding of documents, task automation, and improved customer experiences. license: explanation: The model is available under the Mistral Research License (MRL) for research and educational use; and the Mistral Commercial License for experimentation, testing, and production for commercial purposes. value: Mistral Research License (MRL), Mistral Commercial License modality: explanation: Pixtral Large is the second model in our multimodal family and demonstrates frontier-level image understanding. value: text, image; text model_card: unknown monitoring: unknown name: Pixtral Large nationality: USA organization: Mistral AI prohibited_uses: unknown quality_control: unknown size: explanation: Today we announce Pixtral Large, a 124B open-weights multimodal model. 
value: 124B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://mistral.ai/news/pixtral-large/ - access: explanation: "The API is also available on Google Cloud\u2019s Vertex AI, in private\ \ preview on Azure AI Foundry, and coming soon to Amazon Bedrock." value: closed analysis: Benchmarks We have benchmarked the new Codestral with the leading sub-100B parameter coding models that are widely considered to be best-in-class for FIM tasks. created_date: 2025-01-13 dependencies: [] description: Lightweight, fast, and proficient in over 80 programming languages, Codestral is optimized for low-latency, high-frequency usecases and supports tasks such as fill-in-the-middle (FIM), code correction and test generation. feedback: "We can\u2019t wait to hear your experience! Try it now Try it on Continue.dev\ \ with VsCode or JetBrains" intended_uses: Highly capable coding companion, regularly boosting productivity several times over. license: unknown modality: explanation: it for free in Continue for VS Code or JetBrains value: text; text model_card: unknown monitoring: unknown name: Codestral 25.01 nationality: USA organization: Mistral AI prohibited_uses: unknown quality_control: unknown size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://mistral.ai/news/codestral-2501/ ================================================ FILE: assets/moonhub.yaml ================================================ - access: limited adaptation: '' created_date: 2023-10-11 dependencies: - Cohere Base description: Moonhub Recruiter is the world's first AI-powered recruiter providing sourcing and recruiting services for startups and growing businesses. 
failures: '' feedback: '' intended_uses: recruiting candidates for business needs license: explanation: License information can be found in terms of service value: custom monitoring: '' monthly_active_users: unknown name: Moonhub Recruiter nationality: USA organization: Moonhub output_space: job candidate matches prohibited_uses: none quality_control: '' terms_of_service: https://www.moonhub.ai/terms type: application url: https://www.moonhub.ai/ user_distribution: unknown ================================================ FILE: assets/moreh.yaml ================================================ - access: open analysis: unknown created_date: 2024-01-16 dependencies: - Qwen - OpenOrca description: MoMo is a large language model fine-tuned from Qwen. feedback: https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO/discussions intended_uses: '' license: MIT modality: text; text model_card: https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO monitoring: unknown name: MoMo nationality: USA organization: Moreh prohibited_uses: '' quality_control: unknown size: 72B parameters (dense) training_emissions: unknown training_hardware: "AMD\u2019s MI250 GPU" training_time: unknown type: model url: https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO ================================================ FILE: assets/mosaic.yaml ================================================ - access: open analysis: Evaluated on a range of benchmarks and performed on par with LLaMA-7B. created_date: 2023-05-05 dependencies: - RedPajama-Data - C4 - The Stack - Multimodal C4 description: MPT is a series of large language models seeking to address the limitations of other open source models like LLaMA and Pythia. 
feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: MPT nationality: USA organization: Mosaic prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: 440 A100 40GB GPUs training_time: 9.5 days type: model url: https://www.mosaicml.com/blog/mpt-7b - access: open analysis: Compared to Stable Diffusion 2, a SOTA text-to-image model. created_date: 2023-10-25 dependencies: - CommonCatalog description: CommonCanvas is a text-to-image model trained solely on Creative Commons licensed images. feedback: none intended_uses: '' license: Apache 2.0 modality: text; image model_card: none monitoring: none name: CommonCanvas nationality: unknown organization: Cornell University, Mosaic prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: 128 A100 NVIDIA GPUs training_time: 6.79 days type: model url: https://arxiv.org/pdf/2310.16825.pdf - access: open analysis: none created_date: 2023-10-25 datasheet: https://github.com/mosaicml/diffusion/blob/main/assets/common-canvas.md dependencies: - YFCC100M - BLIP-2 description: CommonCatalog is a curated dataset of CommonCrawl images and synthetic captions. excluded: images with non-derivative licenses feedback: none included: images with derivative licenses intended_uses: '' license: Apache 2.0 modality: image-caption pairings monitoring: '' name: CommonCatalog nationality: USA organization: Mosaic prohibited_uses: '' quality_control: '' sample: [] size: 70M images type: dataset url: https://arxiv.org/pdf/2310.16825.pdf - access: open analysis: unknown created_date: 2024-10-08 dependencies: [] description: XTTS-v2 is a voice generation model that allows voice cloning into different languages using a brief 6-second audio clip, supporting 17 languages with features like emotion and style transfer, cross-language voice cloning, and multi-lingual speech generation. 
It powers Coqui Studio and Coqui API, with improvements in architectural and prosody aspects for better audio quality. feedback: Users can join the Coqui community on Discord, engage on Twitter, or send emails to info@coqui.ai for feedback and queries. intended_uses: Voice cloning, multi-lingual speech generation, emotion and style transfer in speech. license: Coqui Public Model modality: audio; audio model_card: https://huggingface.co/coqui/XTTS-v2 monitoring: unknown name: XTTS-v2 nationality: USA organization: Coqui prohibited_uses: unknown quality_control: unknown size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/coqui/XTTS-v2 ================================================ FILE: assets/nanyang.yaml ================================================ - access: open analysis: Evaluated on researcher experiments to test deeper understanding and advanced commonsense reasoning created_date: 2023-05-05 dependencies: - MIMIC-IT - OpenFlamingo description: "Otter is a multi-modal model based on OpenFlamingo (open-sourced version\ \ of DeepMind\u2019s Flamingo), trained on MIMIC-IT and showcasing improved instruction-following\ \ ability and in-context learning." feedback: '' intended_uses: Following and executing new instructions with few in-context learning examples given image and textual input. 
license: MIT modality: image, text; text model_card: https://github.com/Luodian/Otter/blob/main/docs/model_card.md monitoring: '' name: Otter nationality: Singapore organization: Nanyang Technological University prohibited_uses: '' quality_control: '' size: 1.3B parameters (dense) training_emissions: '' training_hardware: 4 RTX-3090 GPUs training_time: '' type: model url: https://arxiv.org/pdf/2305.03726v1.pdf ================================================ FILE: assets/naver.yaml ================================================ - access: explanation: The model is not available for download or direct API use value: closed analysis: '' created_date: explanation: The date the model paper was submitted to arxiv value: 2021-05-21 dependencies: [] description: HyperClova is an autoregressive language model feedback: '' intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: HyperCLOVA nationality: South Korea organization: NAVER prohibited_uses: '' quality_control: '' size: 82B parameters training_emissions: '' training_hardware: 1024 A100 GPUs training_time: 130.4 days type: model url: https://arxiv.org/abs/2109.04650 - access: limited analysis: Evaluated on English and Korean benchmarks in comparison to open source English and multilingual LLMs, with HyperCLOVA X (closed) surpassing the models compared. created_date: 2024-04-13 dependencies: [] description: HyperCLOVA X is a family of large language models (LLMs) tailored to the Korean language and culture, along with competitive capabilities in English, math, and coding. 
feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: HyperCLOVA X nationality: South Korea organization: NAVER prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2404.01954 ================================================ FILE: assets/ncsoft.yaml ================================================ - access: closed analysis: Boasts the highest performance among the Korean LLMs of similar sizes that have been released to date, according to internal evaluations. created_date: 2023-08-16 dependencies: [] description: "VARCO-LLM is NCSOFT\u2019s large language model and is trained on\ \ English and Korean." feedback: none intended_uses: Developing various NLP-based AI services such as Q&A, chatbot, summarization, information extraction license: explanation: Can be found at https://github.com/ncsoft/ncresearch/blob/main/LICENSE.txt value: custom modality: text; text model_card: none monitoring: '' name: VARCO-LLM nationality: South Korea organization: NCSOFT prohibited_uses: '' quality_control: '' size: 13B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/ncsoft/ncresearch ================================================ FILE: assets/neeva.yaml ================================================ - access: closed analysis: '' created_date: unknown datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: Neeva dataset nationality: USA organization: Neeva prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://neeva.com/index - access: closed analysis: '' created_date: unknown dependencies: - Neeva dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text 
model_card: '' monitoring: '' name: Neeva model nationality: USA organization: Neeva prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://neeva.com/index - access: open adaptation: '' created_date: explanation: The [[Blog post]](https://neeva.com/blog/introducing-neevaai) announcing NeevaAI. value: 2023-01-06 dependencies: - Neeva model description: NeevaAI is an AI-powered search tool that combines the capabilities of LLMs with Neeva's independent in-house search stack to create a unique and transformative search experience. failures: '' feedback: '' intended_uses: '' license: explanation: '"If you comply with these Terms, Neeva grants to you, during the term of these Terms, a limited non-exclusive, non-transferable license, with no right to sublicense, to download and install the App on your personal computers, mobile handsets, tablets, wearable devices, and/or other devices and to run the App solely for your own personal non-commercial purposes." - excerpt from the Terms of Service document. ' value: Custom monitoring: '' monthly_active_users: '' name: NeevaAI nationality: USA organization: Neeva output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://neeva.com/terms type: application url: https://neeva.com/blog/introducing-neevaai user_distribution: '' ================================================ FILE: assets/nextdoor.yaml ================================================ - access: open adaptation: '' created_date: 2023-05-02 dependencies: - ChatGPT description: AI chatbot on Nextdoor that helps users write more clear and conscientious posts. 
failures: '' feedback: '' intended_uses: to be used to help make the Nextdoor experience more positive for users license: unknown monitoring: '' monthly_active_users: '' name: Nextdoor Assistant nationality: USA organization: Nextdoor output_space: natural language text guidance prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://help.nextdoor.com/s/article/Introducing-Assistant user_distribution: '' ================================================ FILE: assets/nolano.yaml ================================================ - access: limited adaptation: '' created_date: 2023-03-19 dependencies: [] description: Cformers is a set of transformers that act as an API for AI inference in code. failures: '' feedback: '' intended_uses: '' license: MIT monitoring: '' monthly_active_users: '' name: Cformers nationality: USA organization: Nolano output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.nolano.org/services/Cformers/ user_distribution: '' - access: explanation: "T\xFClu3 is intended for research and educational use." value: limited analysis: The model can produce problematic outputs (especially when prompted to do so). created_date: 2024-11-21 dependencies: - Llama 3.1 description: "T\xFClu3 is a leading instruction following model family, offering\ \ fully open-source data, code, and recipes designed to serve as a comprehensive\ \ guide for modern post-training techniques." feedback: unknown intended_uses: "T\xFClu3 is intended for research and educational use." license: explanation: "All Llama 3.1 T\xFClu3 models are released under Meta's Llama 3.1\ \ Community License Agreement." 
value: Llama 3.1 Community License Agreement modality: explanation: 'Language(s) (NLP): Primarily English' value: text; text model_card: https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B monitoring: unknown name: Llama 3.1 Tulu 3 nationality: USA organization: Allen Institute for AI prohibited_uses: The model can produce problematic outputs (especially when prompted to do so). quality_control: "The T\xFClu3 models have limited safety training, but are not\ \ deployed automatically with in-the-loop filtering of responses like ChatGPT." size: explanation: Final Models (RLVR) allenai/Llama-3.1-Tulu-3-8B allenai/Llama-3.1-Tulu-3-70B value: 70B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B ================================================ FILE: assets/notion.yaml ================================================ - access: limited adaptation: '' created_date: 2023-02-22 dependencies: - Anthropic API description: "Notion AI is a connected assistant that helps you think bigger, work\ \ faster, and augments your creativity, right inside the functional workspace\ \ you\u2019re already familiar with." failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: Notion AI nationality: USA organization: Notion output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.notion.so/help/guides/notion-ai-for-docs user_distribution: '' ================================================ FILE: assets/nous.yaml ================================================ - access: open analysis: Evaluated across standard benchmarks and generally performs better than Mixtral, which it was fine-tuned on. created_date: 2024-01-10 dependencies: - Mixtral description: "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model\ \ trained over the\_Mixtral 8x7B MoE LLM." 
feedback: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/discussions intended_uses: '' license: Apache 2.0 modality: text; code, text model_card: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO monitoring: unknown name: Nous Hermes 2 nationality: Canada organization: Nous Research prohibited_uses: '' quality_control: unknown size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO - access: open analysis: Evaluated across a variety of standard benchmarks in comparison to LLaMA 2. created_date: 2023-11-01 dependencies: - LLaMA 2 description: YaRN LLaMA 2 is an adapted version of LLaMA 2 using the YaRN extension method. feedback: https://huggingface.co/NousResearch/Yarn-Llama-2-70b-32k/discussions intended_uses: '' license: LLaMA 2 modality: text; text model_card: https://huggingface.co/NousResearch/Yarn-Llama-2-70b-32k monitoring: unknown name: YaRN LLaMA 2 nationality: Canada/Switzerland organization: Nous Research, EleutherAI, University of Geneva prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2309.00071.pdf - access: open analysis: none created_date: 2023-11-13 dependencies: - Yi description: The Capybara series is a series of LLMs and the first Nous collection of models made by fine-tuning mostly on data created by Nous in-house. 
feedback: https://huggingface.co/NousResearch/Nous-Capybara-34B/discussions intended_uses: '' license: MIT modality: text; text model_card: https://huggingface.co/NousResearch/Nous-Capybara-34B monitoring: unknown name: Nous Capybara nationality: Canada organization: Nous Research prohibited_uses: '' quality_control: '' size: 34B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/NousResearch/Nous-Capybara-34B - access: open analysis: Evaluated across a variety of standard benchmarks in comparison to Mistral. created_date: 2023-11-01 dependencies: - Mistral description: YaRN Mistral is an adapted version of Mistral using the YaRN extension method. feedback: https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k/discussions intended_uses: '' license: MIT modality: text; text model_card: https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k monitoring: unknown name: YaRN Mistral nationality: Canada/Switzerland organization: Nous Research, EleutherAI, University of Geneva prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2309.00071.pdf - access: open analysis: Evaluated on common LLM benchmarks in comparison to other Mistral derivatives. created_date: 2023-11-03 dependencies: - Mistral description: OpenHermes 2.5 Mistral 7B is a state of the art Mistral Fine-tune, a continuation of OpenHermes 2 model, trained on additional code datasets. 
feedback: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B monitoring: unknown name: OpenHermes 2.5 Mistral nationality: Canada organization: explanation: developed as a personal project by Teknium, co-founder of Nous Research value: Nous Research prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B - access: open analysis: The model was examined across a range of benchmarks including GPT4All, AGIEval, BigBench, TruthfulQA and in-house evaluations of function calling and JSON mode. created_date: 2024-03-10 dependencies: - Mistral - OpenHermes 2.5 Dataset - Nous Hermes 2 description: Hermes 2 Pro on Mistral 7B is an upgraded, retrained version of Nous Hermes 2. This improved version excels at function calling, JSON Structured Outputs, and several other areas, scoring positively on various benchmarks. feedback: https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B/discussions intended_uses: The model is intended for general task and conversation capabilities, function calling, and JSON structured outputs. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B monitoring: unknown name: Hermes 2 Pro-Mistral nationality: unknown organization: Nous prohibited_uses: unknown quality_control: The model was evaluated across multiple tasks, displaying notable scores in GPT4All, AGIEval, BigBench, and TruthfulQA. It also has a high score on function calling and JSON mode, indicating the robustness of its capabilities. 
size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B - access: open analysis: unknown created_date: 2024-03-07 dependencies: [] description: Genstruct is an instruction-generation model, designed to create valid instructions given a raw text corpus. This enables the creation of new, partially synthetic instruction finetuning datasets from any raw-text corpus. This work was inspired by Ada-Instruct and the model is also trained to generate questions involving complex scenarios that require detailed reasoning. feedback: https://huggingface.co/NousResearch/Genstruct-7B/discussions intended_uses: The model is intended for instruction-generation, creating questions involving complex scenarios and generating reasoning steps for those questions. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/NousResearch/Genstruct-7B monitoring: unknown name: Genstruct nationality: unknown organization: Nous prohibited_uses: unknown quality_control: unknown size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/NousResearch/Genstruct-7B ================================================ FILE: assets/nucleus.yaml ================================================ - access: open analysis: Evaluated on the OpenLLM leaderboard, performing on par with similar-sized models. created_date: 2023-10-05 dependencies: - RefinedWeb description: Nucleus is a 22B parameters causal decoder-only model built by Nucleus.AI and trained on 500B tokens of RefinedWeb along with curated corpora. feedback: https://huggingface.co/NucleusAI/nucleus-22B-token-500B/discussions intended_uses: Research on large language models; as a foundation for further specialization and finetuning for specific usecases (e.g., summarization, text generation, chatbot, etc.) 
license: MIT modality: text; text model_card: https://huggingface.co/NucleusAI/nucleus-22B-token-500B monitoring: unknown name: Nucleus nationality: USA organization: Nucleus.AI prohibited_uses: Production use without adequate assessment of risks and mitigation; any use cases which may be considered irresponsible or harmful. quality_control: '' size: 22B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: 2 weeks type: model url: https://www.withnucleus.ai/ ================================================ FILE: assets/nvidia.yaml ================================================ - access: explanation: 'Neither the 8.3B parameter model trained to convergence nor the 1 trillion parameter model is available for download ' value: closed analysis: '' created_date: explanation: The date the paper for the 1 trillion parameter model was published value: 2021-04-09 dependencies: [] description: Megatron-LM is an autoregressive language model feedback: none intended_uses: none license: unknown modality: text; text model_card: none monitoring: none name: Megatron-LM nationality: USA organization: NVIDIA prohibited_uses: none quality_control: unknown size: 1T parameters (dense) training_emissions: unknown training_hardware: 3072 A100 GPUs training_time: 84 days type: model url: https://arxiv.org/abs/2104.04473 - access: open analysis: '' created_date: 2022-06-17 datasheet: '' dependencies: - YouTube - Wikipedia - Reddit description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: MIT modality: text, video monitoring: '' name: MineDojo nationality: USA organization: NVIDIA prohibited_uses: '' quality_control: '' sample: [] size: 730k videos, 6k Wikipedia pages, 340k reddit posts type: dataset url: https://arxiv.org/abs/2206.08853 - access: open analysis: '' created_date: 2022-10-06 datasheet: '' dependencies: - T5 - Mask R-CNN - VIMA dataset description: '' excluded: '' feedback: '' included: '' intended_uses: '' license:
MIT modality: image, text monitoring: '' name: VIMA dataset nationality: unknown organization: NVIDIA, Stanford prohibited_uses: '' quality_control: '' sample: [] size: 200M parameters (dense model) type: dataset url: https://vimalabs.github.io/ - access: open analysis: '' created_date: 2022-10-06 dependencies: [] description: '' feedback: '' intended_uses: '' license: MIT modality: image, text; robotics trajectories model_card: '' monitoring: '' name: VIMA nationality: unknown organization: NVIDIA, Stanford prohibited_uses: '' quality_control: '' size: 200M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://vimalabs.github.io/ - access: open analysis: Evaluated on standard LLM benchmarks across a range of fields like reasoning, code generation, and mathematical skills. created_date: 2024-02-27 dependencies: [] description: Nemotron 4 is a 15-billion-parameter large multilingual language model trained on 8 trillion text tokens. feedback: none intended_uses: '' license: unknown modality: text; code, text model_card: none monitoring: unknown name: Nemotron 4 nationality: USA organization: Nvidia prohibited_uses: '' quality_control: Deduplication and quality filtering techniques are applied to the training dataset. size: 15B parameters (dense) training_emissions: unknown training_hardware: 3072 H100 80GB SXM5 GPUs across 384 DGX H100 nodes training_time: 13 days type: model url: https://arxiv.org/pdf/2402.16819.pdf ================================================ FILE: assets/oasst.yaml ================================================ - access: open analysis: none created_date: 2023-08-23 dependencies: - LLaMA 2 description: OpenAssistant LLaMA 2 is an Open-Assistant fine-tuning of Meta's LLaMA 2. 
feedback: https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10/discussions intended_uses: '' license: LLaMA 2 modality: text; text model_card: https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10 monitoring: unknown name: OpenAssistant LLaMA 2 nationality: International organization: OpenAssistant prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10 ================================================ FILE: assets/ollama.yaml ================================================ - access: open analysis: Mainly evaluated on MT-Bench and AlpacaEval, which are GPT-4-based comparisons. created_date: 2023-11-02 dependencies: [] description: Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness. feedback: https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha/discussions intended_uses: Academic research and free commercial usage license: CC BY NC 4.0 modality: text; text model_card: https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha monitoring: none name: Starling nationality: USA organization: Ollama prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://starling.cs.berkeley.edu/ ================================================ FILE: assets/openai.yaml ================================================ - access: explanation: The GPT-3 dataset isn't released to the public, but it may be available to Microsoft through the GPT-3 licensing agreement between OpenAI and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). 
value: closed analysis: The GPT-3 paper, which also introduces the GPT-3 dataset, provides a limited analysis on the GPT-3 dataset, reporting the dirtiness of the dataset after it was filtered for text occurring in common benchmarking tasks. The authors report that "as the dataset becomes more contaminated, the variance of the clean over all fraction increases, but there is no apparent bias towards improved or degraded performance" [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C). created_date: explanation: The date for the public announcement of GPT-3. The GPT-3 dataset didn''t have a specific release date separate from the model [[Open AI Blog Post]](https://openai.com/blog/openai-api/). value: 2020-06-11 datasheet: explanation: No datasheet available as of 2022-04-04. value: none dependencies: - WebText description: The GPT-3 dataset is the text corpus that was used to train the GPT-3 model. Information on the GPT-3 dataset is limited to discussion in the paper introducing GPT-3 [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2). excluded: The Common Crawl dataset was processed using a classifier that kept high quality documents and filtered low quality documents. WebText was used as a proxy for high quality documents [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A). feedback: explanation: There are no known (internal or external) feedback mechanisms for the GPT-3 dataset as of 2022-04-04. value: unknown included: The dataset is composed of several NLP corpora including Common Crawl (filtered, 60%), WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%) [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2). intended_uses: The intended use of the GPT-3 dataset is to train language models.
license: explanation: There is no known license specific to the GPT-3 dataset, however, the governing organization, OpenAI, licensed GPT-3 to Microsoft, which makes it likely that the GPT-3 dataset was also licensed [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). value: unknown modality: text monitoring: explanation: There are no known (internal or external) monitoring mechanisms that are in place for the use of the GPT-3 dataset as of 2022-04-04. value: unknown name: GPT-3 dataset nationality: USA organization: OpenAI prohibited_uses: explanation: OpenAI didn't provide a list of prohibited uses specifically for the GPT-3 dataset. However, public OpenAI products are governed by the OpenAI Terms of Use, which may also apply to the OpenAI dataset. The OpenAI Terms of Use prohibit the following, (i) Illegal activities, such as child pornography, gambling, cybercrime, piracy, violating copyright, trademark or other intellectual property laws; (ii) Accessing or authorizing anyone to access the APIs from an embargoed country, region, or territory as prohibited by the U.S. government; (iii) Threatening, stalking, defaming, defrauding, degrading, victimizing or intimidating anyone for any reason [[Open AI Terms of Use]](https://openai.com/api/policies/terms/). value: unknown quality_control: In addition to excluding low quality documents from the Common Crawl dataset, the authors fuzzily deduplicated documents within each dataset, by removing documents that have high overlap with each other. The same procedure was followed to fuzzily deduplicate WebText from Common Crawl [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A). Text occurring in benchmark datasets were also partially removed [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C). 
sample: [] size: 570 GB type: dataset url: https://arxiv.org/pdf/2005.14165.pdf - access: explanation: 'HumanEval dataset is publicly available and comes with an evaluation framework [[HumanEval GitHub Repository]](https://www.github.com/openai/human-eval). ''' value: open analysis: none created_date: explanation: 'The date that Codex, the model evaluated on the HumanEval dataset, was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). ''' value: 2021-08-10 datasheet: explanation: No datasheet available as of 2022-04-10. value: none dependencies: [] description: 'HumanEval is a dataset of 164 programming problems hand-written to evaluate their Codex model. ' excluded: 'Code problems easily found on the internet. ' feedback: 'Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). ' included: '164 hand-written questions. ' intended_uses: 'Evaluating code generation capabilities of models. ' license: explanation: https://github.com/openai/human-eval/blob/master/LICENSE value: MIT modality: code monitoring: none name: HumanEval nationality: USA organization: OpenAI prohibited_uses: none quality_control: 'The evaluation dataset was handwritten to ensure that the evaluation problems do not exist in the Codex dataset [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2).
' sample: - "\n\ndef string_sequence(n: int) -> str:\n \"\"\" Return a string containing\ \ space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n\ \ '0'\n >>> string_sequence(5)\n '0 1 2 3 4 5'\n \"\"\"\n" - "\n\ndef count_distinct_characters(string: str) -> int:\n \"\"\" Given a string,\ \ find out how many distinct characters (regardless of case) does it consist of\n\ \ >>> count_distinct_characters('xyzXYZ')\n 3\n >>> count_distinct_characters('Jerry')\n\ \ 4\n \"\"\"\n" - "from typing import List\n\n\ndef parse_music(music_string: str) -> List[int]:\n\ \ \"\"\" Input to this function is a string representing musical notes in a\ \ special ASCII format.\n Your task is to parse this string and return list\ \ of integers corresponding to how many beats does each\n not last.\n\n \ \ Here is a legend:\n 'o' - whole note, lasts four beats\n 'o|' - half note,\ \ lasts two beats\n '.|' - quater note, lasts one beat\n\n >>> parse_music('o\ \ o| .| o| o| .| .| .| .| o o')\n [4, 2, 1, 2, 2, 1, 1, 1, 1, 4, 4]\n \"\ \"\"\n" - "\n\ndef how_many_times(string: str, substring: str) -> int:\n \"\"\" Find\ \ how many times a given substring can be found in the original string. 
Count\ \ overlaping cases.\n >>> how_many_times('', 'a')\n 0\n >>> how_many_times('aaa',\ \ 'a')\n 3\n >>> how_many_times('aaaa', 'aa')\n 3\n \"\"\"\n" - "from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n \"\"\ \" Input is a space-delimited string of numberals from 'zero' to 'nine'.\n \ \ Valid choices are 'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',\ \ 'eight' and 'nine'.\n Return the string with numbers sorted from smallest\ \ to largest\n >>> sort_numbers('three one five')\n 'one three five'\n \ \ \"\"\"\n" size: 214 KB type: dataset url: https://arxiv.org/pdf/2107.03374.pdf - access: explanation: 'The dataset might have been made available to Microsoft as part of OpenAI giving Microsoft access to its Codex model [GitHub Copilot](https://copilot.github.com/). ''' value: closed analysis: explanation: The paper doesn't provide an analysis on the training dataset. value: none created_date: explanation: 'The date that Codex, the model trained on the Codex dataset, was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). ''' value: 2021-08-10 datasheet: none dependencies: [] description: 'The dataset used to train the Codex model. ' excluded: 'Following were filtered from the dataset: autogenerated files; files with average line length > 100, maximum line length > 1000, or few alphanumeric characters [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). ' feedback: 'Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). ' included: 'The dataset includes 54 million public software repositories hosted on GitHub as of an unspecified date in May 2020 [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). ' intended_uses: Training language models on code. 
license: explanation: unknown modality: {} monitoring: unknown name: Codex dataset nationality: USA organization: OpenAI prohibited_uses: unknown quality_control: 'Dataset was filtered using simple heuristics, as outlined in the excluded field. ' sample: [] size: explanation: As reported by the authors [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). value: 159 GB type: dataset url: https://arxiv.org/pdf/2107.03374.pdf - access: explanation: The dataset wasn't released to the public. value: closed analysis: 'The dataset contained some overlap with the test sets of the benchmarks used for evaluation, but the authors determined the impact to be small: "There is a median overlap of 2.2% and an average overlap of 3.2%. Due to this small amount of overlap, overall accuracy is rarely shifted by more than 0.1% with only 7 datasets above this threshold" [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5). ' created_date: explanation: 'The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/). ''' value: 2021-01-05 datasheet: none dependencies: [] description: 'CLIP dataset contains text-image pairs crawled from the internet. ' excluded: none feedback: explanation: The feedback mechanisms in place are unknown. value: unknown included: 'Data crawled from the internet, without any filtering (including de-duplication) or curation. ' intended_uses: Training multimodal vision models. license: unknown modality: image, text monitoring: explanation: The monitoring mechanisms in place are unknown. value: unknown name: CLIP dataset nationality: USA organization: OpenAI prohibited_uses: explanation: The prohibited uses of the dataset are unknown. value: unknown quality_control: 'The data was "only crawled websites that had policies against excessively violent and adult images and allowed us to filter out such content" [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). 
' sample: [] size: 400M (image, text) pairs type: dataset url: https://arxiv.org/pdf/2103.00020.pdf - access: explanation: The dataset wasn't released to the public. value: closed analysis: "The authors found that the dataset contained 21% of the images in the\ \ MS-COCO validation set, but observed no significant changes in the performance\ \ of the accompanying DALL\xB7E when tested on MS-COCO evaluation set with and\ \ without the said images [[Section 3.1]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.1)." created_date: explanation: "The date of the blog post announcing DALL\xB7E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" value: 2021-01-05 datasheet: none dependencies: [] description: "DALL\xB7E dataset is the training set consisting of image and text\ \ pairs collected to train the DALL\xB7E model.\n" excluded: 'MS-COCO was excluded from the dataset, but because MS-COCO was created from YFCC100M, some of the test images (not the captions) were included. ' feedback: explanation: The feedback mechanisms in place are unknown. value: unknown included: 'Data from the internet, including Conceptual Captions and a filtered subset of YFCC100M. ' intended_uses: Training multimodal vision models. license: unknown modality: image, text monitoring: explanation: The monitoring mechanisms in place are unknown. value: unknown name: "DALL\xB7E dataset" nationality: USA organization: OpenAI prohibited_uses: explanation: The prohibited uses of the dataset are unknown. 
value: unknown quality_control: "The data was de-duplicated [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2).\ \ The data collected from the internet was filtered using image, text and joint\ \ image and text filters, which included: \"discarding instances whose captions\ \ are too short, are classified as non-English by the Python package cld3, or\ \ that consist primarily of boilerplate phrases such as \u201Cphotographed on\ \ <date>\u201D, where <date> matches various formats for dates that we found in\ \ the data\". The authors also discard \"instances whose images have aspect ratios\ \ not in [1/2, 2]\" [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C).\n" sample: [] size: '250M (image, text) pairs ' type: dataset url: https://arxiv.org/abs/2102.12092 - access: explanation: 'The dataset isn''t released to the public. ''' value: closed analysis: 'The Whisper paper provides limited details on preprocessing. ' created_date: explanation: 'The date for the public announcement of Whisper. The dataset didn''''t have a specific release date separate from the model [[Open AI Blog Post]](https://openai.com/blog/whisper/). ''' value: 2022-09-21 datasheet: explanation: No datasheet available as of 2022-12-07. value: none dependencies: [] description: 'The Whisper dataset is the speech corpus that was used to train the Whisper model. Information on the dataset is limited to discussion in the paper introducing Whisper. [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf). ' excluded: 'Automated filtering was conducted. ' feedback: explanation: 'There are no known (internal or external) feedback mechanisms for the dataset as of 2022-12-07. ''' value: unknown included: 'The dataset is composed of three major sources: multilingual speech recognition (17%), translation (18%), and English speech recognition (65%). [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf). ' intended_uses: The intended use is to train speech models.
license: unknown modality: audio monitoring: explanation: 'There are no known (internal or external) monitoring mechanisms that are in place for the use of the dataset as of 2022-12-07. ''' value: none name: Whisper dataset nationality: USA organization: OpenAI prohibited_uses: explanation: 'No uses are explicitly prohibited. ''' value: none quality_control: 'In addition to filtering, basic text standardization was done. ' sample: [] size: 680k hours type: dataset url: https://cdn.openai.com/papers/whisper.pdf - access: closed analysis: '' created_date: explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. value: 2019-11-01 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: WebText nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' sample: [] size: 40 GB type: dataset url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf - access: open analysis: '' created_date: explanation: Due to the lack of information about the exact date, it is assumed to be the 1st of the known month of creation. value: 2019-11-01 dependencies: - WebText description: '' feedback: '' intended_uses: '' license: explanation: 'GPT-2 uses a [[modified MIT License]](https://github.com/openai/gpt-2/blob/master/LICENSE). 
''' value: Modified MIT License modality: text; text model_card: https://github.com/openai/gpt-2/blob/master/model_card.md monitoring: '' name: GPT-2 nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: 1.5B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf - access: explanation: 'The GPT-3 model isn''t fully released to the public, but it was made available to Microsoft through the licencing agreement between OpenAI and Microsoft [[OpenAI Blog Post]] (https://openai.com/blog/openai-licenses-gpt-3-technology-to-microsoft/). The public can access the model through the Open AI API, which is available in supported countries [[Supported Countries]](https://beta.openai.com/docs/supported-countries) [[OpenAI API]](https://openai.com/api/). ''' value: limited analysis: 'The GPT-3 model was evaluated on language modeling, closed-book question answering, translation, Winograd-style tasks, commonsense reasoning, reading comprehension, SuperGLUE, NLI, synthetic tasks, and generation [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4); as well as on fairness and biases [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). ' created_date: explanation: 'The date that GPT-3 was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-api/). ''' value: 2020-06-11 dependencies: - GPT-3 dataset description: 'GPT-3 is an autoregressive language model. ' feedback: 'Feedback for GPT-3 can be provided on the feedback form linked in the model card [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). The form is especially meant to collect feedback on concerns about misuse, synthetic text detection, bias, and risk of generative language models. 
' intended_uses: 'GPT-3 was intended to be used through the OpenAI API by developers for language applications. Other intended uses of GPT-3 include researchers accessing the model through the API to study its paradigms [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). ' license: unknown modality: text; text model_card: https://github.com/openai/gpt-3/blob/master/model-card.md monitoring: 'OpenAI reviews all use cases of the model [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). ' name: GPT-3 nationality: USA organization: OpenAI prohibited_uses: 'Access to GPT-3 is governed by Open AI API Usage Guidelines and API Terms of Use, prohibiting the use of the API in a way that causes societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy) [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed applications can be found in the usage guidelines [[Disallowed Applications]] (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). ' quality_control: 'One quality control method OpenAI employed was releasing GPT-3 only through the OpenAI API. OpenAI states that it is easier to respond to misuse when the access to the model is gated through the API. It also hints that it plans to broaden the API access over time based on the amount of misuse [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). The authors identify potential misuses of GPT-3 in the paper and analyze it for fairness, bias and representation issues, but do not identify mitigation strategies [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). ' size: explanation: 'GPT-3 comes in several sizes. Here we report the size of the Davinci model, the largest GPT-3 model served through the OpenAI API. Sizes of the other models can be found in the paper [[Table 2.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.7).
' value: 175B parameters (dense) training_emissions: explanation: 'Estimate of the CO2(e) emissions for GPT-3 were not provided by OpenAI, but they were provided by a follow up work investigating the CO2 equivalent emissions (CO2e) of GPT-3 [[Patterson et al.]] (https://arxiv.org/ftp/arxiv/papers/2104/2104.10350.pdf). ''' value: 552.1 tCO2e training_hardware: explanation: 'The original paper doesn''''t specify the training hardware for GPT-3, but a follow up blog post indicates that it was trained on a cluster on Azure cluster, using 10000 GPUs with 400 Gbps [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/). ''' value: Azure training_time: explanation: 'The time required to train different sized GPT-3 models are listed in [[Table D.1]](https://arxiv.org/pdf/2005.14165.pdf#table.caption.50). The time required to train the GPT-3 model with 175B parameters is reported as 3.64E+03 petaflop/s-days. ''' value: 3640 petaflop/s-days type: model url: https://arxiv.org/pdf/2005.14165.pdf - access: explanation: 'The model is made available via the OpenAI API [[OpenAI API]](https://openai.com/api/) as code-cushman-001 according to the [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). ''' value: limited analysis: 'The model was evaluated using the HumanEval dataset with pass@k metric and BLEU scores [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2). ' created_date: explanation: 'The date that Codex was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/openai-codex/). ''' value: 2021-08-10 dependencies: - GPT-3 - Codex dataset - HumanEval description: 'Codex is a GPT language model fine-tuned on publicly available code from GitHub. ' feedback: 'Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). ' intended_uses: 'Codex is intended to be used for coding related language modelling tasks. 
' license: unknown modality: text; code, text model_card: none monitoring: explanation: 'There isn''t any known monitoring in place for the model, but there may be internal mechanisms. ''' value: unknown name: Codex nationality: USA organization: OpenAI prohibited_uses: explanation: The prohibited uses of the model aren't specified. value: unknown quality_control: 'The model wasn''t fully released to the public as a quality control measure. The authors identify potential risks of Codex in their paper due to the following: over-reliance, misalignment, bias and representation, economic and labor market impacts, security implications, environmental impact and legal implications. They also make suggestions for some of these, but do not implement them in Codex [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7). ' size: 12B parameters (dense) training_emissions: explanation: Authors do not report the training emissions. value: unknown training_hardware: explanation: 'The paper specifies that Azure was used, but the underlying architecture wasn''''t specified. ''' value: Azure training_time: explanation: 'Authors estimate hundreds of petaflop/s-days of compute [[Section 7.6]](https://arxiv.org/pdf/2107.03374.pdf#subsection.7.6), but don''''t provide an exact number. ''' value: 100-1000 petaflop/s-days type: model url: https://arxiv.org/pdf/2107.03374.pdf - access: explanation: 'The model is made available via the OpenAI API but the specific endpoint (davinci-instruct-beta) is currently unavailable. [[Model Index]](https://platform.openai.com/docs/model-index-for-researchers). ''' value: closed analysis: The model was evaluated on human ratings to the InstructGPT answers to the prompts submitted to the OpenAI API as well as on public NLP datasets spanning truthfulness, toxicity, and bias, question answering, reading comprehension, and summarization tasks. 
created_date: explanation: 'Date of the public announcement introducing InstructGPT [[OpenAI Blog Post]] (https://openai.com/blog/instruction-following/). ''' value: 2022-01-27 dependencies: - GPT-3 - OpenAI API description: 'InstructGPT is a family of GPT-3 based models fine-tuned on human feedback, which allows for better instruction following capabilities than GPT-3. ' feedback: 'Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf). ' intended_uses: 'As stated in the model card: "The intended direct users of InstructGPT are developers who access its capabilities via the OpenAI API. Through the OpenAI API, the model can be used by those who may not have AI development experience, to build and explore language modeling systems across a wide range of functions. We also anticipate that the model will continue to be used by researchers to better understand the behaviors, capabilities, biases, and constraints of large-scale language models" [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md). ' license: unknown modality: text; code, text model_card: https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md monitoring: explanation: 'There isn''t any known monitoring in place for the model, but there may be internal mechanisms. ''' value: unknown name: InstructGPT nationality: USA organization: OpenAI prohibited_uses: 'Access to InstructGPT is governed by Open AI API Usage Guidelines and API Terms of Use, prohibiting the use of the API in a way that causes societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy) [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed applications can be found in the usage guidelines [[Disallowed Applications]] (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). 
' quality_control: 'The model wasn''t fully released to the public as a quality control measure. ' size: 175B parameters (dense) training_emissions: explanation: The authors do not estimate the emissions of the model. value: unknown training_hardware: explanation: The authors do not disclose the training hardware used. value: unknown training_time: explanation: '175B SFT model required 4.9 petaflops/s-days; 175B PPO-ptx model required 60 petaflops/s-days [[Section 5]](https://arxiv.org/pdf/2203.02155.pdf#section.5). ''' value: 60 petaflops/s-days type: model url: https://arxiv.org/pdf/2203.02155.pdf - access: explanation: 'Models are available at [https://github.com/openai/whisper](https://github.com/openai/whisper). ''' value: open analysis: The model was evaluated for zero-shot English and multilingual speech recognition, translation, language identification and robustness to noise. created_date: explanation: 'The date that Whisper was announced to the public [[OpenAI Blog Post]](https://openai.com/blog/whisper/). ''' value: 2022-09-21 dependencies: - Whisper dataset description: Whisper is an audio transcription software. feedback: 'The discussions page of the codebase is not formally cited as a place for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions) ' intended_uses: 'Whisper is a general-purpose speech recognition model; it is a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification. ' license: explanation: https://github.com/openai/whisper value: MIT modality: audio; text model_card: https://github.com/openai/whisper/blob/main/model-card.md monitoring: explanation: 'No monitoring is mentioned by the authors. ''' value: none name: Whisper nationality: USA organization: OpenAI prohibited_uses: unknown quality_control: No specific quality control methods are documented. 
size: 1.5B parameters (dense) training_emissions: unknown training_hardware: explanation: 'The original paper doesn''''t specify the training hardware for Whisper, but we expect it is trained on Azure given other models from OpenAI are trained using Azure [[Microsoft Blog Post]] (https://blogs.microsoft.com/ai/openai-azure-supercomputer/). ''' value: Azure training_time: unknown type: model url: https://cdn.openai.com/papers/whisper.pdf - access: explanation: 'Model checkpoints and the helper code can be accessed at the official CLIP repository [[CLIP Repository]](https://github.com/openai/CLIP). ''' value: open analysis: The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) and showed robust state of the art results. created_date: explanation: 'The date of the blog post announcing CLIP [[OpenAI Blog Post]](https://openai.com/blog/clip/). ''' value: 2021-01-05 dependencies: - CLIP dataset description: "\"CLIP (Contrastive Language-Image Pre-Training) is a neural network\ \ trained on a variety of (image, text) pairs. It can be instructed in natural\ \ language to predict the most relevant text snippet, given an image, without\ \ directly optimizing for the task, similarly to the zero-shot capabilities of\ \ GPT-2 and 3. We found CLIP matches the performance of the original ResNet50\ \ on ImageNet \u201Czero-shot\u201D without using any of the original 1.28M labeled\ \ examples, overcoming several major challenges in computer vision\" [[CLIP Repository]](https://github.com/openai/CLIP).\n" feedback: Questions can be shared at the feedback form linked in the CLIP model card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). intended_uses: 'The model is intended to be used by AI researchers to better understand "robustness, generalization, and other capabilities, biases, and constraints of computer vision models" [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). 
' license: MIT modality: image, text; text model_card: https://github.com/openai/CLIP/blob/main/model-card.md monitoring: explanation: There are no monitoring mechanisms in place for CLIP. value: none name: CLIP nationality: USA organization: OpenAI prohibited_uses: "\"Any deployed use case of the model - whether commercial or not\ \ - is currently out of scope. Non-deployed use cases such as image search in\ \ a constrained environment, are also not recommended unless there is thorough\ \ in-domain testing of the model with a specific, fixed class taxonomy. This is\ \ because our safety assessment demonstrated a high need for task specific testing\ \ especially given the variability of CLIP\u2019s performance with different class\ \ taxonomies. This makes untested and unconstrained deployment of the model in\ \ any use case currently potentially harmful.\nCertain use cases which would fall\ \ under the domain of surveillance and facial recognition are always out-of-scope\ \ regardless of performance of the model. This is because the use of artificial\ \ intelligence for tasks such as these can be premature currently given the lack\ \ of testing norms and checks to ensure its fair use.\nSince the model has not\ \ been purposefully trained in or evaluated on any languages other than English,\ \ its use should be limited to English language use cases\" [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md).\n" quality_control: 'The authors found that the performance of the model depended heavily on which classes are included (and excluded) for a given task. They reported significant race and gender based disparities on the Fairface dataset, depending on how the classes were constructed. The authors also demonstrated that the model was capable of racial profiling with high accuracy [[Section 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7). 
' size: explanation: 'The total size is unknown, but the largest CLIP model is a combination of 63M-parameter (dense) text encoder and a 307M-parameter vision encoder. ' value: unknown training_emissions: unknown training_hardware: NVIDIA V100 GPUs training_time: explanation: "The exact training time of CLIP depends on the vision and language\ \ encoders used: \"The largest ResNet model, RN50x64, took 18 days to train\ \ on 592 V100 GPUs while the largest Vision Transformer took 12 days on 256\ \ V100 GPUs. For the ViT-L/14 we also pre-train at a higher 336 pixel resolution\ \ for one additional epoch to boost performance ... Unless otherwise specified,\ \ all results reported in this paper as \u201CCLIP\u201D use this model which\ \ we found to perform best\" [[CLIP paper]](https://arxiv.org/pdf/2103.00020.pdf).\n\ Using the GPU Time method outlined in the [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum),\ \ we can compute the training time in petaflop/s-day for RN50x64 and ViT-L/14\ \ with the following equation: Number of GPUs * (peta-flops/GPU) * days trained\ \ * estimated utilization. We will assume that estimated utilization is 33%,\ \ following [[OpenAI AI and Computer Blog]](https://openai.com/blog/ai-and-compute/#addendum).\ \ The specific V100 GPU used isn't clear from the paper, so we will assume\ \ that V100 PCIe was used. The V100 PCIe GPUs have a single precision performance\ \ of 15.7 teraflops [[V100 Datasheet]](https://images.nvidia.com/content/technologies/volta/pdf/volta-v100-datasheet-update-us-1165301-r5.pdf),\ \ which is equal to 15.7 / 1000 = 0.0157 petaflops.\nFollowing the formula \"\ Number of GPUs * (peta-flops/GPU) * days trained * estimated utilization\",\ \ we can compute the petaflop/s-day for RN50x64 as follows: 592 * 0.0157 * 18\ \ * 0.33. Similarly, for ViT-L/14, we can compute the same as: 256 * 0.0157\ \ * 12 * 0.33. 
Adding the two, we estimate the total training time as 71.12\ \ petaflop/s-day.\n" value: 71.12 petaflop/s-day type: model url: https://arxiv.org/pdf/2103.00020.pdf - access: explanation: Available via the OpenAI API as of Q1 2023. value: limited analysis: "The model was evaluated against three prior approaches, AttnGAN, DM-GAN,\ \ and DF-GAN using Inception Score and Fr\xE9chet Inception Distance on MS-COCO\ \ as metrics. The model was also evaluated by humans and received the majority\ \ of the votes in generating images that look realistic and better match the caption\ \ when compared to the images generated by DF-GAN [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3).\n" created_date: explanation: "The date of the blog post announcing DALL\xB7E [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" value: 2021-01-05 dependencies: - "DALL\xB7E dataset" description: "DALL\xB7E is a GPT-3 based model trained to generate images from text\ \ descriptions. The authors found that it had \"a diverse set of capabilities,\ \ including creating anthropomorphized versions of animals and objects, combining\ \ unrelated concepts in plausible ways, rendering text, and applying transformations\ \ to existing images\" [[OpenAI Blog Post]](https://openai.com/blog/dall-e/).\n" feedback: 'Contact the paper author(s) specified on the paper [[Paper]](https://arxiv.org/pdf/2102.12092.pdf). ' intended_uses: '"The model is intended for others to use for training their own generative models" [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md). ' license: unknown modality: text; image model_card: https://github.com/openai/DALL-E/blob/master/model_card.md monitoring: explanation: "There are no monitoring mechanisms in place for DALL\xB7E." value: none name: "DALL\xB7E" nationality: USA organization: OpenAI prohibited_uses: explanation: The prohibited uses of the model are unknown. 
value: unknown quality_control: unknown size: 12B parameters (dense) training_emissions: explanation: The training emissions were not reported. value: unknown training_hardware: NVIDIA V100 GPUs training_time: explanation: The training time was not reported. value: unknown type: model url: https://arxiv.org/pdf/2102.12092.pdf - access: explanation: The model can be downloaded from the [Github repository](https://github.com/openai/jukebox) value: open analysis: Evaluations in paper are primarily considering the fidelity and novelty of samples from Jukebox. created_date: explanation: The date the model paper was released value: 2020-04-30 dependencies: - Jukebox Dataset description: Jukebox is a generative model that produces music feedback: none intended_uses: '' license: explanation: 'The license is provided in the [Github repository](https://github.com/openai/jukebox). ''' value: Noncommercial Use License modality: text; audio model_card: explanation: No model card found as of 2023-01-09 value: none monitoring: none name: Jukebox nationality: USA organization: OpenAI prohibited_uses: none quality_control: '' size: 5B parameters (dense) training_emissions: unknown training_hardware: 510 V100s training_time: 4 weeks type: model url: https://arxiv.org/abs/2005.00341 - access: explanation: "The model is available to OpenAI employees, researchers, creatives\ \ and company friends. OpenAI opened a waitlist for DALL\xB7E 2 access. [[System\ \ Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#access).\n" value: limited analysis: The model is capable of generating explicit content and the researchers found limited amount of spurious content generated. The researchers also found that visual synonyms can be used to prompt the model to surface unwanted generations [[Probes and Evaluations]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations). 
created_date: explanation: 'OpenAI released in a blog post in April 2022 [[OpenAI Blog Post]](https://openai.com/dall-e-2/). ''' value: 2022-04-13 dependencies: - "DALL\xB7E dataset" - CLIP dataset description: "\"DALL\xB7E 2 is an artificial intelligence model that takes a text\ \ prompt and/or existing image as an input and generates a new image as an output\"\ \ [[System Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md).\ \ The model wasn't fully released, but OpenAI released a version of the model\ \ (DALL\xB7E 2 Preview) to a select group of testers.\n" feedback: Feedback can be provided at support at openai.com. intended_uses: "\"The intended use of the DALL\xB7E 2 Preview at this time is for\ \ personal, non-commercial exploration and research purposes by people who are\ \ interested in understanding the potential uses of these capabilities\" [[Use]]\ \ (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use).\n" license: unknown modality: text; image model_card: https://github.com/openai/dalle-2-preview/blob/main/system-card.md monitoring: 'Uses of the model are monitored. In the preview version, any user can flag content. The specific policies for monitoring are not disclosed, but possible measures include disabling of accounts violating the content policies [[Monitoring and Reporting]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting). ''' name: "DALL\xB7E 2" nationality: USA organization: OpenAI prohibited_uses: Use of the model is governed by the OpenAI Content Policy, which prohibits posting of non-G rated content. Users are not allowed to utilize the model in commercial products in the preview version [[Content Policy]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement). quality_control: The model is not fully released to the public as part of a quality control measure. 
The usage of the model by testers is monitored and user provided prompts are filtered [[Input filters]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters). size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/abs/2204.06125 - access: explanation: 'The OpenAI API is available to the public in supported countries [[Supported Countries]](https://beta.openai.com/docs/supported-countries) [[OpenAI API]](https://openai.com/api/). ''' value: limited adaptation: The API exposes the models fairly directly with a range of hyperparameters (e.g. temperature scaling). created_date: explanation: 'The date that OpenAI API was announced to the public [[Open AI Blog Post]](https://openai.com/blog/openai-api/). ''' value: 2020-06-11 dependencies: - GPT-3 - Codex - code-davinci-002 - text-davinci-002 - text-davinci-003 - gpt-3.5-turbo - Whisper - "DALL\xB7E" - GPT-4 - GPT-4 Turbo description: 'OpenAI API is a general purpose "text in, text out" interface connecting users with a suite of language models. The API was initially released as a gateway to GPT-3, but it now supports access to other, more specialized OpenAI models. [[Open AI Blog Post]](https://openai.com/blog/openai-api/) ' failures: explanation: 'There are no known documented failures of the OpenAI API at the time of writing. ''' value: unknown feedback: explanation: 'There is no known specific feedback channel for the OpenAI API, but OpenAI support team can be reached via email at support at openai.com. ''' value: unknown intended_uses: 'OpenAI API was designed to be used by developers to empower applications, and researchers to study language models [[Section 3]](https://openai.com/api/policies/terms/). ' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
value: custom monitoring: 'OpenAI may monitor the API use to ensure "quality and improve OpenAI systems, products and services; perform research; and ensure compliance" with the Terms of Service and all applicable laws. Users of the API will give OpenAI reasonable access to their application to monitor compliance with the terms listed in the Terms of Service [[Section 5(b)]](https://openai.com/api/policies/terms/). Apps using the OpenAI API should submit an application once they are deployed to real users. The review form takes 10 minutes to complete and over 97% of the applications are directly accepted or conditionally accepted. The applicants are notified of the decision within 2 business days [[App Review Guidelines]] (https://beta.openai.com/docs/usage-guidelines/app-review). ' monthly_active_users: explanation: 'The number of monthly active users is not known publicly, but OpenAI mentioned that the API was being used by tens of thousands of developers in a blog post from 2021-11-18 [[OpenAI Blog Post]](https://openai.com/blog/api-no-waitlist/). ''' value: unknown name: OpenAI API nationality: USA organization: OpenAI output_space: 'Given a prompting text, the OpenAI API provides access to text completions, and log probabilities. The support for text and code embeddings were added on 2022-01-25 [[OpenAI Blog Post]] (https://openai.com/blog/introducing-text-and-code-embeddings/). ' prohibited_uses: 'OpenAI API Terms of Use prohibits the use of the API in a way violating the applicable law, including: (i) "Illegal activities, such as child pornography, gambling, cybercrime, piracy, violating copyright, trademark or other intellectual property laws"; (ii) "Accessing or authorizing anyone to access the APIs from an embargoed country, region, or territory as prohibited by the U.S. government"; (iii) "Threatening, stalking, defaming, defrauding, degrading, victimizing or intimidating anyone for any reason". 
The usage requirements are detailed in the Terms of Use [[Section 3]](https://openai.com/api/policies/terms/). ' quality_control: 'Given a prompt, OpenAI API checks whether a completion contains unsafe language using its filters and marks the completion accordingly if so. The API also provides developers with special endpoints that scope the API usage. OpenAI also developed user guidelines to help developers understand safety issues [[OpenAI API]](https://openai.com/api/). ' terms_of_service: https://openai.com/api/policies/terms/ type: application url: https://openai.com/api/ user_distribution: explanation: 'The distribution of the users is not known, but we estimate majority of the users to be developers based in the United States. ''' value: unknown - access: open analysis: '' created_date: 2022-06-23 dependencies: - web_clean description: '' feedback: '' intended_uses: '' license: MIT modality: video; video model_card: '' monitoring: '' name: VPT nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: 500M parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2206.11795 - access: closed analysis: '' created_date: 2022-06-23 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: video monitoring: '' name: web_clean nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' sample: [] size: 70k hours type: dataset url: https://arxiv.org/abs/2206.11795 - access: limited adaptation: '' created_date: 2022-11-30 dependencies: - gpt-3.5-turbo - OpenAI toxicity classifier description: ChatGPT is an artificial intelligence chatbot developed by OpenAI. failures: '' feedback: '' intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
value: custom monitoring: '' monthly_active_users: explanation: https://www.reuters.com/technology/chatgpt-sets-record-fastest-growing-user-base-analyst-note-2023-02-01/ value: 100M name: ChatGPT nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/blog/chatgpt user_distribution: '' - access: limited analysis: '' created_date: 2023-03-01 dependencies: - gpt-3.5-turbo dataset description: '' feedback: '' intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). value: custom modality: text; text model_card: '' monitoring: '' name: gpt-3.5-turbo nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://platform.openai.com/docs/models/gpt-3-5 - access: limited analysis: none created_date: 2023-11-06 dependencies: [] description: GPT-4 Turbo is a more capable version of GPT-4 and has knowledge of world events up to April 2023. It has a 128k context window so it can fit the equivalent of more than 300 pages of text in a single prompt. feedback: none intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
value: custom modality: text; text model_card: none monitoring: unknown name: GPT-4 Turbo nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo - access: limited analysis: '' created_date: 2023-03-01 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: gpt-3.5-turbo dataset nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://platform.openai.com/docs/models/gpt-3-5 - access: limited analysis: '' created_date: 2022-05-01 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: code, text monitoring: '' name: code-davinci-002 dataset nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://platform.openai.com/docs/model-index-for-researchers - access: limited analysis: '' created_date: 2022-05-01 dependencies: - code-davinci-002 dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; code, text model_card: '' monitoring: '' name: code-davinci-002 nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://platform.openai.com/docs/model-index-for-researchers - access: limited analysis: '' created_date: 2022-05-01 dependencies: - code-davinci-002 description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: text-davinci-002 nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' 
training_hardware: '' training_time: '' type: model url: https://platform.openai.com/docs/model-index-for-researchers - access: limited analysis: '' created_date: 2022-11-30 dependencies: - text-davinci-002 description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: text-davinci-003 nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://platform.openai.com/docs/model-index-for-researchers - access: open adaptation: '' created_date: 2023-03-01 dependencies: - Whisper description: API to query OpenAI's Whisper model. failures: '' feedback: '' intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). value: custom monitoring: '' monthly_active_users: '' name: Whisper API nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - access: open adaptation: '' created_date: 2023-03-01 dependencies: - ChatGPT description: API to query OpenAI's ChatGPT model. failures: '' feedback: '' intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
value: custom monitoring: '' monthly_active_users: '' name: ChatGPT API nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' - access: open adaptation: '' created_date: 2022-08-10 dependencies: - OpenAI toxicity classifier description: "This endpoint provides OpenAI API developers with free access to GPT-based\ \ classifiers that detect undesired content\u2014an instance of using AI systems\ \ to assist with human supervision of these systems." failures: '' feedback: '' intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). value: custom monitoring: '' monthly_active_users: '' name: OpenAI Moderation API nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/blog/new-and-improved-content-moderation-tooling user_distribution: '' - access: closed analysis: '' created_date: 2023-01-18 dependencies: - OpenAI toxicity dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: OpenAI toxicity classifier nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. 
- access: closed analysis: '' created_date: 2023-01-18 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: OpenAI toxicity dataset nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT. - access: limited adaptation: '' created_date: 2023-02-03 dependencies: - Sage description: A chatbot language model available via Quora's Poe failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Sage API nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://quorablog.quora.com/Poe-1 user_distribution: '' - access: limited adaptation: '' created_date: 2023-02-03 dependencies: - Dragonfly description: A chatbot language model available via Quora's Poe failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Dragonfly API nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://quorablog.quora.com/Poe-1 user_distribution: '' - access: limited analysis: '' created_date: 2023-02-03 dependencies: [] description: A chatbot language model available via Quora's Poe feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Sage nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://quorablog.quora.com/Poe-1 - access: limited analysis: '' created_date: 2023-02-03 dependencies: [] description: A chatbot language model available via 
Quora's Poe feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: Dragonfly nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://quorablog.quora.com/Poe-1 - access: limited adaptation: '' created_date: 2023-03-07 dependencies: - ChatGPT API description: "The app integrates ChatGPT\u2019s powerful AI technology to deliver\ \ instant conversation summaries, research tools, and writing assistance directly\ \ in Slack to help millions of companies work more productively." failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: ChatGPT for Slack nationality: unknown organization: OpenAI, Salesforce output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.salesforce.com/news/stories/chatgpt-app-for-slack/ user_distribution: '' - access: limited analysis: '' created_date: 2023-03-14 dependencies: [] description: '' feedback: '' intended_uses: '' license: unknown modality: image, text; image, text model_card: '' monitoring: '' name: GPT-4 nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2303.08774 - access: limited adaptation: '' created_date: 2023-03-14 dependencies: - GPT-4 description: "GPT-4 is OpenAI\u2019s most advanced system, producing safer and more\ \ useful responses" failures: '' feedback: '' intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/api/policies/terms/). 
value: custom monitoring: '' monthly_active_users: '' name: GPT-4 API nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/product/gpt-4 user_distribution: '' - access: limited adaptation: '' created_date: 2023-08-28 dependencies: - GPT-4 description: ChatGPT Enterprise offers enterprise-grade security and privacy, unlimited higher-speed GPT-4 access, longer context windows for processing longer inputs, advanced data analysis capabilities, and customization options compared to OpenAI's previous offerings. failures: '' feedback: '' intended_uses: '' license: explanation: Per the Terms of Use, a limited license is provided to the users during their use of the API [[Section 2]](https://openai.com/policies/terms-of-use). value: custom monitoring: '' monthly_active_users: '' name: ChatGPT Enterprise nationality: USA organization: OpenAI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://openai.com/policies/terms-of-use type: application url: https://openai.com/enterprise user_distribution: '' - access: explanation: "DALL\xB7E 3 is now in research preview, and will be available to\ \ ChatGPT Plus and Enterprise customers in October, via the API and in Labs\ \ later this fall." value: limited analysis: The model is capable of generating explicit content and the researchers found limited amount of spurious content generated. created_date: explanation: "OpenAI announced that DALL\xB7E 3 was coming soon in a tweet on\ \ 2023-09-20. Users could begin experimenting with DALL\xB7E 3 in research preview\ \ in early October." 
value: 2023-09-20 dependencies: - "DALL\xB7E 2 dataset" - CLIP dataset - ChatGPT description: "DALL\xB7E 3 is an artificial intelligence model that takes a text\ \ prompt and/or existing image as an input and generates a new image as an output.\ \ The model is now in research preview, and will be available to ChatGPT Plus\ \ and Enterprise customers in October." feedback: Feedback can be provided at openai.com intended_uses: "The intended use of the DALL\xB7E 3 Preview at this time is for\ \ personal, non-commercial exploration and research purposes by people who are\ \ interested in understanding the potential uses of these capabilities" license: explanation: License information can be found at https://openai.com/policies/terms-of-use value: custom modality: text; image model_card: none monitoring: Uses of the model are monitored. In the preview version, any user can flag content. The specific policies for monitoring are not disclosed, but possible measures include disabling of accounts violating the content policies. name: "DALL\xB7E 3" nationality: USA organization: OpenAI prohibited_uses: Use of the model is governed by the OpenAI Content Policy, which prohibits posting of non-G rated content. Users are not allowed to utilize the model in commercial products in the preview version. quality_control: "DALL\xB7E 3 has mitigations to decline requests that ask for a\ \ public figure by name. We improved safety performance in risk areas like generation\ \ of public figures and harmful biases related to visual over/under-representation,\ \ in partnership with red teamers\u2014domain experts who stress-test the model\u2014\ to help inform our risk assessment and mitigation efforts in areas like propaganda\ \ and misinformation." 
size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://openai.com/dall-e-3 - access: limited analysis: none created_date: 2024-02-15 dependencies: [] description: Sora is an AI model that can create realistic and imaginative scenes from text instructions. feedback: '' intended_uses: '' license: unknown modality: text; image, video model_card: none monitoring: unknown name: Sora nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://openai.com/sora - access: limited analysis: When evaluated on standard performance benchmarks, achieves similar levels of performance to GPT-4 Turbo. created_date: 2024-05-13 dependencies: [] description: GPT-4o is OpenAI's new flagship model, as of release, that can reason across audio, vision, and text in real time. feedback: none intended_uses: '' license: unknown modality: audio, image, text, video; audio, image, text model_card: none monitoring: Internal monitoring of risk for non-text outputs before a public release (currently only image, text inputs and text outputs are available). name: GPT-4o nationality: USA organization: OpenAI prohibited_uses: '' quality_control: Training data filtering and post-training refinement act as additional guardrails for preventing harmful outputs. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://openai.com/index/hello-gpt-4o/ ================================================ FILE: assets/openbmb.yaml ================================================ - access: open analysis: Evaluated on English and Chinese language benchmarks. created_date: 2023-05-27 dependencies: [] description: CPM-Bee is a fully open-source, commercially-usable Chinese-English bilingual base model with a capacity of ten billion parameters. 
feedback: https://huggingface.co/openbmb/cpm-bee-10b/discussions intended_uses: You can use the raw model for many NLP tasks like text generation or fine-tune it to a downstream task. license: explanation: can be found at https://github.com/OpenBMB/CPM-Bee/blob/main/README_en.md#modellicense value: custom modality: text; text model_card: https://huggingface.co/openbmb/cpm-bee-10b monitoring: unknown name: CPM Bee nationality: International organization: OpenBMB prohibited_uses: '' quality_control: '' size: 10B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/OpenBMB/CPM-Bee - access: open analysis: Randomly chosen models trained on UltraFeedback evaluated across standard benchmarks. created_date: 2023-09-26 datasheet: https://huggingface.co/datasets/openbmb/UltraFeedback dependencies: [] description: UltraFeedback is a large-scale, fine-grained, diverse preference dataset, used for training powerful reward models and critic models. excluded: '' feedback: https://huggingface.co/datasets/openbmb/UltraFeedback/discussions included: '' intended_uses: '' license: MIT modality: text monitoring: unknown name: UltraFeedback nationality: International organization: OpenBMB prohibited_uses: '' quality_control: '' sample: [] size: 256k samples type: dataset url: https://github.com/OpenBMB/UltraFeedback - access: open analysis: Evaluated on open-sourced general benchmarks in comparison to SotA LLMs. created_date: 2024-02-01 dependencies: [] description: MiniCPM is an End-Side LLM developed by ModelBest Inc. and TsinghuaNLP, with only 2.4B parameters excluding embeddings (2.7B in total). 
feedback: https://huggingface.co/openbmb/MiniCPM-V/discussions intended_uses: '' license: explanation: can be found at https://github.com/OpenBMB/General-Model-License/tree/main value: custom modality: text; text model_card: https://huggingface.co/openbmb/MiniCPM-V monitoring: unknown name: MiniCPM nationality: International organization: OpenBMB prohibited_uses: '' quality_control: '' size: 2.4B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/OpenBMB/MiniCPM/ - access: open analysis: The model was comprehensively benchmarked across 12 tests covering five tasks. Eurus achieved the best overall performance among open-source models of similar sizes and even outperformed specialized models in many cases. created_date: 2024-04-02 dependencies: - Eurus SFT - UltraInteract - UltraFeedback description: Eurus is a suite of large language models (LLMs) optimized for reasoning. feedback: https://huggingface.co/openbmb/Eurus-70b-nca/discussions intended_uses: The model can be used for reasoning tasks and is especially tailored for coding and math following specific prompts. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/openbmb/Eurus-70b-nca monitoring: unknown name: Eurus nationality: International organization: OpenBMB prohibited_uses: none quality_control: none size: 70B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/abs/2404.02078 ================================================ FILE: assets/openlemur.yaml ================================================ - access: open analysis: Evaluated on text and code benchmarks in comparison to other models. 
created_date: 2023-10-10 dependencies: - LLaMA 2 - The Stack - RefinedWeb - RedPajama - Common Crawl - Wikipedia - ArXiv description: Lemur is an openly accessible language model optimized for both natural language and coding capabilities to serve as the backbone of versatile language agents. feedback: https://huggingface.co/OpenLemur/lemur-70b-v1/discussions intended_uses: '' license: LLaMA2 modality: code, text; code, text model_card: https://huggingface.co/OpenLemur/lemur-70b-v1 monitoring: '' name: Lemur nationality: USA organization: OpenLemur prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: TPUv4-512 pod training_time: unknown type: model url: https://arxiv.org/pdf/2310.06830.pdf - access: open analysis: Evaluated on text and code benchmarks in comparison to other models. created_date: 2023-10-10 dependencies: - Lemur - OpenAssistant 1 - OpenOrca - ShareGPT & ChatLogs - Evol-CodeAlpaca data description: Lemur-Chat is an openly accessible language model optimized for both natural language and coding capabilities to serve as the backbone of versatile language agents. feedback: https://huggingface.co/OpenLemur/lemur-70b-chat-v1/discussions intended_uses: '' license: CC-BY-NC-4.0 modality: text; text model_card: https://huggingface.co/OpenLemur/lemur-70b-chat-v1 monitoring: '' name: Lemur-Chat nationality: USA organization: OpenLemur prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2310.06830.pdf ================================================ FILE: assets/openx.yaml ================================================ - access: open analysis: Analyzed on breakdown of types of robot trajectory in dataset, and overall coverage. created_date: 2023-10-03 datasheet: All data can be found at https://robotics-transformer-x.github.io/. 
dependencies: explanation: data compiled from unknown datasets in over 21 different institutions, list of institutions can be found at https://robotics-transformer-x.github.io/paper.pdf value: [] description: The Open X-Embodiment dataset is a dataset of robot movements assembled from 22 different robots collected through a collaboration between 21 institutions, demonstrating 527 skills (160266 tasks) excluded: N/A feedback: none included: N/A intended_uses: Further research on X-embodiment models. license: Apache 2.0 modality: robot trajectories monitoring: unknown name: Open X-Embodiment dataset nationality: International organization: Open X-Embodiment prohibited_uses: none quality_control: unknown sample: [] size: 160K tasks type: dataset url: https://robotics-transformer-x.github.io/ - access: open analysis: Evaluated on in-distribution robotics skills, and outperforms its predecessor RT-1 by 50% in emergent skill evaluations. created_date: 2023-10-03 dependencies: - Open X-Embodiment dataset - ImageNet EfficientNet - USE description: RT-1-X is a model trained on the Open X-Embodiment dataset that exhibits better generalization and new capabilities compared to its predecessor RT-1, an efficient Transformer-based architecture designed for robotic control. feedback: none intended_uses: Further research on X-embodiment models. license: Apache 2.0 modality: images, text; robot trajectories model_card: none monitoring: unknown name: RT-1-X nationality: unknown organization: Open X-Embodiment, Google Deepmind prohibited_uses: none quality_control: unknown size: 35M parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://robotics-transformer-x.github.io/ - access: closed analysis: Evaluated on in-distribution robotics skills, and outperforms its predecessor RT-2 by 3x in emergent skill evaluations. 
created_date: 2023-10-03 dependencies: - Open X-Embodiment dataset - ViT (unknown size) - UL2 description: RT-2-X is a model trained on the Open X-Embodiment dataset that exhibits better generalization and new capabilities compared to its predecessor RT-2, a large vision-language model co-fine-tuned to output robot actions as natural language tokens. feedback: none intended_uses: Further research on X-embodiment models. license: unknown modality: images, text, robot trajectories; robot trajectories model_card: none monitoring: unknown name: RT-2-X nationality: unknown organization: Open X-Embodiment, Google Deepmind prohibited_uses: none quality_control: unknown size: 55B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://robotics-transformer-x.github.io/ - access: explanation: we are sharing the GPT-4o System Card, which includes our Preparedness Framework evaluations. value: limited analysis: GPT-4o underwent evaluations that included the Preparedness Framework, external red teaming, and third-party assessments to ensure safe and aligned deployment. The evaluations focused on identifying and mitigating potential risks across its capabilities, especially speech-to-speech functionality. created_date: 2024-08-08 dependencies: - Shutterstock description: GPT-4o is an autoregressive omni model that accepts a combination of text, audio, image, and video as input and produces any combination of text, audio, and image outputs. It is trained end-to-end across text, vision, and audio, focusing on multimodal capabilities. feedback: unknown intended_uses: Use in multimodal applications requiring understanding and generation of combinations of text, audio, and image outputs, better performance on non-English languages, and enhanced vision and audio understanding. 
license: unknown modality: explanation: '...accepts as input any combination of text, audio, image, and video and generates any combination of text, audio, and image outputs.' value: text, audio, image, video; text, audio, image model_card: unknown monitoring: Continuous monitoring and enforcement, providing moderation tools and transparency reports, and gathering feedback from users. name: GPT-4o nationality: USA organization: OpenAI prohibited_uses: Uses that could involve bias, discrimination, harmful content, or violation of usage policies. quality_control: Quality and safety measures included prior risk assessments, post-training mitigation, moderation tools, advanced data filtering, and external red teaming efforts with experts to evaluate potential risks like bias, discrimination, and information harms. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2410.21276 - access: limited analysis: Evaluated on challenging benchmark tasks in physics, chemistry, and biology. In a qualifying exam for the International Mathematics Olympiad (IMO), GPT-4o correctly solved only 13% of problems, while the reasoning model o1 scored 83%. created_date: 2024-09-12 dependencies: [] description: OpenAI o1 is a new series of AI models designed to spend more time thinking before they respond. They can reason through complex tasks and solve harder problems than previous models in science, coding, and math. feedback: unknown intended_uses: "These enhanced reasoning capabilities may be particularly useful\ \ if you\u2019re tackling complex problems in science, coding, math, and similar\ \ fields. For example, o1 can be used by healthcare researchers to annotate cell\ \ sequencing data, by physicists to generate complicated mathematical formulas\ \ needed for quantum optics, and by developers in all fields to build and execute\ \ multi-step workflows." 
license: unknown modality: text; text model_card: unknown monitoring: '' name: o1 nationality: USA organization: OpenAI prohibited_uses: '' quality_control: "To match the new capabilities of these models, OpenAI has bolstered\ \ safety work, internal governance, and federal government collaboration. This\ \ includes rigorous testing and evaluations using their Preparedness Framework\u2060\ (opens in a new window), best-in-class red teaming, and board-level review processes,\ \ including by OpenAI's Safety & Security Committee." size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://openai.com/o1/ - access: limited analysis: Makes significant progress on the ARC-AGI evaluation framework compared to all existing models. created_date: 2024-12-20 dependencies: [] description: OpenAI o3 is, as of release, the latest model in OpenAI's o-model reasoning series. feedback: unknown intended_uses: '' license: unknown modality: text; text model_card: unknown monitoring: '' name: o3 nationality: USA organization: OpenAI prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://x.com/OpenAI/status/1870186518230511844 ================================================ FILE: assets/orion.yaml ================================================ - access: open analysis: Evaluated on multilingual and NLP benchmarks in comparison with SoTA models of comparable size. created_date: 2024-01-20 dependencies: [] description: Orion series models are open-source multilingual large language models trained from scratch by OrionStarAI. feedback: https://huggingface.co/OrionStarAI/Orion-14B-Base/discussions intended_uses: '' license: explanation: Model license can be found at https://github.com/OrionStarAI/Orion/blob/master/ModelsCommunityLicenseAgreement. 
Code license is under Apache 2.0 value: custom modality: text; text model_card: https://huggingface.co/OrionStarAI/Orion-14B-Base monitoring: unknown name: Orion nationality: China organization: OrionStarAI prohibited_uses: '' quality_control: unknown size: 14B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/OrionStarAI/Orion ================================================ FILE: assets/osu.yaml ================================================ - access: open analysis: Evaluated on MATH, a competition-level dataset, and achieves a 46% accuracy, higher than accuracy produced by GPT-4's chain of thought. created_date: 2023-09-11 dependencies: - MathInstruct - LLaMA - Code LLaMA description: MAmmoTH is a series of open-source large language models (LLMs) specifically tailored for general math problem-solving. feedback: '' intended_uses: '' license: MIT modality: text; text model_card: '' monitoring: '' name: MAmmoTH nationality: USA organization: Ohio State University prohibited_uses: '' quality_control: '' size: 34B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2309.05653.pdf ================================================ FILE: assets/othersideai.yaml ================================================ - access: limited adaptation: unknown created_date: unknown dependencies: - OpenAI API description: 'HyperWrite is a writing assistant that generates text based on a user''s request, as well as style and tone choices. ' failures: unknown feedback: unknown intended_uses: 'HyperWrite is intended to be used as a writing assistant. ' license: explanation: '"The Software, including software embedded in the Software, is licensed, not sold, to you by Otherside only under the terms of this Agreement; and Otherside reserves all rights not expressly granted under this Agreemen" - excerpt from the Terms of Service. 
' value: custom monitoring: unknown monthly_active_users: unknown name: HyperWrite nationality: USA organization: OthersideAI output_space: Generation prohibited_uses: unknown quality_control: explanation: 'There is a HyperWrite blog warning the users about the risks of writing with an AI assistant, but the company doesn''t list the quality control measures taken to prevent the listed risks, if any [[HyperWrite Blog]] (https://blog.hyperwrite.ai/what-are-the-risks-of-ai-writing/). ' value: unknown terms_of_service: https://hyperwriteai.com/terms type: application url: https://hyperwriteai.com/ user_distribution: unknown ================================================ FILE: assets/oxford.yaml ================================================ - access: explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). value: open analysis: unknown created_date: explanation: 'The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. ' value: 2021-04-01 datasheet: none dependencies: [] description: 'WebVid-10M is a large-scale dataset of short videos with textual descriptions sourced from stock footage sites. ' excluded: unknown feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk included: explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. 
The data was scraped from the web following a similar procedure to Google Conceptual Captions" value: Video URLs and textual description annotations intended_uses: explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) value: '"Provided you keep to these rules, the University grants you (the researcher) a non-exclusive and non-transferable licence to use the content free of charge strictly for non-commercial research (i.e., whose output artefacts are not incorporated in commercial products) for 12 months." ' license: explanation: '[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) ' value: WebVid Dataset Terms modality: text, video monitoring: '' name: WebVid-10M nationality: UK organization: University of Oxford prohibited_uses: '- Authors note the following prohibited uses: "You must not use the content other than for the Permitted Purpose in strict conformity with these terms and any other reasonable instructions of the University. You must not, except as may be strictly necessary for carrying out the Permitted Purpose, provide or otherwise make available content to any third party or allow use of it or them by or on behalf of any third party, in whole or in part, whether by way of sale, resale, loan, transfer, hire or any other form of exploitation; or attempt to identify any living or deceased individual from the content." [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - Authors also note the following limitations of the dataset: "We note that data sourced from the web may be prone to biases and may contain graphic content. Please be careful of unintended societal, gender, racial and other biases when training or deploying models trained on this data." 
[[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) ' quality_control: unknown sample: [] size: 10.7M video-text pairs, 52K hours video type: dataset url: https://m-bain.github.io/webvid-dataset/ - access: explanation: The captions and video url's can be found at the [[dataset page]](https://m-bain.github.io/webvid-dataset/). CLIP ViT-B/32 features of this dataset can be found at the [[WebVid github repository]](https://github.com/m-bain/webvid#download-clip-features-%EF%B8%8F). value: open analysis: unknown created_date: explanation: 'The date the [[paper]](https://arxiv.org/abs/2104.00650) was released. ' value: 2021-04-01 datasheet: none dependencies: - WebVid-10M description: 'WebVid-2M is a large-scale dataset of 2.5M short videos with textual descriptions sourced from stock footage sites. A subset of the WebVid-10M dataset. ' excluded: unknown feedback: Feedback can be given by emailing at maxbain at robots.ox.ac.uk included: explanation: Authors note in the paper:"We scrape the web for a new dataset of videos with textual description annotations, called WebVid-2M. The data was scraped from the web following a similar procedure to Google Conceptual Captions" value: Video URLs and textual description annotations intended_uses: explanation: See [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) value: '"Provided you keep to these rules, the University grants you (the researcher) a non-exclusive and non-transferable licence to use the content free of charge strictly for non-commercial research (i.e., whose output artefacts are not incorporated in commercial products) for 12 months." 
' license: explanation: '[WebVid Dataset Terms](https://github.com/m-bain/webvid/blob/main/TERMS.md) ' value: WebVid Dataset Terms modality: text, video monitoring: '' name: WebVid-2M nationality: UK organization: University of Oxford prohibited_uses: '- Authors note the following prohibited uses: "You must not use the content other than for the Permitted Purpose in strict conformity with these terms and any other reasonable instructions of the University. You must not, except as may be strictly necessary for carrying out the Permitted Purpose, provide or otherwise make available content to any third party or allow use of it or them by or on behalf of any third party, in whole or in part, whether by way of sale, resale, loan, transfer, hire or any other form of exploitation; or attempt to identify any living or deceased individual from the content." [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - Authors also note the following limitations of the dataset: "We note that data sourced from the web may be prone to biases and may contain graphic content. Please be careful of unintended societal, gender, racial and other biases when training or deploying models trained on this data." [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) ' quality_control: unknown sample: [] size: 2.5M video-text pairs, 13K hours video type: dataset url: https://m-bain.github.io/webvid-dataset/ ================================================ FILE: assets/paladin.yaml ================================================ - access: limited adaptation: '' created_date: 2023-02-01 dependencies: [] description: UnderwriteGPT is the world's first generative AI underwriting tool. 
failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: UnderwriteGPT nationality: USA organization: Paladin Group and Dais Technology output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://dais.com/underwritegpt/ user_distribution: '' ================================================ FILE: assets/peking.yaml ================================================ - access: open analysis: Compared with other multi-task, instruction-following agents. created_date: 2023-11-10 dependencies: [] description: JARVIS-1 is an open-world agent that can perceive multimodal input (visual observations and human instructions), generate sophisticated plans, and perform embodied control, all within the popular yet challenging open-world Minecraft universe. feedback: none intended_uses: '' license: unknown modality: text; in-game actions model_card: none monitoring: none name: JARVIS-1 nationality: China organization: Peking University Institute for Artificial Intelligence prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://arxiv.org/pdf/2311.05997.pdf ================================================ FILE: assets/perplexity.yaml ================================================ - access: open adaptation: '' created_date: explanation: Perplexity Ask was announced in a [[Twitter post]](https://twitter.com/perplexity_ai/status/1600551871554338816?s=20) by the company. 
value: 2022-12-07 dependencies: - GPT-3.5 - Bing Search description: Perplexity Ask is a new search interface that uses advanced artificial intelligence technologies failures: '' feedback: '' intended_uses: '' license: none monitoring: '' monthly_active_users: '' name: Perplexity Ask nationality: USA organization: Perplexity output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.perplexity.ai/ user_distribution: '' - access: closed adaptation: '' created_date: 2022-12-15 dependencies: - Perplexity Ask - OpenAI API description: Twitter search interface that is powered by Perplexity's structured search engine. failures: '' feedback: '' intended_uses: '' license: none monitoring: '' monthly_active_users: '' name: Bird SQL nationality: USA organization: Perplexity output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.perplexity.ai/sql user_distribution: '' - access: open adaptation: '' created_date: 2023-10-27 dependencies: [] description: Perplexity chat is an AI chatbot trained in-house by Perplexity. failures: '' feedback: '' intended_uses: '' license: none monitoring: '' monthly_active_users: '' name: Perplexity Chat nationality: USA organization: Perplexity output_space: Chatbot output in response to user queries prohibited_uses: '' quality_control: '' terms_of_service: https://blog.perplexity.ai/legal/terms-of-service type: application url: https://labs.perplexity.ai/ user_distribution: '' ================================================ FILE: assets/pleias.yaml ================================================ - access: open analysis: unknown created_date: 2024-03-20 datasheet: '' dependencies: [] description: Common Corpus is the largest public domain dataset released for training Large Language Models (LLMs). 
This dataset includes 500 billion words from a diverse range of cultural heritage initiatives and is the largest corpus in English, French, Dutch, Spanish, German and Italian. It supports efforts to train fully open LLMs on sources without copyright concerns. excluded: The data excluded are those that have copyright issues. feedback: unknown included: The dataset includes 500 billion words from a wide diversity of cultural heritage initiatives. It also has the largest English-speaking dataset to date with 180 billion words, including a major US collection of 21 million digitized newspapers and large monographs datasets collected by digital historian Sebastian Majstorovic. It also contains a huge volume of data in French (110 billion words), German (30 billion words), Spanish, Dutch and Italian, as well as data in low-resource languages that are currently underrepresented. intended_uses: The dataset is intended to support open and reproducible AI research, enhancing accessibility, diversity, and democracy in AI by enabling everyone to explore large models. license: none modality: text monitoring: unknown name: Common Corpus nationality: USA organization: Pleias prohibited_uses: It should not be used for tasks that infringe on copyright laws. quality_control: All data included in the corpus are from fully open and auditable sources, ensuring they are copyright-free. 
sample: [] size: 500 billion words type: dataset url: https://huggingface.co/blog/Pclanglais/common-corpus ================================================ FILE: assets/portkey.yaml ================================================ - access: open adaptation: '' created_date: 2023-05-06 dependencies: [] description: Portkey is a hosted middleware that allows users to create generative AI applications failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: Portkey nationality: USA organization: Portkey output_space: generative AI apps prohibited_uses: '' quality_control: '' terms_of_service: https://portkey.ai/terms type: application url: https://portkey.ai/ user_distribution: '' ================================================ FILE: assets/quizlet.yaml ================================================ - access: open adaptation: '' created_date: 2023-03-01 dependencies: - ChatGPT API description: Quizlet is introducing Q-Chat, a fully-adaptive AI tutor that engages students with adaptive questions based on relevant study materials delivered through a fun chat experience. failures: '' feedback: '' intended_uses: '' license: none monitoring: '' monthly_active_users: '' name: Q-Chat nationality: USA organization: Quizlet output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://quizlet.com/tos type: application url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' ================================================ FILE: assets/quora.yaml ================================================ - access: limited adaptation: '' created_date: 2023-02-03 dependencies: - ChatGPT API - GPT-4 API - Claude API - Dragonfly API - Sage API description: Poe lets people ask questions, get instant answers, and have back-and-forth conversations with several AI-powered bots. 
It is initially available on iOS, but we will be adding support for all major platforms in the next few months, along with more bots. failures: '' feedback: '' intended_uses: '' license: none monitoring: '' monthly_active_users: '' name: Poe nationality: USA organization: Quora output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://poe.com/tos type: application url: https://quorablog.quora.com/Poe-1 user_distribution: '' ================================================ FILE: assets/qwen_team.yaml ================================================ - access: open analysis: Models have been evaluated on a series of math benchmarks, demonstrating outperformance of the state-of-the-art models in both the English and Chinese language. created_date: 2024-08-08 dependencies: [] description: Qwen2-Math is a series of specialized math language models built upon the Qwen2 large language models, with a focus on enhancing the reasoning and mathematical capabilities. Their intended use is for solving complex mathematical problems. They significantly outperform both open-source and closed-source models in terms of mathematical capabilities. feedback: Problems with the model should be reported to the Qwen Team via their official channels. intended_uses: These models are intended for solving complex mathematical problems. license: Tongyi Qianwen modality: text; text model_card: https://huggingface.co/Qwen/Qwen2-Math-72B monitoring: Unknown name: Qwen2-Math nationality: China organization: Qwen Team prohibited_uses: Uses that go against the ethical usage policies of Qwen Team. quality_control: The models were tested with few-shot chain-of-thought prompting and evaluated across mathematical benchmarks in both English and Chinese. 
size: 72B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://qwenlm.github.io/blog/qwen2-math/ ================================================ FILE: assets/rakuten.yaml ================================================ - access: open analysis: RakutenAI achieves the highest average score in both Japanese and English LM-Harness metrics, outperforming other similarly-sized Japanese language models. created_date: 2024-03-21 dependencies: - Mistral description: RakutenAI-7B is a model developed with a focus on Japanese language understanding. It offers competitive performance on English tests as well. feedback: https://huggingface.co/Rakuten/RakutenAI-7B/discussions intended_uses: The model can be used for text generation tasks in both Japanese and English. license: Apache 2.0 modality: text; text model_card: https://huggingface.co/Rakuten/RakutenAI-7B monitoring: unknown name: RakutenAI nationality: Japan organization: Rakuten prohibited_uses: unknown quality_control: unknown size: 7B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://global.rakuten.com/corp/news/press/2024/0321_01.html ================================================ FILE: assets/reexpress.yaml ================================================ - access: limited adaptation: '' created_date: 2023-03-21 dependencies: [] description: Reexpress One offers a means of document classification, semantic search, and uncertainty analysis on-device. 
failures: unknown feedback: https://github.com/ReexpressAI/support intended_uses: '' license: unknown monitoring: unknown monthly_active_users: unknown name: Reexpress One nationality: USA organization: Reexpress AI output_space: data analyses prohibited_uses: '' quality_control: '' terms_of_service: https://re.express/tos.html type: application url: https://re.express/index.html user_distribution: unknown ================================================ FILE: assets/reka.yaml ================================================ - access: limited analysis: Evaluated on MMLU, GSM8K, HumanEval, and GPQA benchmarks, among others. created_date: 2024-02-12 dependencies: [] description: Reka Flash is a multimodal, multilingual, state-of-the-art 21B model trained entirely from scratch. feedback: none intended_uses: '' license: unknown modality: image, text, video; text model_card: none monitoring: unknown name: Reka Flash nationality: USA organization: Reka prohibited_uses: '' quality_control: '' size: 21B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://reka.ai/reka-flash-an-efficient-and-capable-multimodal-language-model/ - access: limited analysis: Reka Core was evaluated against leading models such as OpenAI's GPT-4, Claude-3 Opus, and Gemini Ultra on a variety of tasks and metrics including multimodal and human evaluation conducted by a third party. It was found to be competitive or even surpassing these models. created_date: 2024-04-15 dependencies: [] description: Reka Core is a frontier-class multimodal language model comparable to industry leaders. It has powerful capabilities including multimodal understanding (including images, videos, and audio), superb reasoning abilities, code generation, and multilinguality with proficiency in 32 languages.
feedback: unknown intended_uses: Reka Core can be used in e-commerce, social media, digital content and video games, healthcare, robotics, and other industries for tasks that require multimodal understanding, coding, complex reasoning, and more. license: unknown modality: audio, image, text, video; text model_card: none monitoring: unknown name: Reka Core nationality: USA organization: Reka prohibited_uses: unknown quality_control: '' size: unknown training_emissions: unknown training_hardware: thousands of GPUs training_time: few months type: model url: https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model ================================================ FILE: assets/robin.yaml ================================================ - access: limited adaptation: '' created_date: unknown dependencies: - Anthropic API description: Robin AI uses Claude and Anthropic's models to understand language - including in technical domains like legal language. It's also very confident at drafting, summarising, translations, and explaining complex concepts in simple terms failures: '' feedback: '' intended_uses: '' license: none monitoring: '' monthly_active_users: '' name: Robin AI nationality: USA organization: Robin AI output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.robinai.co.uk/terms type: application url: https://www.robinai.co.uk/ user_distribution: '' ================================================ FILE: assets/runway_ai,_inc..yaml ================================================ - access: open analysis: Unknown created_date: 2024-06-17 dependencies: [] description: Gen-3 Alpha is a foundation model trained for large-scale multimodal tasks. It is a major improvement in fidelity, consistency, and motion over the previous generation, Gen-2. Gen-3 Alpha can power various tools, such as Text to Video, Image to Video, and Text to Image. 
The model excels at generating expressive human characters with a wide range of actions, gestures, and emotions, and is capable of interpreting a wide range of styles and cinematic terminology. It is also a step towards building General World Models. It has been designed for use by research scientists, engineers, and artists, and can be fine-tuned for customization according to specific stylistic and narrative requirements. feedback: Companies interested in fine-tuning and custom models can reach out to Runway AI, Inc. using a form on their website. intended_uses: Can be used to create expressive human characters, interpret a wide range of styles and cinematic terminology, and power tools for Text to Video, Image to Video, and Text to Image tasks. license: Terms of Use listed on Runway AI, Inc.'s website, specific license unknown modality: text, image, video; video model_card: unknown monitoring: The model includes a new and improved in-house visual moderation system. name: Gen-3 Alpha nationality: USA organization: Runway AI, Inc. prohibited_uses: Unknown quality_control: It will be released with a set of new safeguards, including an improved in-house visual moderation system and C2PA provenance standards. size: Unknown training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://runwayml.com/research/introducing-gen-3-alpha?utm_source=xinquji ================================================ FILE: assets/rwkv.yaml ================================================ - access: open analysis: none created_date: 2023-05-03 dependencies: [] description: RWKV World 4 is an RNN with GPT-level LLM performance, which can also be directly trained like a GPT transformer (parallelizable). 
feedback: https://huggingface.co/RWKV/rwkv-4-world-7b/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/RWKV/rwkv-4-world-7b monitoring: unknown name: RWKV World 4 nationality: China organization: RWKV prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/RWKV/rwkv-4-world-7b - access: open analysis: none created_date: 2023-05-15 dependencies: [] description: RWKV 4 Pile is an RNN with GPT-level LLM performance, which can also be directly trained like a GPT transformer (parallelizable). feedback: https://huggingface.co/RWKV/rwkv-4-14b-pile/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/RWKV/rwkv-4-14b-pile monitoring: unknown name: RWKV 4 Pile nationality: China organization: RWKV prohibited_uses: '' quality_control: '' size: 14B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/RWKV/rwkv-4-14b-pile - access: open analysis: none created_date: 2023-12-16 dependencies: [] description: RWKV World 5 is an RNN with GPT-level LLM performance, which can also be directly trained like a GPT transformer (parallelizable). 
feedback: https://huggingface.co/RWKV/rwkv-5-world-3b/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/RWKV/rwkv-5-world-3b monitoring: unknown name: RWKV World 5 nationality: China organization: RWKV prohibited_uses: '' quality_control: '' size: 3B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/RWKV/rwkv-5-world-3b ================================================ FILE: assets/salesforce.yaml ================================================ - access: explanation: Model weights are available for download with links in the [[GitHub repo]](https://github.com/salesforce/CodeGen) value: open analysis: '' created_date: explanation: Date model paper was released value: 2022-03-25 dependencies: [] description: CodeGen is a language model for code feedback: '' intended_uses: '' license: none (model weights), BSD-3-Clause (code) modality: code, text; code, text model_card: '' monitoring: '' name: CodeGen nationality: USA organization: Salesforce prohibited_uses: '' quality_control: '' size: 16B parameters (dense) training_emissions: '' training_hardware: Unspecified Salesforce Compute (TPU-V4s) training_time: '' type: model url: https://arxiv.org/abs/2203.13474 - access: open analysis: '' created_date: 2022-01-28 dependencies: - ViT-B - BERT - COCO - Visual Genome - Conceptual Captions - Conceptual 12M - SBU Captions - LAION-115M description: '' feedback: '' intended_uses: '' license: BSD-3-Clause modality: text; image model_card: '' monitoring: '' name: BLIP nationality: USA organization: Salesforce prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2201.12086 - access: open analysis: '' created_date: 2022-01-28 datasheet: '' dependencies: - LAION-400M description: '' excluded: '' feedback: '' included: '' intended_uses: '' 
license: BSD-3-Clause modality: image, text monitoring: '' name: LAION-115M nationality: USA organization: Salesforce prohibited_uses: '' quality_control: '' sample: [] size: 115M image-text pairs type: dataset url: https://arxiv.org/abs/2201.12086 - access: limited adaptation: '' created_date: 2023-03-07 dependencies: - ChatGPT API description: EinsteinGPT is generative AI for customer relationship management (CRM). failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: EinsteinGPT nationality: USA organization: Salesforce output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8 user_distribution: '' - access: open analysis: BLIP-2 achieves state-of-the-art performance on various vision-language tasks, despite having significantly fewer trainable parameters than existing methods created_date: 2023-01-30 dependencies: - OPT description: BLIP-2 is a model that employs a generic and efficient pretraining strategy that bootstraps vision-language pre-training from off-the-shelf frozen pre-trained image encoders and frozen large language models. feedback: https://huggingface.co/Salesforce/blip2-opt-2.7b/discussions intended_uses: '' license: MIT modality: text; image model_card: https://huggingface.co/Salesforce/blip2-opt-2.7b monitoring: '' name: BLIP-2 nationality: USA organization: Salesforce prohibited_uses: '' quality_control: '' size: 2.7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: less than 9 days type: model url: https://arxiv.org/pdf/2301.12597.pdf - access: open analysis: Moirai has undergone a comprehensive evaluation in both in-distribution and out-of-distribution settings. It demonstrated its capabilities as a zero-shot forecaster and delivered competitive or superior performance compared to full-shot models.
created_date: 2024-03-19 dependencies: - LOTSA description: Moirai is a cutting-edge time series foundation model, offering universal forecasting capabilities. It stands out as a versatile time series forecasting model capable of addressing diverse forecasting tasks across multiple domains, frequencies, and variables in a zero-shot manner. feedback: https://huggingface.co/Salesforce/moirai-1.0-R-large/discussions intended_uses: Moirai can be used for time series forecasting in multiple domains. It offers robust zero-shot forecasting capabilities and eliminates the need for additional data, extensive computational resources, and expert input for achieving accurate forecasts. license: Apache 2.0 modality: time-series; time-series model_card: https://huggingface.co/Salesforce/moirai-1.0-R-large monitoring: unknown name: Moirai nationality: USA organization: Salesforce prohibited_uses: unknown quality_control: The performance of Moirai was evaluated through in-distribution and out-of-distribution settings. size: 311M parameters training_emissions: unknown training_hardware: NVIDIA A100 40G GPUs training_time: unknown type: model url: https://blog.salesforceairesearch.com/moirai/ - access: open analysis: '' created_date: 2024-03-19 datasheet: https://huggingface.co/datasets/Salesforce/lotsa_data dependencies: [] description: LOTSA is the largest collection of open time series datasets with 27B observations across nine domains. 
excluded: '' feedback: https://huggingface.co/datasets/Salesforce/lotsa_data/discussions included: domain observations in energy, transport, climate, cloudops, web, sales, nature, econ/finance, and healthcare intended_uses: pre-training Large Time Series Models license: Apache 2.0 modality: time-series monitoring: '' name: LOTSA nationality: USA organization: Salesforce prohibited_uses: '' quality_control: '' sample: [] size: 27B observations type: dataset url: https://arxiv.org/pdf/2402.02592.pdf ================================================ FILE: assets/samba.yaml ================================================ - access: open analysis: Evaluated on open source multilingual model benchmarks. created_date: 2024-02-26 dependencies: - Llama 2 description: SambaLingo is a suite of models that adapt Llama 2 to a diverse set of 9 languages. feedback: explanation: The Arabic language model feedback is given, but there exists one for each of the other 8 languages in the collection. value: https://huggingface.co/sambanovasystems/SambaLingo-Arabic-Base/discussions intended_uses: '' license: LLaMA 2 modality: text; text model_card: explanation: The Arabic language model card is given, but there exist one for each of the other 8 languages in the collection. value: https://huggingface.co/sambanovasystems/SambaLingo-Arabic-Base monitoring: '' name: SambaLingo nationality: USA organization: Samba Nova Systems prohibited_uses: SambaLingo should not be used for mission-critical applications, applications involving the safety of others, and highly critical decisions. 
quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://sambanova.ai/blog/sambalingo-open-source-language-experts - access: limited analysis: unknown created_date: 2024-02-28 dependencies: - Llama 2 - Mistral - Falcon-180B - Deepseek - BLOOM - LLaVA - CLIP description: Samba 1 is a trillion parameter generative AI model using a Composition of Experts architecture. feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: unknown name: Samba 1 nationality: USA organization: Samba Nova Systems prohibited_uses: '' quality_control: '' size: 1T parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://sambanova.ai/blog/samba-1-composition-of-experts-mode ================================================ FILE: assets/sana.yaml ================================================ - access: limited adaptation: 'Customized GPT-3, fine-tuned on private data [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). ' created_date: explanation: 'The company was founded in 2016 according to [[Crunchbase]](https://www.crunchbase.com/organization/sana-2). It is unclear when Sana adapted the OpenAI API to their products. ' value: unknown dependencies: - OpenAI API description: '"Sana is your all-in-one, AI-assisted, online learning platform (LMS). Author employee training courses and measure team development with Sana''s powerful analytics. Sana partners with the world''s most important organizations and fastest-growing startups to make personalized, adaptive learning available for everyone, everywhere" [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). ' failures: unknown feedback: unknown intended_uses: 'Sana is intended to be used by employers to provide a learning service for their employees. 
' license: explanation: "\"Sana Labs grants Subscriber a limited non-exclusive, non-transferable,\ \ non-sublicensable license to use Sana Labs' web-based personalized and collaborative\ \ learning platform (\u201CSana Platform\u201D) solely in connection with the\ \ subscribed Services in accordance with the Agreement.\" - excerpt from the\ \ Terms of Service.\n" value: custom monitoring: unknown monthly_active_users: unknown name: Sana nationality: USA organization: Sana output_space: question and answer, summarization, sentiment analysis, topic identification prohibited_uses: explanation: '"Sana provides an extensive set of legal documents [[Sana Legal]](https://www.sanalabs.com/legal/), but missing from the documents are prohibited uses of the Sana platform, beyond a mention of what is impermissible under relevant law." - excerpt from the Terms of Service document. ' value: none quality_control: unknown terms_of_service: https://www.sanalabs.com/legal/ type: application url: https://www.sanalabs.com/ user_distribution: unknown ================================================ FILE: assets/sciphi.yaml ================================================ - access: open analysis: none created_date: 2023-11-07 dependencies: - Mistral description: SciPhi Mistral is a Large Language Model (LLM) fine-tuned from Mistral.
feedback: https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k/discussions intended_uses: '' license: MIT modality: text; text model_card: https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k monitoring: unknown name: SciPhi Mistral nationality: USA organization: SciPhi prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k ================================================ FILE: assets/shanghai.yaml ================================================ - access: open analysis: '' created_date: 2022-12-06 dependencies: - Kinetics-400 - WebVid-2M - WebVid-10M - HowTo100M - AVA - Something-Something-v2 - Kinetics-710 description: '' feedback: '' intended_uses: '' license: Apache 2.0 modality: text, video; video model_card: '' monitoring: '' name: InternVideo nationality: China organization: Shanghai AI Laboratory prohibited_uses: '' quality_control: '' size: 1.3B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/pdf/2212.03191.pdf - access: open analysis: Evaluated based on own constructed dataset covering 433 languages. created_date: 2023-05-29 dependencies: - OPUS description: Lego-MT is a multilingual large language model which uses a more efficient approach of being an effective detachable model. feedback: '' intended_uses: '' license: '' modality: text; text model_card: '' monitoring: '' name: Lego-MT nationality: China organization: Shanghai AI Laboratory prohibited_uses: '' quality_control: '' size: 1.2B parameters (dense) training_emissions: unknown training_hardware: 32 A100 GPUs training_time: 15 days type: model url: https://arxiv.org/pdf/2212.10551.pdf - access: open analysis: Evaluated on GSM8K and the competition-level MATH dataset. 
created_date: 2023-10-05 dependencies: - GPT-4 - LLaMA 2 description: MathCoder is a family of models capable of generating code-based solutions for solving challenging math problems. feedback: none intended_uses: bridging the gap between natural language understanding and computational problem-solving license: unknown modality: text; text model_card: none monitoring: none name: MathCoder nationality: China organization: Shanghai AI Laboratory prohibited_uses: none quality_control: none size: 70B parameters (dense) training_emissions: unknown training_hardware: 32 NVIDIA A800 80GB GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2310.03731.pdf - access: open analysis: Evaluated in comparison to LLaMA series models on standard benchmarks. created_date: 2023-09-20 dependencies: [] description: InternLM is a high-quality language model proficient in English, Chinese, and code. feedback: https://huggingface.co/internlm/internlm-20b/discussions intended_uses: '' license: Apache 2.0 modality: code, text; code, text model_card: https://huggingface.co/internlm/internlm-20b monitoring: none name: InternLM nationality: China organization: Shanghai AI Laboratory prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/InternLM/InternLM - access: open analysis: Evaluated across a range of video-related tasks and compared to other multimodal models like CLIP, VideoPrism, and VideoCoCa. InternVideo 2 generally performs among the best of such models on these benchmarks. created_date: 2024-03-22 dependencies: - InternVL - VideoMAEv2 - LAION - WebVid - InternVid - LLaVA - KMash description: InternVideo2 is a new video foundation model (ViFM) that achieves the state-of-the-art performance in action recognition, video-text tasks, and video-centric dialogue. 
feedback: none intended_uses: '' license: MIT modality: text, video; text model_card: none monitoring: unknown name: InternVideo2 nationality: China organization: Shanghai AI Laboratory, Nanjing University, Zhejiang University prohibited_uses: '' quality_control: '' size: 6B parameters training_emissions: unknown training_hardware: 256 NVIDIA A100 GPUs for 32 days, and 64 GPUs for 3 days training_time: 35 days type: model url: https://github.com/OpenGVLab/InternVideo2 - access: open analysis: The model was compared with SOTAs and has shown good performance in generating high-quality human images. created_date: 2024-04-01 dependencies: - CosmicMan-HQ 1.0 description: CosmicMan is a text-to-image foundation model specialized for generating high-fidelity human images with meticulous appearance, reasonable structure, and precise text-image alignment. feedback: unknown intended_uses: The model is intended to generate high-quality, photorealistic human images from text descriptions. Applications include avatar generation and potentially virtual reality and video game character creation. license: unknown modality: text; image model_card: none monitoring: unknown name: CosmicMan nationality: China organization: Shanghai AI Laboratory prohibited_uses: unknown quality_control: The quality control measures taken include modeling the relationship between dense text descriptions and image pixels in a decomposed manner and enforcing attention refocusing without adding extra modules. size: unknown training_emissions: unknown training_hardware: 32 80G NVIDIA A100 GPUs training_time: 1 week type: model url: https://cosmicman-cvpr2024.github.io/ - access: open analysis: Compared to other human image datasets on data quantity, image quality, and annotations. created_date: 2024-04-28 datasheet: none dependencies: [] description: CosmicMan-HQ 1.0 is a large-scale dataset with 6 million high-quality, real-world human images. 
excluded: '' feedback: none included: '' intended_uses: '' license: unknown modality: image monitoring: unknown name: CosmicMan-HQ 1.0 nationality: China organization: Shanghai AI Laboratory prohibited_uses: '' quality_control: unknown sample: [] size: 6 million images type: dataset url: https://arxiv.org/pdf/2404.01294 ================================================ FILE: assets/shop.yaml ================================================ - access: open adaptation: '' created_date: 2023-03-01 dependencies: - ChatGPT API description: "When shoppers search for products, the shopping assistant makes personalized\ \ recommendations based on their requests. Shop\u2019s new AI-powered shopping\ \ assistant will streamline in-app shopping by scanning millions of products to\ \ quickly find what buyers are looking for\u2014or help them discover something\ \ new." failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: Shop Assistant nationality: USA organization: Shop output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' ================================================ FILE: assets/singapore.yaml ================================================ - access: open analysis: Performance assessed on BIG-bench arithmetic sub-task, and various elementary arithmetic tasks. created_date: 2023-05-23 dependencies: - LLaMA - GOAT dataset description: GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers to significantly outperform benchmark standards on a range of arithmetic tasks. feedback: '' intended_uses: Integration into other instruction-tuned LLMs to further enhance arithmetic reasoning abilities in solving math word problems. 
license: Apache 2.0 modality: text; text model_card: none monitoring: '' name: GOAT nationality: Singapore organization: National University of Singapore prohibited_uses: '' quality_control: Number data is randomly generated from log space to reduce likelihood of redundancy and range of magnitudes. size: 7B parameters (dense) training_emissions: unknown training_hardware: 24 GB VRAM GPU training_time: unknown type: model url: https://arxiv.org/pdf/2305.14201.pdf - access: open analysis: Evaluated on relatively simple established benchmarks. created_date: 2024-01-12 dependencies: - RedPajama - The Stack description: OpenMoE is a series of fully open-sourced and reproducible decoder-only MoE LLMs. feedback: https://huggingface.co/OrionZheng/openmoe-base/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/OrionZheng/openmoe-base monitoring: unknown name: OpenMoE nationality: unknown organization: National University of Singapore, University of Edinburgh, ETH Zurich prohibited_uses: '' quality_control: unknown size: 34B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/XueFuzhao/OpenMoE ================================================ FILE: assets/skt.yaml ================================================ - access: closed analysis: none created_date: 2023-09-26 dependencies: [] description: A.X is SK Telecom's proprietary LLM, which has been trained on the Korean language. 
feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: '' name: A.X nationality: South Korea organization: SK Telecom prohibited_uses: '' quality_control: '' size: 39B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.sktelecom.com/en/press/press_detail.do?idx=1582 ================================================ FILE: assets/snap.yaml ================================================ - access: open adaptation: '' created_date: 2023-03-01 dependencies: - ChatGPT API description: My AI offers Snapchatters a friendly, customizable chatbot at their fingertips that offers recommendations, and can even write a haiku for friends in seconds. Snapchat, where communication and messaging is a daily behavior, has 750 million monthly Snapchatters. failures: '' feedback: '' intended_uses: '' license: explanation: '"Snap grants you a worldwide, royalty-free, non-assignable, non-exclusive, revocable, and non-sublicensable license to use the Services." - excerpt from the Terms of Service document. ' value: custom monitoring: '' monthly_active_users: '' name: My AI for Snapchat nationality: USA organization: Snap output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://snap.com/terms type: application url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: '' ================================================ FILE: assets/soochow.yaml ================================================ - access: open analysis: Evaluated across different text benchmarks in English and Chinese. created_date: 2023-10-01 dependencies: [] description: OpenBA is an open-sourced 15B bilingual (English + Chinese) asymmetric seq2seq model. 
feedback: https://huggingface.co/OpenBA/OpenBA-LM/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/OpenBA/OpenBA-LM monitoring: none name: OpenBA nationality: China organization: Soochow University prohibited_uses: '' quality_control: '' size: 15B parameters (dense) training_emissions: 6.5 tCO2eq training_hardware: 8 NVIDIA A100-80GB GPUs training_time: 38k GPU hours type: model url: https://arxiv.org/pdf/2309.10706.pdf ================================================ FILE: assets/speak.yaml ================================================ - access: open adaptation: '' created_date: 2023-03-01 dependencies: - Whisper API description: Speak is an AI-powered language learning app focused on building the best path to spoken fluency and is the fastest-growing English app in South Korea. failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: Speak nationality: USA organization: Speak output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://openai.com/blog/introducing-chatgpt-and-whisper-apis user_distribution: ''
failures: '' feedback: '' intended_uses: '' license: explanation: '"The Spotify software applications and the Content are licensed, not sold or transferred to you, and Spotify and its licensors retain ownership of all copies of the Spotify software applications and Content even after installation on your Devices." - excerpt from the Terms of Service document. ' value: custom monitoring: '' monthly_active_users: '' name: AI DJ nationality: Sweden organization: Spotify output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://www.spotify.com/us/legal/end-user-agreement/ type: application url: https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/ user_distribution: '' ================================================ FILE: assets/stability.yaml ================================================ - access: open analysis: Evaluated on the COCO dataset. created_date: 2023-04-28 dependencies: - LAION-5B description: A text-to-image cascaded pixel diffusion model released in conjunction with AI research lab DeepFloyd. feedback: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions intended_uses: '' license: explanation: License agreement can be found on model card https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 value: custom modality: text; image model_card: https://huggingface.co/DeepFloyd/IF-I-XL-v1.0 monitoring: '' name: DeepFloyd IF nationality: UK organization: Stability AI prohibited_uses: '' quality_control: '' size: 4.3B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://stability.ai/blog/deepfloyd-if-text-to-image-model - access: open analysis: '' created_date: 2023-04-20 dependencies: - StableLM-Alpha dataset - Alpaca dataset - gpt4all dataset - ShareGPT52K dataset - Dolly dataset - HH dataset description: Large language models trained on up to 1.5 trillion tokens. 
feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: StableLM nationality: UK organization: Stability AI prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://github.com/Stability-AI/StableLM - access: open adaptation: '' created_date: 2022-08-22 dependencies: [] description: Stable Diffusion is a generative software that creates images from text prompts. failures: '' feedback: https://huggingface.co/CompVis/stable-diffusion/discussions intended_uses: '' license: explanation: License agreement can be found on https://github.com/CompVis/stable-diffusion/blob/main/LICENSE value: custom monitoring: '' monthly_active_users: '' name: Stable Diffusion nationality: UK organization: Stability AI output_space: image prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://stability.ai/blog/stable-diffusion-public-release user_distribution: '' - access: open adaptation: '' created_date: 2023-07-26 dependencies: [] description: Stable Diffusion XL is an updated version of Stable Diffusion, and creates descriptive images with shorter prompts and can generate words within images. failures: '' feedback: '' intended_uses: '' license: explanation: License can be found at https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/LICENSE.md value: Open Rail++ monitoring: '' monthly_active_users: '' name: Stable Diffusion XL nationality: UK organization: Stability AI output_space: image prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://stability.ai/stablediffusion user_distribution: '' - access: limited analysis: Evaluated via a user study comparing preferences between Stable Video Diffusion and competing text-to-video models.
created_date: 2023-11-21 dependencies: - Large Video Dataset description: Stable Video Diffusion is a latent diffusion model trained to generate short video clips from an image conditioning. feedback: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/discussions intended_uses: Intended for research purposes only. license: explanation: License can be found at https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE value: custom modality: text; video model_card: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt monitoring: '' name: Stable Video Diffusion nationality: UK organization: Stability AI prohibited_uses: Using the model to generate representations of real-world people or events. quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf - access: closed analysis: Large Video Dataset compared to publicly available research datasets on general statistics before and after filtering. created_date: 2023-11-21 datasheet: '' dependencies: - WebVid-10M - CoCa - V-BLIP description: Large Video Dataset is the dataset that trained Stable Video Diffusion, consisting of over 212 years of content. excluded: '' feedback: none included: '' intended_uses: '' license: unknown modality: video with caption monitoring: '' name: Large Video Dataset nationality: UK organization: Stability AI prohibited_uses: '' quality_control: Dataset annotated with dense optical flow, and low optical flow videos are removed. 
sample: [] size: 580M annotated video clip pairs type: dataset url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf - access: open adaptation: '' created_date: 2023-11-01 dependencies: [] description: Sky Replacer is an exciting new tool that allows users to replace the color and aesthetic of the sky in their original photos with a selection of nine alternatives to improve the overall look and feel of the image. failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: Sky Replacer nationality: UK organization: Stability AI output_space: image prohibited_uses: '' quality_control: '' terms_of_service: https://stability.ai/terms-of-use type: application url: https://clipdrop.co/real-estate/sky-replacer user_distribution: '' - access: open analysis: Evaluated on standard LLM benchmarks and in multilingual tasks compared to similarly sized open-source models. created_date: 2024-01-19 dependencies: - RedPajama-Data - The Pile - RefinedWeb - The Stack - OpenWebText - OpenWebMath description: StableLM 2 is a state-of-the-art 1.6 billion parameter small language model trained on multilingual data in English, Spanish, German, Italian, French, Portuguese, and Dutch. feedback: https://huggingface.co/stabilityai/stablelm-2-1_6b/discussions intended_uses: The model is intended to be used as a foundational base model for application-specific fine-tuning. Developers must evaluate and fine-tune the model for safe performance in downstream applications. 
license: explanation: License agreement can be found on https://huggingface.co/stabilityai/stablelm-2-1_6b/blob/main/LICENSE value: custom modality: text; text model_card: https://huggingface.co/stabilityai/stablelm-2-1_6b monitoring: unknown name: StableLM 2 nationality: UK organization: Stability AI prohibited_uses: '' quality_control: '' size: 1.6B parameters (dense) training_emissions: 11 tCO2eq training_hardware: 512 NVIDIA A100 40GB GPUs training_time: 92k GPU hours type: model url: https://stability.ai/news/introducing-stable-lm-2 - access: open analysis: Evaluated on image generation benchmarks in comparison to equal and smaller-sized models. created_date: 2024-01-16 dependencies: [] description: "Stable Cascade is built upon the W\xFCrstchen architecture and its\ \ main difference to other models, like Stable Diffusion, is that it is working\ \ at a much smaller latent space." feedback: https://huggingface.co/stabilityai/stable-cascade/discussions intended_uses: The model is intended for research purposes for now. license: explanation: License agreement can be found on https://github.com/Stability-AI/StableCascade/blob/master/WEIGHTS_LICENSE value: custom modality: text; image model_card: https://huggingface.co/stabilityai/stable-cascade monitoring: unknown name: Stable Cascade nationality: UK organization: Stability AI prohibited_uses: The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model. The model should not be used in any way that violates Stability AI's Acceptable Use Policy. 
quality_control: '' size: unknown training_emissions: 2,276 kgCO2eq training_hardware: Some number of A100 GPUs training_time: 24,602 A100 GPU hours type: model url: https://huggingface.co/stabilityai/stable-cascade - access: open analysis: unknown created_date: 2024-03-18 dependencies: - Objaverse description: Stable Video 3D (SV3D) is a generative model based on Stable Video Diffusion that takes in a still image of an object as a conditioning frame, and generates an orbital video of that object. feedback: https://huggingface.co/stabilityai/sv3d/discussions intended_uses: This model is intended to be used for generating orbital videos of objects from still images. license: StabilityAI Non-Commercial Research Community License modality: image; video model_card: https://huggingface.co/stabilityai/sv3d monitoring: unknown name: Stable Video 3D nationality: UK organization: Stability AI prohibited_uses: The model should not be used for generating factual or true representations of people or events, or in any way that violates Stability AI's Acceptable Use Policy. quality_control: unknown size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://stability.ai/news/introducing-stable-video-3d - access: open analysis: none created_date: 2024-04-03 dependencies: - AudioSparx description: Stable Audio 2.0 sets a new standard in AI-generated audio, producing high-quality, full tracks with coherent musical structure up to three minutes in length at 44.1kHz stereo. feedback: none intended_uses: It can be used to generate melodies, backing tracks, stems, and sound effects. license: unknown modality: audio, text; audio model_card: none monitoring: Advanced content recognition is used to maintain compliance and prevent copyright infringement. name: Stable Audio 2.0 nationality: UK organization: Stability AI prohibited_uses: Uploading copyrighted material for transformation.
quality_control: To protect creator copyrights, for audio uploads, Stability AI partners with Audible Magic to use their content recognition (ACR) technology to power real-time content matching and prevent copyright infringement. Opt-out requests were honored during the training phase. size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://stability-ai.squarespace.com/news/stable-audio-2-0 ================================================ FILE: assets/stability_ai.yaml ================================================ - access: open analysis: The model was tested extensively internally and externally. It has developed and implemented numerous safeguards to prevent harms. They have also received user feedback to make continuous improvements. created_date: 2024-06-12 dependencies: [] description: "Stable Diffusion 3 Medium is Stability AI\u2019s advanced text-to-image\ \ open model. It's suitable for running on consumer PCs and laptops as well as\ \ enterprise-tier GPUs. The model is known for its overall Quality and Photorealism,\ \ prompt understanding, typography, being resource-efficient, and being fine-tuned.\ \ The model in collaboration with NVIDIA and AMD has enhanced performance." feedback: Feedback can be given through Twitter, Instagram, LinkedIn, or Discord Community. intended_uses: The model can be used by professional artists, designers, developers, and AI enthusiasts for creating high-quality image outputs from text inputs. license: Stability Community License modality: text; image model_card: unknown monitoring: Continuous collaboration with researchers, experts, and the community to ensure that the model is being used appropriately. name: Stable Diffusion 3 Medium nationality: UK organization: Stability AI prohibited_uses: Large-scale commercial use requires contacting the organization for licensing details. 
The model should not be used for any purpose that does not adhere to the usage guidelines. quality_control: They have conducted extensive internal and external testing of this model and have implemented numerous safeguards to prevent harms. Safety measures were implemented from the start of training the model and continued throughout testing, evaluation, and deployment. size: 2B parameters training_emissions: Unknown training_hardware: unknown training_time: Unknown type: model url: https://stability.ai/news/stable-diffusion-3-medium - access: open analysis: Consistency across the spatial and temporal axes greatly improves with this model. Stable Video 4D is able to generate novel view videos that are more detailed, faithful to the input video, and are consistent across frames and views compared to existing works. created_date: 2024-07-24 dependencies: - Stable Video Diffusion Model description: Stable Video 4D is our latest AI model for dynamic multi-angle video generation. It allows users to upload a single video and receive novel-view videos of eight new angles/views. This advancement moves from image-based video generation to full 3D dynamic video synthesis. Users can specify camera angles, tailoring the output to meet specific creative needs. The model is currently available on Hugging Face and can generate 5-frame videos across the 8 views in about 40 seconds. feedback: Feedback and reports about the progress should be shared via their social channels like Twitter, Instagram, LinkedIn or their Discord Community. intended_uses: This model can be used for creating dynamic multi-angle videos, with applications in game development, video editing, and virtual reality. It allows professionals in these fields to visualize objects from multiple angles, enhancing the realism and immersion of their products. 
license: Stability Community License modality: video; video model_card: unknown monitoring: Continuous monitoring by the Stability AI team for improvements and refinements. name: Stable Video 4D nationality: UK organization: Stability AI prohibited_uses: Unknown quality_control: The Stability AI team is dedicated to continuous innovation and exploration of real-world use-cases for this model and others. They are actively working to refine and optimize the model beyond the current synthetic datasets it has been trained on. size: Unknown training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://stability.ai/news/stable-video-4d - access: open analysis: The model was evaluated on its ability to quickly and accurately transform a single image into a detailed 3D asset. This evaluation highlighted the model's unprecedented speed and quality, marking it as a valuable tool for rapid prototyping in 3D work. Compared to the previous SV3D model, Stable Fast 3D offers significantly reduced inference times--0.5 seconds versus 10 minutes--while maintaining high-quality output. created_date: 2024-08-01 dependencies: - TripoSR description: Stable Fast 3D is a ground-breaking model in 3D asset generation technology. It can transform a single input image into a highly detailed 3D asset in around half a second, setting new standards in terms of speed and quality in the realm of 3D reconstruction. Users start the process by uploading an image of an object. Stable Fast 3D then swiftly generates a complete 3D asset, which includes, UV unwrapped mesh, material parameters, albedo colors with reduced illumination bake-in, and optional quad or triangle remeshing. This model has various applications, notably for game and virtual reality developers, as well as professionals in retail, architecture, design, and other graphic-intensive professions. 
feedback: Information on any downstream issues with the model can be reported to Stability AI through their support request system. intended_uses: The model is intended for use in game development, virtual reality, retail, architecture, design and other graphically intense professions. It allows for rapid prototyping in 3D work, assisting both enterprises and indie developers. It's also used in movie production for creating static assets for games and 3D models for e-commerce, as well as fast model creation for AR/VR. license: Stability Community License modality: image; 3D model_card: https://huggingface.co/stabilityai/stable-fast-3d monitoring: Unknown name: Stable Fast 3D nationality: UK organization: Stability AI prohibited_uses: Use by individuals or organizations with over $1M in annual revenue without obtaining an Enterprise License. quality_control: Unknown size: unknown training_emissions: Unknown training_hardware: unknown training_time: Unknown type: model url: https://stability.ai/news/introducing-stable-fast-3d - access: explanation: This open release includes multiple model variants, including Stable Diffusion 3.5 Large and Stable Diffusion 3.5 Large Turbo, and as of October 29th, Stable Diffusion 3.5 Medium. value: open analysis: Our analysis shows that Stable Diffusion 3.5 Large leads the market in prompt adherence and rivals much larger models in image quality. created_date: 2024-10-29 dependencies: [] description: Stable Diffusion 3.5 reflects our commitment to empower builders and creators with tools that are widely accessible, cutting-edge, and free for most use cases. feedback: We look forward to hearing your feedback on Stable Diffusion 3.5 and seeing what you create with the models. intended_uses: This model is ideal for professional use cases at 1 megapixel resolution.
license: explanation: This open release includes multiple variants that are customizable, run on consumer hardware, and are available for use under the permissive Stability AI Community License. value: Stability AI Community modality: explanation: Capable of generating a wide range of styles and aesthetics like 3D, photography, painting, line art, and virtually any visual style imaginable. value: text; image model_card: unknown monitoring: unknown name: Stable Diffusion 3.5 nationality: UK organization: Stability AI prohibited_uses: unknown quality_control: We believe in safe, responsible AI practices and take deliberate measures to ensure Integrity starts at the early stages of development. size: explanation: At 8.1 billion parameters, with superior quality and prompt adherence, this base model is the most powerful in the Stable Diffusion family. value: 8.1B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://stability.ai/news/introducing-stable-diffusion-3-5 ================================================ FILE: assets/stanford.yaml ================================================ - access: open analysis: '' created_date: 2022-12-15 dependencies: - The Pile description: '' feedback: '' intended_uses: '' license: bigscience-bloom-rail-1.0 modality: text; text model_card: '' monitoring: '' name: BioMedLM nationality: USA organization: Stanford prohibited_uses: '' quality_control: '' size: 2.7B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://crfm.stanford.edu/2022/12/15/pubmedgpt.html - access: open analysis: Evaluated on own framework that tests domain-specific tasks in medical field. created_date: 2022-11-23 dependencies: - Stable Diffusion - RoentGen radiology dataset description: RoentGen is a generative medical imaging model that can create visually convincing X-ray images. 
feedback: '' intended_uses: '' license: '' modality: text; image model_card: '' monitoring: '' name: RoentGen nationality: USA organization: Stanford prohibited_uses: '' quality_control: '' size: 330M parameters (dense) training_emissions: unknown training_hardware: 64 A100 GPUs training_time: 60k training steps per day type: model url: https://arxiv.org/pdf/2211.12737.pdf - access: open analysis: Evaluated on three physical control tasks, drawing, steering, and human body movement on various dynamics created_date: 2023-06-12 dependencies: - GPT-2 - BABEL - text-davinci-003 description: Model trained to generate language corrections for physical control tasks. feedback: '' intended_uses: '' license: MIT modality: human trajectories; text model_card: '' monitoring: '' name: CORGI nationality: USA organization: Stanford prohibited_uses: '' quality_control: '' size: 124M parameters (dense) training_emissions: '' training_hardware: one NVIDIA A40 GPU training_time: explanation: The authors do not report the training time, but do report that they train for 200 epochs. value: unknown type: model url: https://arxiv.org/pdf/2306.07012.pdf - access: explanation: The dataset can be downloaded from [[Hugging Face]](https://huggingface.co/datasets/tatsu-lab/alpaca). The code for generating data is available on the [[GitHub repository]](https://github.com/tatsu-lab/stanford_alpaca#data-generation-process). value: open analysis: '' created_date: explanation: 'The date the [[blog post]](https://crfm.stanford.edu/2023/03/13/alpaca.html) was released. ' value: 2023-03-13 datasheet: https://huggingface.co/datasets/tatsu-lab/alpaca dependencies: - text-davinci-003 description: 'Alpaca dataset consists of 52,000 instruction-following demonstrations generated in the style of the [Self-Instruct framework](https://github.com/yizhongw/self-instruct) using OpenAI''s text-davinci-003 engine.
This instruction data can be used to conduct instruction-tuning for language models and make the language model follow instruction better. ' excluded: '' feedback: Feedback can be provided on [[GitHub Issues]](https://github.com/tatsu-lab/stanford_alpaca/issues). included: '' intended_uses: Alpaca is intended and licensed for research use only. license: CC BY-NC 4.0 modality: text (English) monitoring: '' name: Alpaca dataset nationality: USA organization: Stanford prohibited_uses: '' quality_control: '' sample: [] size: 52K instruction-following demonstrations type: dataset url: https://crfm.stanford.edu/2023/03/13/alpaca.html - access: explanation: The weight diff between Alpaca-7B and LLaMA-7B is located on the [[Hugging Face]](https://huggingface.co/tatsu-lab/alpaca-7b-wdiff). To recover the original Alpaca-7B weights, follow the steps given [[here]](https://github.com/tatsu-lab/stanford_alpaca#recovering-alpaca-weights). Training and data generation code can be found on the [[GitHub repository]](https://github.com/tatsu-lab/stanford_alpaca). An [[online demo]](https://chat.lmsys.org/?model=alpaca-13b) is also available. value: open analysis: '' created_date: explanation: 'The date the [[blog post]](https://crfm.stanford.edu/2023/03/13/alpaca.html) was released. ' value: 2023-03-13 dependencies: - LLaMa - Alpaca dataset description: 'Alpaca-7B is an instruction-following model fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ' feedback: Feedback can be provided on [[GitHub Issues]](https://github.com/tatsu-lab/stanford_alpaca/issues). intended_uses: Alpaca is intended and licensed for research use only.
license: CC BY NC 4.0 (model weights) modality: text (English) model_card: '' monitoring: '' name: Alpaca nationality: USA organization: Stanford prohibited_uses: '' quality_control: '' size: 7B parameters (dense model) training_emissions: unknown training_hardware: '' training_time: '' type: model url: https://crfm.stanford.edu/2023/03/13/alpaca.html - access: open analysis: Merlin has been comprehensively evaluated on 6 task types and 752 individual tasks. The non-adapted (off-the-shelf) tasks include zero-shot findings classification, phenotype classification, and zero-shot cross-modal retrieval, while model adapted tasks include 5-year chronic disease prediction, radiology report generation, and 3D semantic segmentation. It has undergone internal validation on a test set of 5,137 CTs, and external validation on 7,000 clinical CTs and on two public CT datasets (VerSe, TotalSegmentator). created_date: 2024-09-08 dependencies: [] description: Merlin is a 3D Vision Language Model that's designed for interpretation of abdominal computed tomography (CT) scans. It uses both structured Electronic Health Record (EHR) and unstructured radiology reports for supervision without requiring additional manual annotations. The model was trained on a high-quality clinical dataset of paired CT scans, EHR diagnosis codes, and radiology reports and was evaluated on 6 task types and 752 individual tasks. feedback: Feedback and reports for problems with the model should likely be routed to Stanford Center for Artificial Intelligence in Medicine and Imaging, or the corresponding author of the research (louis.blankemeier@stanford.edu). intended_uses: This model is intended for use in the interpretation of abdominal computed tomography (CT) scans, chronic disease prediction, radiology report generation, and 3D semantic segmentation. 
license: Unknown modality: image; text model_card: unknown monitoring: Unknown name: Merlin nationality: unknown organization: Stanford Center for Artificial Intelligence in Medicine and Imaging, Stanford University prohibited_uses: The model should not be used outside of healthcare-related context, such as for personal or non-medical commercial purposes. quality_control: The model has undergone extensive evaluations and also internal and external validation tests. size: Unknown training_emissions: Unknown training_hardware: Single GPU. training_time: Unknown type: model url: https://arxiv.org/pdf/2406.06512 ================================================ FILE: assets/stonybrook.yaml ================================================ - access: explanation: The dataset is available for download on the [[SBU Captions Dataset website]](https://www.cs.rice.edu/~vo9/sbucaptions/), along with additional resources. value: open analysis: explanation: See [[Section 5]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) value: 'Authors perform two quantitative evaluations for image captioning - direct user ratings of relevance and BLEU score. They also propose a new evaluation task: "we propose a new evaluation task where a user is presented with two photographs and one caption. The user must assign the caption to the most relevant image. For evaluation we use a query image, a random image and a generated caption." ' created_date: explanation: 'The date the [[paper]](https://papers.nips.cc/paper_files/paper/2011/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html) was published. ' value: 2011-12-12 datasheet: none dependencies: - Flickr description: 'SBU Captions Dataset is a collection of 1 million images and associated captions from Flickr, filtered so that the descriptions are likely to refer to visual content. 
' excluded: explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) value: '"This produces a very large, but noisy initial set of photographs with associated text. We filter this set of photos so that the descriptions attached to a picture are relevant and visually descriptive." ' feedback: '' included: explanation: See [[Section 2]](https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf) value: "\"To encourage visual descriptiveness in our collection, we select only\ \ those images with descriptions of satisfactory length based on observed lengths\ \ in visual descriptions. We also enforce that retained descriptions contain\ \ at least 2 words belonging to our term lists and at least one prepositional\ \ word, e.g. \u201Con\u201D, \u201Cunder\u201D which often indicate visible\ \ spatial relationships.\"\n" intended_uses: '' license: none modality: image, text monitoring: none name: SBU Captions nationality: USA organization: Stony Brook University prohibited_uses: '' quality_control: unknown sample: [] size: 1M image-text pairs type: dataset url: https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf ================================================ FILE: assets/suno.yaml ================================================ - access: open analysis: '' created_date: 2023-04-20 dependencies: - AudioLM description: Bark is a text-to-audio model that can generate multilingual speech as well as other noises. 
feedback: https://huggingface.co/spaces/suno/bark/discussions intended_uses: '' license: MIT modality: text; audio model_card: https://github.com/suno-ai/bark/blob/main/model-card.md monitoring: '' name: Bark nationality: USA organization: Suno prohibited_uses: '' quality_control: '' size: '' training_emissions: unknown training_hardware: '' training_time: unknown type: model url: https://github.com/suno-ai/bark ================================================ FILE: assets/tiger.yaml ================================================ - access: open analysis: Evaluated across a range of domain tasks across standard benchmarks in comparison to predecessor Llama 2. created_date: 2023-10-19 dependencies: explanation: 7, 13, and 70B sizes trained on LLaMA 2. 180B model trained on BLOOM. value: - Llama 2 - BLOOM description: TigerBot is an open source multilingual multitask LLM. feedback: https://huggingface.co/TigerResearch/tigerbot-180b-base-v2/discussions intended_uses: '' license: explanation: Should also consult with BLOOM and Llama 2 open-source licenses due to pre-training. value: Apache 2.0 modality: text; text model_card: https://huggingface.co/TigerResearch/tigerbot-180b-base-v2 monitoring: unknown name: TigerBot nationality: International organization: TigerResearch prohibited_uses: '' quality_control: Safety filtering performed to mitigate risk and remove toxic content. 
size: 180B parameters (dense) training_emissions: unknown training_hardware: 32 A100-40G GPUs training_time: unknown type: model url: https://arxiv.org/pdf/2312.08688.pdf ================================================ FILE: assets/together.yaml ================================================ - access: open analysis: '' created_date: 2022-11-29 dependencies: - GPT-J - P3 - NaturalInstructions-v2 description: '' feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: GPT-JT nationality: USA organization: Together prohibited_uses: '' quality_control: '' size: 6B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai - access: open analysis: '' created_date: 2023-03-10 dependencies: - GPT-NeoX - OIG-43M description: '' feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: GPT-NeoXT-Chat-Base nationality: USA organization: Together prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.together.xyz/blog/openchatkit - access: open analysis: '' created_date: 2023-03-10 dependencies: - GPT-JT - OIG-moderation description: '' feedback: '' intended_uses: '' license: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: OpenChatKit moderation model nationality: USA organization: Together prohibited_uses: '' quality_control: '' size: 6B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://www.together.xyz/blog/openchatkit - access: open analysis: '' created_date: 2023-03-10 datasheet: '' dependencies: - P3 - NaturalInstructions-v2 - FLAN dataset description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: Apache 2.0 modality: text monitoring: '' name: 
OIG-43M nationality: International organization: Together, LAION, Ontocord prohibited_uses: '' quality_control: '' sample: [] size: 43M instructions type: dataset url: https://laion.ai/blog/oig-dataset/ - access: open analysis: '' created_date: 2023-03-10 datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: Apache 2.0 modality: text monitoring: '' name: OIG-moderation nationality: International organization: Together, LAION, Ontocord prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://laion.ai/blog/oig-dataset/ - access: open analysis: '' created_date: 2023-04-17 datasheet: https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T dependencies: - GitHub - Wikipedia description: The RedPajama base dataset is a 1.2 trillion token fully-open dataset created by following the recipe described in the LLaMA paper excluded: '' feedback: '' included: '' intended_uses: '' license: Apache 2.0 modality: text monitoring: '' name: RedPajama-Data nationality: USA organization: Together prohibited_uses: '' quality_control: '' sample: [] size: 1.2 trillion tokens type: dataset url: https://www.together.xyz/blog/redpajama - access: open analysis: Model evaluated over AlpacaEval, Rouge score over BookSum, and accuracy over MQA. created_date: 2023-08-18 dependencies: - BookSum dataset - MQA dataset - Together API - LLaMA 2 description: Llama-2-7B-32K-Instruct is an open-source, long-context chat model finetuned from Llama-2-7B-32K, over high-quality instruction and chat data.
feedback: https://huggingface.co/togethercomputer/Llama-2-7B-32K-Instruct/discussions intended_uses: '' license: LLaMA 2 modality: explanation: text; text value: text; text model_card: https://huggingface.co/togethercomputer/Llama-2-7B-32K-Instruct monitoring: '' name: Llama-2-7B-32K-Instruct nationality: USA organization: Together prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://together.ai/blog/llama-2-7b-32k-instruct - access: open analysis: none created_date: 2023-10-30 datasheet: '' dependencies: - Common Crawl description: RedPajama-Data-v2 is a new version of the RedPajama dataset, with 30 trillion filtered and deduplicated tokens (100+ trillions raw) from 84 CommonCrawl dumps covering 5 languages, along with 40+ pre-computed data quality annotations that can be used for further filtering and weighting. excluded: '' feedback: Feedback can be sent to Together via https://www.together.ai/contact included: documents in English, German, French, Spanish, and Italian. intended_uses: To be used as the start of a larger, community-driven development of large-scale datasets for LLMs. license: Apache 2.0 modality: text monitoring: '' name: RedPajama-Data-v2 nationality: USA organization: Together prohibited_uses: '' quality_control: tokens filtered and deduplicated sample: [] size: 30 trillion tokens type: dataset url: https://together.ai/blog/redpajama-data-v2 - access: open analysis: Model evaluated on a suite of short-context task benchmarks. created_date: 2023-12-08 dependencies: - Hyena - RedPajama-Data description: StripedHyena is an LLM and the first alternative model competitive with the best open-source Transformers in short and long-context evaluations, according to Together. 
feedback: https://huggingface.co/togethercomputer/StripedHyena-Hessian-7B/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/togethercomputer/StripedHyena-Hessian-7B monitoring: '' name: StripedHyena nationality: USA organization: Together prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.together.ai/blog/stripedhyena-7b - access: open analysis: Model evaluated on a suite of short-context task benchmarks. created_date: 2023-12-08 dependencies: - Hyena - RedPajama-Data description: StripedHyena Nous is an LLM and chatbot, along with the first alternative model competitive with the best open-source Transformers in short and long-context evaluations, according to Together. feedback: https://huggingface.co/togethercomputer/StripedHyena-Nous-7B/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/togethercomputer/StripedHyena-Nous-7B monitoring: '' name: StripedHyena Nous nationality: USA organization: Together prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.together.ai/blog/stripedhyena-7b - access: open analysis: The model was evaluated using five popular vision-language benchmarks that require strong commonsense reasoning and detailed image understanding, AI2D, ScienceQA, MMMU, MMVet, and POPE. It demonstrated competitive performance in these evaluations compared to other vision-language models. created_date: 2024-06-06 dependencies: - LLaMA description: A large vision-language model with multi-resolution zoom that enhances fine-grained visual understanding and reasoning about image regions. 
The Dragonfly model comes in two variants, the general-domain model ("Llama-3-8b-Dragonfly-v1") trained on 5.5 million image-instruction pairs, and the biomedical variant ("Llama-3-8b-Dragonfly-Med-v1") fine-tuned on an additional 1.4 million biomedical image-instruction pairs. Dragonfly demonstrates promising performance on vision-language benchmarks like commonsense visual QA and image captioning. feedback: Unknown intended_uses: Dragonfly is designed for image-text tasks, including commonsense visual question answering and image captioning. It is further focused on tasks that require fine-grained understanding of high-resolution image regions, such as in medical imaging. license: unknown modality: image, text; text model_card: unknown monitoring: Unknown name: Dragonfly nationality: USA organization: Together prohibited_uses: Unknown quality_control: The model employs two key strategies (multi-resolution visual encoding and zoom-in patch selection) that enable it to efficiently focus on fine-grained details in image regions and provide better commonsense reasoning. Its performance was evaluated on several benchmark tasks for quality assurance. size: 8B parameters training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.together.ai/blog/dragonfly-v1 ================================================ FILE: assets/tokyo.yaml ================================================ - access: open analysis: Evaluated on all language datasets compared to similarly sized SOTA models, with Aurora-M achieving strong performance in most. created_date: 2024-04-23 dependencies: - StarCoderPlus description: Aurora-M is a 15B parameter multilingual open-source model trained on English, Finnish, Hindi, Japanese, Vietnamese, and code. 
feedback: none intended_uses: '' license: unknown modality: text; text model_card: none monitoring: unknown name: Aurora-M nationality: unknown organization: Tokyo Institute of Technology, MIT-IBM Watson Lab, Sapienza University of Rome prohibited_uses: '' quality_control: '' size: 15B parameters training_emissions: explanation: The training process operated entirely on 100% hydro-powered energy and included waste heat recycling. value: unknown training_hardware: LUMI supercomputer, using 128 AMD MI250X GPUs training_time: 48 days type: model url: https://arxiv.org/pdf/2404.00399 - access: explanation: Reflection Llama-3.1 70B is an open-source LLM. value: open analysis: Unknown created_date: 2024-09-28 dependencies: - Glaive - Llama 3.1 description: Reflection Llama-3.1 70B is an open-source LLM, trained with a new technique called Reflection-Tuning that teaches a LLM to detect mistakes in its reasoning and correct course. The model was trained on synthetic data generated by Glaive. feedback: Unknown intended_uses: The model is intended for complex reasoning and reflection tasks. It is designed to separate its internal thoughts and reasoning from its final answer. license: Llama 3.1 Community License Agreement modality: explanation: you can sample from Reflection Llama-3.1 70B using the same code, pipelines, etc. as any other Llama model. This implies that the model takes text input and produces text output. value: text; text model_card: https://huggingface.co/mattshumer/Reflection-70B monitoring: Unknown name: Reflection Llama-3.1 70B nationality: Unknown organization: Unknown prohibited_uses: Unknown quality_control: The model uses a Reflection-Tuning technique which allows it to notice mistakes in its reasoning and correct them. size: explanation: The size is in the model's name, Reflection Llama-3.1 70B. 
value: 70B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://huggingface.co/mattshumer/Reflection-70B - access: explanation: Pixtral 12B is released under Apache 2.0 license. value: open analysis: The model outperforms other models of similar and larger sizes on multimodal benchmarks. An open-source benchmark, MM-MT-Bench, is contributed for evaluating vision-language models. created_date: 2024-10-10 dependencies: - Mistral Nemo 12B description: Pixtral 12B is a 12-billion-parameter multimodal language model trained to understand both natural images and documents, achieving leading performance on various multimodal benchmarks without compromising on natural language performance. feedback: Unknown intended_uses: Multimodal instruction following tasks, capable of multi-turn, multi-image conversations. license: explanation: Pixtral 12B is released under Apache 2.0 license. value: Apache 2.0 modality: explanation: Pixtral 12B is trained to understand both natural images and documents. value: text; image model_card: unknown monitoring: Evaluation protocols and benchmarks are open-sourced to establish fair and standardized testing. name: Pixtral 12B nationality: Unknown organization: Unknown prohibited_uses: Unknown quality_control: Evaluation protocols for multimodal language models were standardized and analysis was conducted to improve the reliability of model evaluations. size: explanation: "We introduce Pixtral 12B, a 12\u2013billion-parameter multimodal\ \ language model." value: 12B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://arxiv.org/pdf/2410.07073 ================================================ FILE: assets/toronto.yaml ================================================ - access: open analysis: Compared models trained on OpenWebMath for 1 epoch to models trained on The Pile and ProofPile on mathematics benchmarks. 
created_date: 2023-10-10 datasheet: Can be found at section E of https://arxiv.org/pdf/2310.06786.pdf dependencies: - Common Crawl description: OpenWebMath is an open dataset containing 14.7B tokens of mathematical webpages from Common Crawl, inspired by Minerva. excluded: '' feedback: https://huggingface.co/datasets/open-web-math/open-web-math/discussions included: '' intended_uses: Language model pretraining, finetuning, and evaluation. license: ODC-By 1.0 modality: text, mathematical tokens monitoring: none name: OpenWebMath nationality: Canada organization: University of Toronto prohibited_uses: Any tasks which may be considered irresponsible or harmful. quality_control: Documents are filtered, processed for mathematical value, deduplicated, and then the largest documents are manually inspected for quality. sample: [] size: 14.7B documents type: dataset url: https://arxiv.org/pdf/2310.06786.pdf ================================================ FILE: assets/transformify.yaml ================================================ - access: open adaptation: '' created_date: 2023-05-30 dependencies: - GPT-4 description: Transformify Automate is a platform for automated task integration using natural language prompts. failures: '' feedback: '' intended_uses: '' license: '' monitoring: '' monthly_active_users: '' name: Transformify Automate nationality: USA organization: Transformify output_space: text and code prohibited_uses: '' quality_control: '' terms_of_service: https://www.transformify.ai/legal-stuff type: application url: https://www.transformify.ai/automate user_distribution: '' ================================================ FILE: assets/trevor.yaml ================================================ - access: explanation: 'The tool can only be accessed by the Trevor Project counselors for training purposes. 
' value: closed adaptation: explanation: 'The base GPT-2 model was fine-tuned on the previous conversations between new trainees and the Trevor Project counselors posing as a fictional persona in crisis. ' value: Fine-tuning created_date: explanation: 'The release date of the blog post introducing Crisis Contact Simulator [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). ' value: 2021-03-24 dependencies: - OpenAI API description: 'Crisis Contact Simulator, developed as part of a collaboration with Google.org, helps train The Trevor Project counselors by mimicking a teen in crisis. Crisis Contact Simulator is used as part of the training programs for the Trevor Project''s 24/7 digital crisis services that support LGBTQ youth [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). ' failures: unknown feedback: unknown intended_uses: 'Training counselors ' license: unknown monitoring: unknown monthly_active_users: unknown name: Crisis Contact Simulator nationality: USA organization: The Trevor Project output_space: Dialogue prohibited_uses: unknown quality_control: explanation: 'The tool can only be accessed by the Trevor Project counselors for training purposes. ' value: Limited release terms_of_service: unknown type: application url: https://www.thetrevorproject.org/ user_distribution: unknown ================================================ FILE: assets/triml.yaml ================================================ - access: open analysis: Evaluated on standard VLM benchmarks and outperforms SotA open-source VLMs as of release. created_date: 2024-02-09 dependencies: [] description: Prism is a family of VLMs trained using new analyses about key vision design axes. 
feedback: none intended_uses: '' license: LLaMA 2 modality: image, text; text model_card: none monitoring: unknown name: Prism nationality: Japan organization: Toyota Research Institute prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: 8 A100 GPUs training_time: less than 9 hours type: model url: https://arxiv.org/pdf/2402.07865.pdf ================================================ FILE: assets/tsinghua.yaml ================================================ - access: explanation: Model weights are available but gated via an [[application form]](https://models.aminer.cn/codegeex/download/request) value: limited analysis: none created_date: 2022-09-20 dependencies: [] description: CodeGeeX is an autoregressive language model trained on code feedback: none intended_uses: none license: explanation: The license is provided in the [[Github repository]](https://github.com/THUDM/CodeGeeX) value: Apache 2.0 modality: text; code model_card: none monitoring: none name: CodeGeeX nationality: China organization: Tsinghua University prohibited_uses: none quality_control: none size: 13B parameters (dense) training_emissions: unknown training_hardware: THUDM 1536 Ascend 910 (32GB) Cluster training_time: unknown type: model url: https://github.com/THUDM/CodeGeeX - access: explanation: Model checkpoints available from [[Wudao-Wenhui]](https://resource.wudaoai.cn/home?ind=2&name=WuDao%20WenHui&id=1399364355975327744) value: open analysis: '' created_date: explanation: The date the model paper was released value: 2021-05-26 dependencies: [] description: CogView is a transformer model for text-to-image generation feedback: '' intended_uses: '' license: explanation: 'The license is provided in the [[Github repository]](https://github.com/THUDM/CogView) ' value: Apache 2.0 modality: text; image model_card: none monitoring: '' name: CogView nationality: China organization: Tsinghua University prohibited_uses: '' 
quality_control: '' size: 4B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2105.13290 - access: explanation: The model checkpoints are available for download from [[BAAI]](https://model.baai.ac.cn/model-detail/100041) value: open analysis: '' created_date: explanation: The date the model paper was released value: 2022-04-28 dependencies: [] description: CogView 2 is a hierarchical transformer for text-to-image generation feedback: '' intended_uses: '' license: explanation: 'The license is provided in the [[Github repository]](https://github.com/THUDM/CogView2) ' value: Apache 2.0 modality: text; image model_card: none monitoring: '' name: CogView 2 nationality: China organization: Tsinghua University prohibited_uses: '' quality_control: '' size: 6B parameters (dense) training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2204.14217 - access: explanation: Model checkpoints are available for download from https://github.com/THUDM/CogVideo value: open analysis: '' created_date: explanation: The date the model paper was released value: 2022-05-29 dependencies: [] description: CogVideo is a transformer model for text-to-video generation feedback: '' intended_uses: '' license: explanation: 'The license is provided in the [[Github repository]](https://github.com/THUDM/CogVideo) ' value: Apache 2.0 modality: text; video model_card: none monitoring: '' name: CogVideo nationality: China organization: Tsinghua University prohibited_uses: '' quality_control: '' size: unknown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2205.15868 - access: explanation: Model checkpoints are available from the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) value: open analysis: '' created_date: explanation: The date the model website was made public value: 2022-08-04 dependencies: - 
The Pile - GLM-130B Chinese corpora - P3 - DeepStruct finetuning dataset description: GLM-130B is a bidirectional language model trained on English and Chinese feedback: '' intended_uses: '' license: explanation: Unique model license. See the [[GitHub repository]](https://github.com/THUDM/GLM-130B/blob/main/MODEL_LICENSE) value: GLM-130B License modality: text; text model_card: none monitoring: '' name: GLM-130B nationality: China organization: Tsinghua University prohibited_uses: '' quality_control: '' size: 130B parameters (dense) training_emissions: '' training_hardware: THUDM 96 DGX-A100 (40G) cluster training_time: '' type: model url: https://keg.cs.tsinghua.edu.cn/glm-130b/ - access: open analysis: Evaluated on image captioning and visual question answering benchmarks. created_date: 2023-11-06 dependencies: - Vicuna - CLIP description: CogVLM is a powerful open-source visual language foundation model feedback: none intended_uses: Future multimodal research license: explanation: Model license can be found at https://github.com/THUDM/CogVLM/blob/main/MODEL_LICENSE. Code license is under Apache 2.0 value: custom modality: image, text; text model_card: none monitoring: none name: CogVLM nationality: unknown organization: Zhipu AI, Tsinghua University prohibited_uses: none quality_control: none size: 17B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: 4096 A100 days type: model url: https://arxiv.org/pdf/2311.03079.pdf - access: open analysis: Evaluated on AlpacaEval Leaderboard benchmarks. created_date: 2023-06-27 dependencies: - UltraChat description: UltraLM is a series of chat language models trained on UltraChat. 
feedback: https://huggingface.co/openbmb/UltraLM-13b/discussions intended_uses: '' license: LLaMA 2 modality: text; text model_card: https://huggingface.co/openbmb/UltraLM-13b monitoring: unknown name: UltraLM nationality: China organization: Tsinghua University prohibited_uses: '' quality_control: '' size: 13B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/thunlp/UltraChat#UltraLM - access: open analysis: UltraLM evaluated off of UltraChat is evaluated on standard LLM benchmarks. created_date: 2023-04-20 datasheet: https://huggingface.co/datasets/stingning/ultrachat dependencies: [] description: UltraChat is an open-source, large-scale, and multi-round dialogue data powered by Turbo APIs. excluded: '' feedback: https://huggingface.co/datasets/stingning/ultrachat/discussions included: Dialogue data of questions about the world, writing and creation tasks, and questions on existing materials. intended_uses: '' license: MIT modality: text monitoring: unknown name: UltraChat nationality: China organization: Tsinghua University prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://github.com/thunlp/UltraChat ================================================ FILE: assets/twelvelabs.yaml ================================================ - access: open analysis: Evaluated in comparison to SOTA video-to-language models. created_date: 2023-10-23 dependencies: - MSR-VTT - Video-ChatGPT Video Descriptions Dataset description: Pegasus-1 is a video-language foundation model. feedback: none intended_uses: '' license: unknown modality: video; text model_card: none monitoring: none name: Pegasus-1 nationality: USA organization: Twelve Labs prohibited_uses: '' quality_control: Data selected and cleaned to eliminate toxic and biased content. 
size: 80B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://app.twelvelabs.io/blog/introducing-pegasus-1 - access: open analysis: Marengo-2.6 sets new benchmarks in zero-shot text-to-video, text-to-image, and text-to-audio retrieval tasks with a single embedding model. created_date: 2024-03-01 dependencies: [] description: "Marengo 2.6 is a new state-of-the-art (SOTA) multimodal foundation\ \ model capable of performing any-to-any search tasks, including Text-To-Video,\ \ Text-To-Image, Text-To-Audio, Audio-To-Video, Image-To-Video, and more.\_" feedback: none intended_uses: '' license: unknown modality: audio, image, text; audio, image, video model_card: none monitoring: '' name: Marengo 2.6 nationality: USA organization: Twelve Labs prohibited_uses: '' quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://www.twelvelabs.io/blog/introducing-marengo-2-6 ================================================ FILE: assets/uae.yaml ================================================ - access: open analysis: Evaluated in 1-shot against the PaLM models, with the tasks of the paper "Language models are few-shot learners" (Brown et al., 2020); (2) on a small set of few-shot tasks reported by the GPT-4 paper; (3) against state-of-the-art models across common sense, question answering, and code tasks; (4) against models which also report results from the EAI Harness, for which we are able to compare with identical prompts and metrics. created_date: 2023-06-14 dependencies: - RefinedWeb description: "Falcon-40B is a 40B parameters causal decoder-only model built by\ \ TII and trained on 1,000B tokens of\_RefinedWeb enhanced with curated corpora." feedback: https://huggingface.co/tiiuae/falcon-40b/discussions intended_uses: Research on large language models; as a foundation for further specialization for specific use cases. 
license: Apache 2.0 modality: text; text model_card: https://huggingface.co/tiiuae/falcon-40b monitoring: None name: Falcon-40B nationality: UAE organization: UAE Technology Innovation Institute prohibited_uses: irresponsible or harmful use or production use without adequate assessment of risks and mitigation. quality_control: '' size: 40B parameters (dense) training_emissions: unknown training_hardware: 384 A100 40GB GPUs training_time: 2 months type: model url: https://arxiv.org/pdf/2311.16867.pdf - access: open analysis: '' created_date: 2023-06-01 datasheet: https://huggingface.co/datasets/tiiuae/falcon-refinedweb dependencies: [] description: RefinedWeb is a high-quality five trillion tokens web-only English pretraining dataset. excluded: '' feedback: '' included: '' intended_uses: '' license: explanation: License can be found at https://huggingface.co/datasets/tiiuae/falcon-refinedweb value: custom modality: text monitoring: '' name: RefinedWeb nationality: UAE organization: UAE Technology Innovation Institute prohibited_uses: '' quality_control: '' sample: [] size: 600B tokens type: dataset url: https://arxiv.org/pdf/2306.01116.pdf - access: open analysis: Falcon-180B outperforms LLaMA-2, StableLM, RedPajama, MPT on the Open LLM Leaderboard at https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard. created_date: 2023-09-06 dependencies: - RefinedWeb description: Falcon-180B is a 180B parameters causal decoder-only model built by TII and trained on 3,500B tokens of RefinedWeb enhanced with curated corpora. feedback: https://huggingface.co/tiiuae/falcon-180b/discussions intended_uses: Research on large language models; as a foundation for further specialization for specific use cases. 
license: unknown modality: text; text model_card: https://huggingface.co/tiiuae/falcon-180B monitoring: None name: Falcon-180B nationality: UAE organization: UAE Technology Innovation Institute prohibited_uses: Production use without adequate assessment of risks and mitigation; any use cases which may be considered irresponsible or harmful. quality_control: '' size: 180B parameters (dense) training_emissions: '' training_hardware: 4096 A100 40GB GPUs training_time: 9 months type: model url: https://arxiv.org/pdf/2311.16867.pdf ================================================ FILE: assets/unknown.yaml ================================================ - access: explanation: producing the currently strongest open-source base model. value: open analysis: Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models. created_date: 2025-01-14 dependencies: - DeepSeek-R1 description: DeepSeek-V3 is a Mixture-of-Experts (MoE) language model with 671B total parameters and 37B activated per token. It utilizes Multi-head Latent Attention (MLA) and adopts innovative strategies for improved performance, such as an auxiliary-loss-free load balancing and a multi-token prediction training objective. Comprehensive evaluations show it achieves performance comparable to leading closed-source models. feedback: unknown intended_uses: unknown license: MIT modality: unknown model_card: https://huggingface.co/deepseek-ai/DeepSeek-V3 monitoring: unknown name: DeepSeek-V3 nationality: unknown organization: DeepSeek prohibited_uses: unknown quality_control: Post-training includes knowledge distillation from the DeepSeek-R1 model, incorporating verification and reflection patterns to enhance reasoning performance. size: explanation: a strong Mixture-of-Experts (MoE) language model with 671B total parameters with 37B activated for each token. 
value: 671B parameters (sparse) training_emissions: unknown training_hardware: explanation: DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. value: H800 GPUs training_time: explanation: DeepSeek-V3 requires only 2.788M H800 GPU hours for its full training. value: 2.788M GPU hours type: model url: https://huggingface.co/deepseek-ai/DeepSeek-V3 - access: open analysis: '' created_date: 2024-04-24 dependencies: [] description: We present a methodology for scaling LLMs called depth up-scaling (DUS) , which encompasses architectural modifications and continued pretraining. In other words, we integrated Mistral 7B weights into the upscaled layers, and finally, continued pre-training for the entire model. SOLAR-10.7B has remarkable performance. It outperforms models with up to 30B parameters, even surpassing the recent Mixtral 8X7B model. For detailed information, please refer to the experimental table. Solar 10.7B is an ideal choice for fine-tuning. SOLAR-10.7B offers robustness and adaptability for your fine-tuning needs. Our simple instruction fine-tuning using the SOLAR-10.7B pre-trained model yields significant performance improvements (SOLAR-10.7B-Instruct-v1.0). 
feedback: https://www.upstage.ai/solar-llm intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/upstage/SOLAR-10.7B-v1.0 monitoring: '' name: SOLAR nationality: South Korea organization: Upstage.ai prohibited_uses: '' quality_control: '' size: 10.7B parameters training_emissions: '' training_hardware: '' training_time: '' type: model url: https://arxiv.org/abs/2312.15166 ================================================ FILE: assets/uw.yaml ================================================ - access: open analysis: '' created_date: 2022-01-07 datasheet: '' dependencies: - YouTube description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: MIT modality: video monitoring: '' name: YT-Temporal-1B nationality: USA organization: University of Washington prohibited_uses: '' quality_control: '' sample: [] size: 20M videos type: dataset url: https://arxiv.org/abs/2201.02639 ================================================ FILE: assets/uwashington.yaml ================================================ - access: open analysis: Reports results on the Vicuna benchmark and compares performance level and time expenditure with ChatGPT created_date: 2023-05-23 dependencies: - QLoRA - OASST1 description: Guanaco is a model family trained with QLORA, an efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a single 48GB GPU while preserving full 16-bit finetuning task performance. feedback: '' intended_uses: '' license: MIT modality: text; text model_card: '' monitoring: '' name: Guanaco nationality: USA organization: University of Washington prohibited_uses: '' quality_control: '' size: 33B parameters (dense) training_emissions: '' training_hardware: A single 24 GB GPU training_time: '' type: model url: https://arxiv.org/pdf/2305.14314v1.pdf - access: open analysis: Evaluated on benchmark music understanding tasks on SOTA music datasets. 
created_date: 2023-10-11 dependencies: - LLaMA 2 - Jukebox description: Llark is an instruction-tuned multimodal model for music understanding. feedback: none intended_uses: '' license: Apache 2.0 modality: audio, text; text model_card: none monitoring: '' name: Llark nationality: USA organization: University of Washington, Spotify prohibited_uses: '' quality_control: '' size: 12B parameters (dense) training_emissions: unknown training_hardware: 4 80GB NVIDIA A40 GPUs training_time: 54 hours type: model url: https://arxiv.org/pdf/2310.07160.pdf ================================================ FILE: assets/vago.yaml ================================================ - access: open analysis: Evaluated on standard benchmarks in comparison to other German language models. created_date: 2023-11-28 dependencies: - OpenHermes 2.5 Mistral - OpenOrca Mistral description: SauerkrautLM is a German language model merged from two Mistral derivatives. feedback: https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO monitoring: unknown name: SauerkrautLM nationality: USA organization: VAGO Solutions prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO ================================================ FILE: assets/viable.yaml ================================================ - access: limited adaptation: unknown created_date: unknown dependencies: - OpenAI API description: 'Viable analyzes qualitative consumer feedback and provides summary feedback to companies. ' failures: unknown feedback: explanation: The feedback mechanism isn't explicitly listed. Company contact information is provided in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service). 
value: unknown intended_uses: 'Intended to be used by companies to digest qualitative consumer feedback. ' license: unknown monitoring: unknown monthly_active_users: unknown name: Viable nationality: USA organization: Viable output_space: Question and answer, summarization, sentiment analysis, topic identification prohibited_uses: 'Prohibited uses are listed in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service). The terms don''t include statements specific to the use of the content generated by the system or GPT-3. ' quality_control: unknown terms_of_service: https://www.askviable.com/terms-of-service type: application url: https://www.askviable.com/ user_distribution: unknown ================================================ FILE: assets/vilm.yaml ================================================ - access: open analysis: none created_date: 2023-10-02 dependencies: - Falcon-180B description: Vulture is a further fine-tuned causal Decoder-only LLM built by Virtual Interactive (VILM) on top of Falcon. feedback: https://huggingface.co/vilm/vulture-180b/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/vilm/vulture-180b monitoring: unknown name: Vulture nationality: USA organization: Virtual Interactive prohibited_uses: Production use without adequate assessment of risks and mitigation; any use cases which may be considered irresponsible or harmful. quality_control: '' size: 180B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: 3000 A100 hours type: model url: https://huggingface.co/vilm/vulture-180b ================================================ FILE: assets/wayve.yaml ================================================ - access: closed analysis: Compared to self before being scaled on quality of video generation. 
created_date: 2023-09-29 dependencies: [] description: "GAIA-1 (\u2018Generative AI for Autonomy\u2019) is a generative world\ \ model that leverages video, text, and action inputs to generate realistic driving\ \ scenarios while offering fine-grained control over ego-vehicle behavior and\ \ scene features." feedback: none intended_uses: The main use cases are pure representation learning, planning (look-ahead search), or learning a policy in the world model (neural simulator) license: unknown modality: text, video; video model_card: none monitoring: none name: GAIA-1 nationality: UK organization: Wayve prohibited_uses: none quality_control: none size: 9B parameters (dense) training_emissions: unknown training_hardware: 32 A100 80GB GPUs training_time: 4 days type: model url: https://arxiv.org/pdf/2309.17080.pdf ================================================ FILE: assets/writer.yaml ================================================ - access: open analysis: Evaluated on the SuperGLUE benchmark created_date: explanation: The model was stated to be published in January, but which day is not specified on the website. value: 2023-01-01 dependencies: - Writer dataset description: Palmyra is a family of privacy-first LLMs for enterprises trained on business and marketing writing. feedback: https://huggingface.co/Writer/palmyra-base/discussions intended_uses: generating text from a prompt license: Apache 2.0 modality: text; text model_card: https://huggingface.co/Writer/palmyra-base monitoring: '' name: Palmyra nationality: USA organization: Writer prohibited_uses: '' quality_control: '' size: 20B parameters (dense) training_emissions: unknown training_hardware: '' training_time: unknown type: model url: https://gpt3demo.com/apps/palmyra - access: open analysis: '' created_date: explanation: The model was stated to be published in April, but which day is not specified on the website. 
value: 2023-04-01 dependencies: - Palmyra - Camel dataset description: Camel is an instruction-following large language model tailored for advanced NLP and comprehension capabilities. feedback: https://huggingface.co/Writer/camel-5b-hf/discussions intended_uses: '' license: Apache 2.0 modality: text; text model_card: https://huggingface.co/Writer/camel-5b-hf monitoring: '' name: Camel nationality: USA organization: Writer prohibited_uses: '' quality_control: '' size: 5B parameters (dense) training_emissions: unknown training_hardware: '' training_time: unknown type: model url: https://chatcamel.vercel.app/ - access: open analysis: The model was evaluated across 9 diverse biomedical datasets where it achieved state-of-the-art results with an average score of 85.9%. It also demonstrated robust capability in efficiently processing extensive medical documents, as showcased by its near-perfect score in the NIH evaluation. created_date: 2024-09-08 dependencies: - Palmyra-X-004 description: Palmyra-Med-70b-32k is a Language Model designed specifically for healthcare and biomedical applications. It builds upon the foundation of Palmyra-Med-70b and offers an extended context length. This model integrates the DPO dataset, a custom medical instruction dataset, and has been fine-tuned to meet the unique requirements of the medical and life sciences sectors. It is ranked as the leading LLM on biomedical benchmarks with an average score of 85.87%. feedback: Downstream problems with this model should be reported via email to Hello@writer.com. intended_uses: Palmyra-Med-70b-32k is intended for non-commercial and research use in English. Specifically, it can be used for tasks like clinical entity recognition and knowledge discovery from EHRs, research articles, and other biomedical sources. It excels in analyzing and summarizing complex clinical notes, EHR data, and discharge summaries. 
license: Writer open model modality: text; text model_card: https://huggingface.co/Writer/Palmyra-Med-70B-32K monitoring: Measures in place to monitor misuse include the addition of watermarks in all models built by Writer.com to detect and prevent misuse and illegal use. name: Palmyra-Med-70b-32k nationality: USA organization: Writer prohibited_uses: The model should not be used in any manner that violates applicable laws or regulations. It is not to be used in direct patient care, clinical decision support, or professional medical purposes. The model should not replace professional medical judgment. quality_control: The model has been refined using Policy Optimization and a finely crafted fine-tuning dataset. It contains watermarks to detect and prevent misuse and illegal use. size: 70B parameters training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://huggingface.co/Writer/Palmyra-Med-70B-32K - access: open analysis: The model has been evaluated internally, showing state-of-the-art results on various financial datasets. It has shown 100% accuracy in needle-in-haystack tasks and superior performance in comparison to other models in the organization's internal finance evaluations. It passed the CFA Level III test with a score of 73% and has shown superior performance compared to other models in the long-fin-eval, an internally created benchmark that simulates real-world financial scenarios. created_date: 2024-09-08 dependencies: - Palmyra-X-004 - Writer in-house financial instruction dataset description: Palmyra-Fin-70B-32K is a leading LLM built specifically to meet the needs of the financial industry. It has been fine-tuned on an extensive collection of high-quality financial data and it is highly adept at handling the specific needs of the finance field. It outperforms other large language models in various financial tasks and evaluations, achieving state-of-the-art results across various financial datasets. 
Its strong performance in tasks like financial document analysis, market trend prediction, and risk assessment underscores its effective grasp of financial knowledge.
size: 70B parameters (dense) training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://huggingface.co/Writer/Palmyra-Fin-70B-32K ================================================ FILE: assets/xai.yaml ================================================ - access: open analysis: Grok-1 was evaluated on a range of reasoning benchmark tasks and on curated foreign mathematic examination questions. created_date: 2023-11-04 dependencies: [] description: "Grok is an AI modeled after the Hitchhiker\u2019s Guide to the Galaxy," feedback: none intended_uses: Grok-1 is intended to be used as the engine behind Grok for natural language processing tasks including question answering, information retrieval, creative writing and coding assistance. license: Apache 2.0 modality: text; text model_card: https://x.ai/model-card/ monitoring: unknown name: Grok-1 nationality: USA organization: xAI prohibited_uses: none quality_control: none size: 314B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://grok.x.ai/ - access: limited analysis: The model is evaluated in a zero-shot setting without chain-of-thought prompting. The evaluation domains include multi-disciplinary reasoning, understanding documents, science diagrams, charts, screenshots, photographs and real-world spatial understanding. The model shows competitive performance with existing frontier multimodal models. created_date: 2024-04-12 dependencies: [] description: Grok-1.5V is a first-generation multimodal model which can process a wide variety of visual information, including documents, diagrams, charts, screenshots, and photographs. feedback: none intended_uses: Grok-1.5V can be used for understanding documents, science diagrams, charts, screenshots, photographs. It can also translate diagrams into Python code. 
license: unknown modality: image, text; text model_card: none monitoring: unknown name: Grok-1.5V nationality: USA organization: xAI prohibited_uses: unknown quality_control: '' size: unknown training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://x.ai/blog/grok-1.5v - access: limited analysis: The Grok-2 models were evaluated across a series of academic benchmarks that included reasoning, reading comprehension, math, science, and coding. They showed significant improvements over the earlier model Grok-1.5 and achieved performance levels competitive to other frontier models in areas such as graduate-level science knowledge (GPQA), general knowledge (MMLU, MMLU-Pro), and math competition problems (MATH). created_date: 2024-08-13 dependencies: [] description: Grok-2 is a state-of-the-art language model with advanced capabilities in both text and vision understanding. It demonstrates significant improvements in reasoning with retrieved content and tool use capabilities over its previous Grok-1.5 model. It also excels in vision-based tasks and delivers high performance in document-based question answering and visual math reasoning (MathVista). Grok-2 mini, a smaller version of Grok-2, is also introduced, offering a balance between speed and answer quality. feedback: Issues with the model should be reported to xAI. intended_uses: The model is intended to be used for understanding text and vision, answering questions, collaborating on writing, solving coding tasks, and enhancing search capabilities. license: Unknown modality: text; text, vision model_card: unknown monitoring: Unknown name: Grok-2 nationality: USA organization: xAI prohibited_uses: Unknown quality_control: Grok-2 models were tested in real-world scenarios using AI tutors that engaged with the models across a variety of tasks and selected the superior response based on specific criteria outlined in the guidelines. 
size: unknown training_emissions: Unknown training_hardware: Unknown training_time: Unknown type: model url: https://x.ai/blog/grok-2 ================================================ FILE: assets/xverse.yaml ================================================ - access: open analysis: Evaluated across a range of standard datasets regarding multiple model capabilities like language comprehension and logical reasoning. created_date: 2023-11-06 dependencies: [] description: XVERSE is a multilingual large language model for over 40 languages. feedback: https://huggingface.co/xverse/XVERSE-65B/discussions intended_uses: '' license: explanation: can be found at https://github.com/xverse-ai/XVERSE-65B/blob/main/MODEL_LICENSE.pdf. Code license is under Apache 2.0 value: custom modality: text; text model_card: https://huggingface.co/xverse/XVERSE-65B monitoring: unknown name: XVERSE nationality: USA organization: Xverse prohibited_uses: '' quality_control: '' size: 65B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://github.com/xverse-ai/XVERSE-65B ================================================ FILE: assets/xwin.yaml ================================================ - access: open analysis: Evaluated on AlpacaEval benchmark against SOTA LLMs. created_date: 2023-09-20 dependencies: [] description: Xwin-LM is a LLM, which on release, ranked top 1 on AlpacaEval, becoming the first to surpass GPT-4 on this benchmark. 
feedback: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1/discussions intended_uses: '' license: LLaMA2 modality: text; text model_card: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1 monitoring: none name: Xwin-LM nationality: USA organization: Xwin prohibited_uses: '' quality_control: '' size: 70B parameters (dense) training_emissions: unknown training_hardware: unknown training_time: unknown type: model url: https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1 ================================================ FILE: assets/yandex.yaml ================================================ - access: open analysis: '' created_date: explanation: The date the blog post about YaLM was published. value: 2022-06-22 dependencies: - The Pile - Yandex Russian Pretraining Dataset description: YaLM is a 100B parameter autoregressive model trained on 25% English and 75% Russian text. feedback: '' intended_uses: '' license: explanation: '' value: Apache 2.0 modality: text; text model_card: '' monitoring: '' name: YaLM nationality: Russia organization: Yandex prohibited_uses: '' quality_control: '' size: 100B parameters (dense) training_emissions: '' training_hardware: Yandex 800 A100 Cluster training_time: '' type: model url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 - access: open adaptation: '' created_date: 2022-06-23 dependencies: - YaLM description: Yandex is a search engine and web portal. Yandex offers internet search and other services failures: '' feedback: '' intended_uses: '' license: explanation: '"3.1. The Rightholder, on the basis of a simple (non-exclusive) license, shall grant the User a free, worldwide, non-transferable right to use the Software as set forth below: 3.1.1. To use the Software in accordance with its functionality by copying and installing it on the User''s personal device(s). The User shall be entitled to install the Software on any number of personal devices. 3.1.2. 
To reproduce and to distribute this Software in an unmodified form and solely for non-commercial purposes (free of charge)." - excerpt from the Terms of Service document for browsers. ' value: custom monitoring: '' monthly_active_users: '' name: Yandex Search nationality: Russia organization: Yandex output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: https://yandex.com/legal/browser_agreement/ type: application url: https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6 user_distribution: '' ================================================ FILE: assets/you.yaml ================================================ - access: closed analysis: '' created_date: unknown datasheet: '' dependencies: [] description: '' excluded: '' feedback: '' included: '' intended_uses: '' license: unknown modality: text monitoring: '' name: You dataset nationality: USA organization: You prohibited_uses: '' quality_control: '' sample: [] size: unknown type: dataset url: https://you.com/ - access: closed analysis: '' created_date: unknown dependencies: - You dataset description: '' feedback: '' intended_uses: '' license: unknown modality: text; text model_card: '' monitoring: '' name: You model nationality: USA organization: You prohibited_uses: '' quality_control: '' size: unknkown training_emissions: '' training_hardware: '' training_time: '' type: model url: https://you.com/ - access: open adaptation: '' created_date: unknown dependencies: - You model description: You.com is a search engine built on artificial intelligence that provides users with a customized search experience while keeping their data 100% private. 
failures: '' feedback: '' intended_uses: '' license: unknown monitoring: '' monthly_active_users: '' name: You Search nationality: USA organization: You output_space: '' prohibited_uses: '' quality_control: '' terms_of_service: '' type: application url: https://you.com/ user_distribution: '' ================================================ FILE: assets/zhejiang.yaml ================================================ - access: open analysis: Evaluated on standard and ocean science benchmarks in comparison to other similar-sized models. created_date: 2024-02-06 dependencies: [] description: OceanGPT is the first-ever LLM in the ocean domain and displays expertise in various ocean science tasks. feedback: https://huggingface.co/zjunlp/OceanGPT-7b/discussions intended_uses: '' license: MIT modality: text; text model_card: https://huggingface.co/zjunlp/OceanGPT-7b monitoring: unknown name: OceanGPT nationality: China organization: Zhejiang University prohibited_uses: '' quality_control: '' size: 7B parameters (dense) training_emissions: unknown training_hardware: 6 A800 NVIDIA GPUs training_time: 7 days type: model url: https://www.zjukg.org/project/OceanGPT/ ================================================ FILE: components/home.html ================================================
Ecosystem Graphs Logo

ecosystem

graphs

Foundation models define an emerging ecosystem of datasets, models, and applications. As their footprint grows, so does their societal impact.

Ecosystem graphs is a framework to document the foundation models ecosystem, namely both the assets (datasets, models, and applications) and their relationships. Using it, one can answer questions such as: What are the latest foundation models? Who builds them and where are they used downstream? What are the general trends over time? We hope that ecosystem graphs will be a useful resource for researchers, application developers, policymakers, and the public to better understand the foundation models ecosystem.

VIEW CODE
================================================ FILE: components/nav.html ================================================ ================================================ FILE: components/table.html ================================================
================================================ FILE: css/main.css ================================================ /* Imports */ @import url("https://fonts.googleapis.com/css?family=Hahmlet:400,300,700"); @import url("https://fonts.googleapis.com/css?family=Montserrat:400,300,700"); /* Style settings for our website */ :root { /* Colors */ --primary-color: #7CB342; --primary-color-light: #DCEDC8; --primary-color-dark: #4b6c27; --primary-color-slightly-darker: #587f2f; --secondary-color: #76462c; --secondary-color-light: #d4bfb4; --secondary-color-dark: #392215; --light: #f8faf5; --dark: #0d1207; --white: #ffffff; /* Fonts */ --title-font: 'Montserrat'; --body-font: 'Hahmlet'; } /* Useful custom classes */ .primary-color { color: var(--primary-color) !important; } .search-box { font-family: var(--title-font); border: 0.1rem solid #ddd; /* Add a grey border */ border-radius: 0.2rem; margin: 0rem; } .search-box:focus { outline-style: solid; outline-width: 0.1rem; outline-color: var(--primary-color); margin: 0rem; } /* Modifications to bootstrap defaults */ body { font-family: var(--body-font) !important; color: var(--dark) !important; } h1 { font-family: var(--title-font) !important; } a, a:hover { color: var(--primary-color-slightly-darker) } nav { font-family: var(--title-font) !important; } .table td { padding: 0.4rem 0.75rem; } .table p { margin: 0; } .btn { font-family: var(--title-font) !important; } *:focus { border-color: var(--primary-color-light) !important; box-shadow: none !important; } .btn-primary, .btn-primary:hover, .btn-primary:active, .btn-primary:visited, .btn-primary:hover { background-color: var(--primary-color) !important; border-color: var(--primary-color) !important; } .btn-secondary, .btn-secondary:hover, .btn-secondary:active, .btn-secondary:visited, .btn-secondary:hover { background-color: var(--white) !important; border-color: var(--primary-color-dark) !important; color: var(--primary-color-dark) !important; border-width: 0.1rem; } /* 
Custom CSS styles */ .block { margin: 10px; } .dataset { color: darkgreen; } .model { color: darkred; } .application { color: darkblue; } .help-icon { margin-left: 3px; margin-right: 3px; } .graph { width: 100%; height: 600px; display: block; } .field-explanation { color: gray; font-size: smaller; font-style: italic; } ================================================ FILE: index.html ================================================ Ecosystem Graphs for Foundation Models
================================================ FILE: js/general.js ================================================ function encodeUrlParams(params) { let s = ''; for (let k in params) s += (s === '' ? '?' : '&') + k + '=' + encodeURIComponent(params[k]); return s; } function decodeUrlParams(str) { const params = {}; if (str === '') return params; const items = str.substring(1).split(/&/); for (let i = 0; i < items.length; i++) { const pair = items[i].split(/=/); params[pair[0]] = decodeURIComponent(pair[1]); } return params; } function updateBrowserLocation(params) { // Update the address bar window.history.pushState({}, '', window.location.pathname + encodeUrlParams(params)); } function openBrowserLocation(params) { // Update the address bar window.open(window.location.pathname + encodeUrlParams(params), 'blank_'); } function multilineHtml(s) { return s.replace(/\n/g, '
'); } function renderError(e) { return $('
').addClass('alert alert-danger').append(multilineHtml(e)); } function getField(obj, key) { if (key in obj) { return obj[key]; } console.error('Missing key', key, 'in', obj); return null; } ================================================ FILE: js/main.js ================================================ class Field { constructor(raw) { this.name = getField(raw, 'name'); this.description = getField(raw, 'description'); this.type = raw.type; } } class Schema { constructor(name, raw) { this.name = name; this.fields = raw.map((field) => new Field(field)); } hasField(name) { return this.fields.some((field) => field.name === name); } } /** * AssetField represents a field of an Asset. Each AssetField has a value and * an optional explanation for the value. */ class AssetField { constructor(value, explanation) { this.value = value; this.explanation = explanation; } } /** * An Asset in the ecosystem. */ class Asset { constructor(item, schema) { // Set the parameters this.schema = schema; this.type = getField(item, 'type'); // This field is an object matching field_name => AssetField this.fields = {}; // Loop through the schema to populate the asset fields schema.fields.forEach((schemaField) => { // The asset fields we will populate let value = null, explanation = null; // We expect each assetField to be an object with a "value" and an // "explanation" key. In the absence of these keys, we read the object // directly (e.g. arrays or dates) const schemaFieldValue = getField(item, schemaField.name); const isObject = typeof schemaFieldValue === 'object'; const hasValue = isObject && 'value' in schemaFieldValue; const hasExplanation = isObject && 'explanation' in schemaFieldValue; if (hasValue && hasExplanation) { value = getField(schemaFieldValue, 'value'); explanation = schemaFieldValue.explanation; } else if (hasValue || hasExplanation) { console.error( 'Error in schemaField', schemaField, 'Don\'t use value and explanation fields without one another. 
Couldn\'t read the asset, fix.' ); } else { value = schemaFieldValue; } // Once value is extracted, we perform type checking. if (schemaField.type === 'list') { if (!(value instanceof Array)) { console.error('Expected list for', schemaField.name, 'but got', value); } } else { if (!['string', 'number', 'boolean'].includes(typeof(value)) && !(value instanceof Date)) { console.error('Expected string, number, boolean, or date for', schemaField.name, 'but got', value); } } this.fields[schemaField.name] = new AssetField(value, explanation); }); // Print warnings about any extraneous fields for (let key in item) { if (key != 'type' && !schema.hasField(key)) { console.error('Extra key', key, 'in', item); } } // To be filled out later this.downstreamAssets = []; } } function updateDownstreamAssets(nameToAsset) { // Use each asset's dependencies (upstream pointers) to update the corresponding downstream pointers. Object.values(nameToAsset).forEach((asset) => { asset.fields.dependencies.value.forEach((dep) => { if (!(dep in nameToAsset)) { console.error('The node ', dep, 'does not exist in the graph.'); } const depAsset = nameToAsset[dep]; if (depAsset) { depAsset.downstreamAssets.push(asset.fields.name.value); } }); }); } //////////////////////////////////////////////////////////// function getStandardSize(value) { const thousand = 1000; const dataSizeDict = {'B': 0, 'KB': 1, 'MB': 2, 'GB': 3, 'TB': 4, 'PB': 5}; const modelSizeDict = {'M': 2, 'B': 3, 'T': 4}; if (value.includes('parameters')) { var size = value.split(' ')[0]; const unit = size.slice(-1); const exp = modelSizeDict[unit]; size = size.substring(0, size.length - 1); value = Math.pow(thousand, exp); } else { const arr = value.split(' '); const num = parseInt(arr[0]); const unit = arr[2]; const exp = dataSizeDict[unit]; value = Math.pow(thousand, exp); } return value } function compareValues(valueA, valueB, columnName) { // Filter for null, empty and unknown values const specialValues = [null, "", "unknown", 
"n/a"] const valueAIndex = specialValues.indexOf(valueA); const valueBIndex = specialValues.indexOf(valueB); const bothNotFound = valueAIndex === -1 && valueBIndex === -1 if (!bothNotFound) { return valueBIndex - valueAIndex; } // Standardize the value if (columnName === "Created date") { valueA = Date.parse(valueA); valueB = Date.parse(valueB); } else if (columnName === "Size") { valueA = getStandardSize(valueA); valueB = getStandardSize(valueB); } // @TODO Decide how to sort the "dependencies" column // Compare the values if (valueA > valueB) { return 1; } else if (valueA < valueB) { return -1; } else { return 0; } } // const $search = $('', {type: 'text', size: 40, placeholder: 'Enter regex query (enter to open all)'}); // $search.keyup((e) => { // // Open up all match specs // if (e.keyCode === 13) { // const href = encodeUrlParams(Object.assign(urlParams, {runSpec: '.*' + query + '.*'})); // window.open(href); // } // query = $search.val(); // renderTable(); // }); function filterTable(query) { // Get rows const rows = $('tr').slice(1); // Skip the header row query = query.toLowerCase(); if (query.includes('=')) { // Filter columns // @TODO placeholder comment let a = 2; } else { // Filter all values [].forEach.call(rows, function (row) { const fieldValue = $(row).find("td .field-value"); const values = $(fieldValue).children().not(":last"); let found = false; [].forEach.call(values, function (value) { const innerHTML = value.innerHTML.toLowerCase(); if (innerHTML.includes(query)) { found = true; } }); if (found) { $(row).show(); } else { $(row).hide(); } }); } } function sortColumn(columnName) { // Get the index for the column const headerRow = $('tr').slice(0)[0]; const index = headerRow.innerText.split('\t').indexOf(columnName); // Get the current direction const direction = globalThis.tableDirections[index] || 'desc'; // A factor based on the direction const multiplier = (direction === 'asc') ? 
1 : -1; // Get rows const rows = $('tr').slice(1); // Skip the header row // Sort rows rows.sort((rowA, rowB) => { const fvA = $(rowA).find("td .field-value")[index]; const fvB = $(rowB).find("td .field-value")[index]; const valueA = $(fvA).children()[0].innerHTML; const valueB = $(fvB).children()[0].innerHTML; return multiplier * compareValues(valueA, valueB, columnName); }); // Create a new tbody const newTBody = $(''); // // // Append new rows [].forEach.call(rows, function (row) { newTBody.append(row); }); // Replace the table body with the newly created table body const tbody = $('tbody'); tbody.replaceWith(newTBody); // Reverse the direction globalThis.tableDirections[index] = direction === 'asc' ? 'desc' : 'asc'; } //////////////////////////////////////////////////////////// function helpIcon(help, link) { // Show a ? return $('', {href: link, target: 'blank_', class: 'help-icon'}).append($('', {src: 'img/info-icon.png', width: 15, title: help})); } function renderList(items) { const $list = $(''); items.forEach((item, i) => { if (i > 0) { $list.append(' | '); } $list.append(item); }); return $list; } function renderAccessType(value) { const valueToColor = { 'open': 'lightgreen', // '#c0eec0', // Slightly lighter than lightgreen 'limited': 'yellow', // 'papayawhip', 'closed': 'lightgray', // '#f0b0b0' // Slightly lighter than lightcoral } const color = value in valueToColor ? valueToColor[value] : 'mistyrose'; const textElement = $('').css({"background-color": color, "min-width": 100}).append(value); return textElement; } function renderField(schemaField) { const text = schemaField.name.replace(/_/g, ' '); return $('
').append(text).append(helpIcon(schemaField.description, '#')); } function renderValueExplanation(type, value, explanation) { const converter = new showdown.Converter(); // Render value let renderedValue = $('
').append(value); if (value === 'unknown' || value === 'none') { renderedValue = converter.makeHtml(value); } else if (value instanceof Date) { let dateString = value.toLocaleDateString('en-us', {year:"numeric", month:"short", day:"numeric"}); renderedValue = converter.makeHtml(dateString); } else if (type === 'list') { renderedValue = renderList(value.map((elemValue) => renderValueExplanation(null, elemValue, null))); } else if (type === 'url') { renderedValue = $('', {href: value, target: 'blank_'}).append(value); } else if (type === 'access_type') { renderedValue = renderAccessType(value); } else if (typeof(value) === 'string') { renderedValue = converter.makeHtml(value); } // Wrap the value in a custom element const fieldValue = $('
', {class: 'field-value'}).append(renderedValue); // Render explanation, if provided let renderedExplanation = converter.makeHtml(explanation); const fieldExplanation = $('
', {class: 'field-explanation'}).append(renderedExplanation); return $('
').append(fieldValue) .append(fieldExplanation); } function renderAssetLink(nameToAsset, assetName) { const asset = getField(nameToAsset, assetName); if (!asset) { return assetName; } const href = encodeUrlParams({asset: asset.fields.name.value}); return $('', {href, target: 'blank_'}).append(assetName); } function renderAssetLinks(nameToAsset, assetNames) { return renderList(assetNames.map((name) => renderAssetLink(nameToAsset, name))); } function renderAsset(nameToAsset, assetName) { const asset = getField(nameToAsset, assetName); if (!asset) { return renderError('Invalid asset: ' + assetName); } const $card = $('
'); $card.append($('

').append(asset.fields.name.value)); // Render upstream and downstream assets $card.append($('
', {class: 'block'}).append('Upstream: ').append(renderAssetLinks(nameToAsset, asset.fields.dependencies.value))); $card.append($('
', {class: 'block'}).append('Downstream: ').append(renderAssetLinks(nameToAsset, asset.downstreamAssets))); // Render a single asset const $table = $('', {class: 'table'}); const $tbody = $(''); asset.schema.fields.forEach((schemaField) => { const value = asset.fields[schemaField.name].value; const explanation = asset.fields[schemaField.name].explanation; $tbody.append($('') .append($('
').append(renderField(schemaField))) .append($('').append(schemaField.name === 'dependencies' ? renderAssetLinks(nameToAsset, value) : renderValueExplanation(schemaField.type, value, explanation))) ); }); $table.append($tbody); $card.append($table); return $card; } function renderFieldName(fieldName) { // Capitalizes and removes '_' from a fieldName (which should be a field name // from the schema, such as created_date, name, etc.) const capitalized = fieldName.charAt(0).toUpperCase() + fieldName.slice(1); if (capitalized === 'Modality') { return 'Modality (In; Out)' } return capitalized.replace('_', ' '); } /** * Renders a table given the column properties. * @param {Array.} selectedAssets - Array of the assets that will be * rendered in the custom table. * @param {Object.} allNameToAsset - Object mapping the names of * all the assets in the ecosystem to their Asset representation. * @param {Array.} columnNames - Columns that will be included in the * table. */ function renderCustomTable(selectedAssets, allNameToAsset, columnNames) { const $table = $('', {class: 'table'}); const $thead = $(''); const $headRow = $(''); // Add column names columnNames.forEach((columnName, index) => { const renderedName = renderFieldName(columnName); const onclickString = 'sortColumn(\'' + renderedName + '\')'; $headRow.append($(''); selectedAssets.forEach((asset) => { const $bodyRow = $(''); columnNames.forEach((columnName) => { // Set the default value let tdValue = 'n/a'; // Render the field value if (columnName === 'type') { tdValue = renderValueExplanation('', asset.type, null); } else if (columnName === 'name') { const href = encodeUrlParams({asset: asset.fields.name.value}); const fieldValue = $('', {href, target: 'blank_'}).append(asset.fields.name.value); tdValue = $('
', {class: 'field-value'}).append(fieldValue); } else if (columnName === 'dependencies') { const fieldValue = renderAssetLinks(allNameToAsset, asset.fields.dependencies.value); tdValue = $('
', {class: 'field-value'}).append(fieldValue); } else { // let type = ''; asset.schema.fields.forEach(item => item.name === columnName ? type = item.type : ''); const value = columnName in asset.fields ? asset.fields[columnName].value : tdValue; const explanation = columnName in asset.fields ? asset.fields[columnName].explanation : tdValue; tdValue = renderValueExplanation(type, value, explanation); } $bodyRow.append($('
', {onClick: onclickString}).append(renderedName)); }); $thead.append($headRow); $table.append($thead); // Keep track of the directions, used to sort asc and desc globalThis.tableDirections = Array.from(columnNames).map(function (header) { return ''; }); // Add body const $tbody = $('
').append(tdValue)); }); $tbody.append($bodyRow); }); $table.append($tbody); return $table; } function renderAssetsTable(nameToAsset) { // Render the table const columnNames = [ 'type', 'name', 'organization', 'created_date', 'size', 'modality', 'access', 'license', 'dependencies', ]; const assets = Object.keys(nameToAsset).map((key) => (nameToAsset[key])); const $table = renderCustomTable(assets, nameToAsset, columnNames); return $table; } function renderAssetsGraph(nameToAsset) { // Render the ecosystem graph const $graph = $('
', {class: 'graph'}); const nodes = []; const edges = []; const typeToShape = { 'dataset': 'ellipse', 'model': 'square', 'application': 'hexagon', 'undocumented': 'ellipse', }; const typeToColor = { 'dataset': 'orange', 'model': 'dodgerblue', 'application': 'firebrick', 'undocumented': 'gray', }; Object.values(nameToAsset).forEach((asset) => { nodes.push({ data: { id: asset.fields.name.value, shape: typeToShape[asset.type], color: typeToColor[asset.type], }, }); asset.fields.dependencies.value.forEach((dep) => { edges.push({ data: { id: asset.fields.name.value + '->' + dep, source: dep, target: asset.fields.name.value, }, }); }); }); undocumentedDeps = Array.from(new Set( edges.map(edge => edge.data.source) .filter(dep => !(dep in nameToAsset)) )).sort(); console.log("The following dependencies are undocumented."); console.log(undocumentedDeps); undocumentedDeps.forEach(dep => { nodes.push({ data: { id: dep, shape: typeToShape["undocumented"], color: typeToColor["undocumented"], }, }); }); $graph.ready(() => { const cy = cytoscape({ container: $graph.get(0), elements: {nodes, edges}, layout: { name: 'cose', randomize: false, componentSpacing: 100, nodeOverlap: 10, //nodeDimensionsIncludeLabels: true, nodeRepulsion: function( node ){ return 4096; }, //padding: 30, gravity: 0.5 }, style: [ { selector: 'node', style: { label: 'data(id)', shape: 'data(shape)', 'background-color': 'data(color)', 'text-wrap': 'wrap', 'text-max-width': 30, 'text-valign': 'center', 'color': 'white', 'text-outline-width': 3, 'text-outline-color': 'data(color)', 'text-outline-opacity': 1, 'padding': 40, }, }, { selector: 'edge', style: { 'width': 3, 'curve-style': 'straight', 'target-arrow-shape': 'triangle', 'target-arrow-color': '#7CB342', 'line-fill': 'linear-gradient', 'line-gradient-stop-colors': '#392215 #7CB342', 'target-endpoint': 'outside-to-node-or-label', 'line-cap': 'square' }, }, ], }); cy.on('click', (e) => { const data = e.target._private.data; const assetName = data.id; if 
(assetName && !data.source) { openBrowserLocation({asset: assetName}); } }); }); return $graph; } //////////////////////////////////////////////////////////// // UI Helpers function downloadCSV() { var downloadLink = document.createElement('a'); downloadLink.href = 'resources/all_assets.csv'; downloadLink.download = 'assets.csv'; document.body.appendChild(downloadLink); downloadLink.click(); document.body.removeChild(downloadLink); } function setUpSearch() { const $search = $('#table-search'); let query = ''; $search.keyup((e) => { if (e.keyCode === 13) { // Enter @TODO } query = $search.val(); filterTable(query); }); } //////////////////////////////////////////////////////////// // Home Page function renderHomePage(pageContainer) { $("nav").hide(); $.get("components/home.html", function(data){ pageContainer.append(data); }); } // Table Page function renderTablePage(pageContainer, nameToAsset) { $.get("components/table.html", function(data){ pageContainer.append(data); const tableContainer = $("#table-container"); setUpSearch(); const table = renderAssetsTable(nameToAsset); tableContainer.append(table); $(".field-explanation").toggle(); // Sort by created_date, name sortColumn("Name"); sortColumn("Created date"); }); } // Graph Page function renderGraphPage(pageContainer, nameToAsset) { const graph = renderAssetsGraph(nameToAsset); pageContainer.append(graph); } function renderPageContent(nameToAsset) { const urlParams = decodeUrlParams(window.location.search); const pageContainer = $('#main'); const mode = urlParams.mode || 'home'; if (urlParams.asset) { const content = renderAsset(nameToAsset, urlParams.asset); pageContainer.append(content); } else if (mode === 'home') { renderHomePage(pageContainer); } else if (mode === 'table') { renderTablePage(pageContainer, nameToAsset); } else if (mode === 'graph') { renderGraphPage(pageContainer, nameToAsset); } else { const content = renderError('Unrecognized mode: ' + mode + '.'); pageContainer.append(content); } } 
function renderNavBar() { $.get("components/nav.html", function(data){ $("#nav-placeholder").replaceWith(data); }); } function loadAssetsAndRenderPageContent() { const paths = [ 'assets/zhejiang.yaml', 'assets/you.yaml', 'assets/yandex.yaml', 'assets/xwin.yaml', 'assets/xverse.yaml', 'assets/xai.yaml', 'assets/writer.yaml', 'assets/wayve.yaml', 'assets/vilm.yaml', 'assets/viable.yaml', 'assets/vago.yaml', 'assets/uwashington.yaml', 'assets/uw.yaml', 'assets/unknown.yaml', 'assets/uae.yaml', 'assets/twelvelabs.yaml', 'assets/tsinghua.yaml', 'assets/triml.yaml', 'assets/trevor.yaml', 'assets/transformify.yaml', 'assets/toronto.yaml', 'assets/tokyo.yaml', 'assets/together.yaml', 'assets/tiger.yaml', 'assets/suno.yaml', 'assets/stonybrook.yaml', 'assets/stanford.yaml', 'assets/stability.yaml', 'assets/stability_ai.yaml', 'assets/spotify.yaml', 'assets/speak.yaml', 'assets/soochow.yaml', 'assets/snap.yaml', 'assets/skt.yaml', 'assets/singapore.yaml', 'assets/shop.yaml', 'assets/shanghai.yaml', 'assets/sciphi.yaml', 'assets/sana.yaml', 'assets/samba.yaml', 'assets/salesforce.yaml', 'assets/rwkv.yaml', 'assets/runway_ai,_inc..yaml', 'assets/robin.yaml', 'assets/reka.yaml', 'assets/reexpress.yaml', 'assets/rakuten.yaml', 'assets/qwen_team.yaml', 'assets/quora.yaml', 'assets/quizlet.yaml', 'assets/portkey.yaml', 'assets/pleias.yaml', 'assets/perplexity.yaml', 'assets/peking.yaml', 'assets/paladin.yaml', 'assets/oxford.yaml', 'assets/othersideai.yaml', 'assets/osu.yaml', 'assets/orion.yaml', 'assets/openx.yaml', 'assets/openlemur.yaml', 'assets/openbmb.yaml', 'assets/openai.yaml', 'assets/ollama.yaml', 'assets/oasst.yaml', 'assets/nvidia.yaml', 'assets/nucleus.yaml', 'assets/nous.yaml', 'assets/notion.yaml', 'assets/nolano.yaml', 'assets/nextdoor.yaml', 'assets/neeva.yaml', 'assets/ncsoft.yaml', 'assets/naver.yaml', 'assets/nanyang.yaml', 'assets/mosaic.yaml', 'assets/moreh.yaml', 'assets/moonhub.yaml', 'assets/mistral.yaml', 'assets/mila.yaml', 'assets/microsoft.yaml', 
'assets/meta.yaml', 'assets/maya.yaml', 'assets/mathai.yaml', 'assets/lmsys.yaml', 'assets/llm360.yaml', 'assets/linkedin.yaml', 'assets/lg.yaml', 'assets/lg_ai_research.yaml', 'assets/lehigh.yaml', 'assets/latitude.yaml', 'assets/laion.yaml', 'assets/laion_ev.yaml', 'assets/kunlun.yaml', 'assets/ktai.yaml', 'assets/kotoba.yaml', 'assets/konan.yaml', 'assets/khan.yaml', 'assets/kakaobrain.yaml', 'assets/kaist.yaml', 'assets/juni.yaml', 'assets/internlm.yaml', 'assets/instacart.yaml', 'assets/inflection.yaml', 'assets/ideogram.yaml', 'assets/idea.yaml', 'assets/ibm.yaml', 'assets/huggingface.yaml', 'assets/hubspot.yaml', 'assets/h2oai.yaml', 'assets/greenbit.yaml', 'assets/google.yaml', 'assets/glm.yaml', 'assets/genmo.yaml', 'assets/fuse.yaml', 'assets/faraday.yaml', 'assets/evolutionaryscale.yaml', 'assets/epfl.yaml', 'assets/ens.yaml', 'assets/eleutherai.yaml', 'assets/duolingo.yaml', 'assets/duckduckgo.yaml', 'assets/dibt.yaml', 'assets/deepseek.yaml', 'assets/deepnight.yaml', 'assets/deepmind.yaml', 'assets/deci.yaml', 'assets/databricks.yaml', 'assets/cresta.yaml', 'assets/continue.yaml', 'assets/compute_01_research.yaml', 'assets/columbia.yaml', 'assets/cohere.yaml', 'assets/cognitive.yaml', 'assets/cognition.yaml', 'assets/cmu.yaml', 'assets/chatglm.yaml', 'assets/character.yaml', 'assets/cerebras.yaml', 'assets/causallm.yaml', 'assets/casia.yaml', 'assets/cartesia.yaml', 'assets/cagliostro.yaml', 'assets/bytedance.yaml', 'assets/brex.yaml', 'assets/boston.yaml', 'assets/bloomberg.yaml', 'assets/bigscience.yaml', 'assets/bigcode.yaml', 'assets/berkeley.yaml', 'assets/beitech.yaml', 'assets/be_my_eyes.yaml', 'assets/bain.yaml', 'assets/baidu.yaml', 'assets/baichuan.yaml', 'assets/baai.yaml', 'assets/avignon.yaml', 'assets/autogpt.yaml', 'assets/assembly.yaml', 'assets/aspia_space_institu.yaml', 'assets/argonne.yaml', 'assets/argilla.yaml', 'assets/apple.yaml', 'assets/anthropic.yaml', 'assets/amazon.yaml', 'assets/alibaba.yaml', 'assets/aleph_alpha.yaml', 
'assets/ai21.yaml', 'assets/ai2.yaml', 'assets/adobe.yaml', 'assets/adept.yaml', 'assets/360.yaml', 'assets/01ai.yaml' ]; $.get('js/schemas.yaml', {}, (response) => { // First read the schema... const typeToSchema = {}; // asset type (e.g., "model") => schema const raw = jsyaml.load(response); console.log('Read schemas', raw); for (const name in raw) { typeToSchema[name] = new Schema(name, raw[name]); } // Then read all the assets in parallel const nameToAsset = {}; // asset name (e.g., "GPT-3") => asset $.when( ...paths.map((path) => { return $.get(path, {}, (response) => { const raw = jsyaml.load(response); console.log('Read assets', path, raw); raw.forEach((item) => { nameToAsset[item.name] = new Asset(item, getField(typeToSchema, item.type)); }); }) }) ).then(() => { updateDownstreamAssets(nameToAsset); renderPageContent(nameToAsset); }); }); }; ================================================ FILE: js/schemas.yaml ================================================ --- dataset: - name: name description: Name of the dataset (must be a unique identifier). # General - name: organization description: Organization that curated the dataset. - name: description description: Description of the dataset. - name: created_date description: When the dataset was created. - name: url type: url description: Link to website or paper that provides a detailed description of the dataset. - name: datasheet type: url description: Link to the datasheet describing the dataset. - name: modality description: Modalities represented in the dataset (e.g., text, images, videos). - name: size description: How big (uncompressed) the dataset is. - name: sample type: list description: Small sample of content from the dataset. - name: analysis description: Description of any analysis (evaluation) that was done on the dataset. # Construction - name: dependencies type: list description: A list of assets (e.g., datasets, models, applications) that were used to create this dataset. 
- name: included description: Description of what data is included. - name: excluded description: Description of what data is excluded (e.g., filtered out) and why. - name: quality_control description: What measures were taken to ensure quality, safety, and mitigate harms. # Downstream - name: access type: access_type description: Who can access (and use) the dataset. - name: license description: License of the dataset. - name: intended_uses description: Description of what the dataset can be used for downstream. - name: prohibited_uses description: Description of what the dataset should not be used for downstream. - name: monitoring description: Description of measures taken to monitor downstream uses of this dataset. - name: feedback description: How downstream problems with this dataset should be reported. model: - name: name description: Name of the model (must be a unique identifier). # General - name: organization description: Organization that created the model. - name: description description: Description of the model. - name: created_date description: When the model was created. - name: url type: url description: Link to website or paper that provides a detailed description of the model. - name: model_card type: url description: Link to the model card describing this model. - name: modality description: Modalities represented in the model (e.g., Text, Text (English), Code, Code (Python), Video, Image). - name: analysis description: Description of any analysis that was done on the model. - name: nationality description: Description of which nationality the model is affiliated with. - name: size description: Size (and shape) of the model (e.g., number of parameters in the model) # Construction - name: dependencies type: list description: A list of assets (e.g., datasets, models, applications) that were used to create this model. - name: training_emissions description: Estimate of the carbon emissions used to create this model. 
- name: training_time description: How much time it took to train this model. - name: training_hardware description: What hardware was used to train the model. - name: quality_control description: What measures were taken to ensure quality, safety, and mitigate harms. # Downstream - name: access type: access_type description: Who can access (and use) this model. - name: license description: License of the model. - name: intended_uses description: Description of what the model can be used for downstream. - name: prohibited_uses description: Description of what the model should not be used for downstream. - name: monitoring description: Description of measures taken to monitor downstream uses of this model. - name: feedback description: How downstream problems with this model should be reported. application: - name: name description: Name of the product or service. - name: organization description: Organization that created the application. - name: description description: Description of the application. - name: created_date description: When the application was created. - name: url type: url description: Link to website describing the application. # Construction - name: dependencies type: list description: A list of assets (e.g., datasets, models, applications) that were used to create this application. - name: adaptation description: How the model was adapted (e.g., fine-tuned) to produce the derived task model. - name: output_space description: Description of the output space (e.g., generation, ranking, etc.). - name: quality_control description: What measures were taken to ensure quality, safety, and mitigate harms. # Downstream - name: access type: access_type description: Who can access (and use) the application. - name: license description: License of the application. - name: terms_of_service type: url description: Link to the terms of service. - name: intended_uses description: Description of what the application can be used for downstream. 
- name: prohibited_uses description: Description of what the application should not be used for downstream. - name: monitoring description: Description of measures taken to monitor downstream uses of this application. - name: feedback description: How downstream problems with this application should be reported. # Deployment - name: monthly_active_users description: Rough order of magnitude of number of active users. - name: user_distribution description: Demographic and geographic diversity. - name: failures description: Description of known failures (and errors). ================================================ FILE: pre-commit.sh ================================================ #!/bin/bash # This script fails when any of its commands fail. set -e if ! [ -e venv ]; then python3 -m pip install virtualenv python3 -m virtualenv -p python3 venv fi venv/bin/pip install -r requirements.txt venv/bin/pip check # Run YAML formatter and linter. echo "" echo "Running yamlfix" venv/bin/yamlfix --config-file ./yamlfix.toml ./**/*.yaml echo "" echo "Checking for YAML formatting" venv/bin/yamllint . || ( echo "[-] YAML formatting failed. Fix the errors and run the check again." echo "" exit 1 ) echo "[+] YAML formatting succeeded!" echo "" # Signal the end of the script. 
echo "Finished running pre-commit.sh" ================================================ FILE: requirements.txt ================================================ yamllint==1.26.3 yamlfix==1.2.0 ================================================ FILE: resources/.gitkeep ================================================ ================================================ FILE: resources/all_assets.csv ================================================ access,analysis,created_date,datasheet,dependencies,description,excluded,feedback,included,intended_uses,license,modality,monitoring,name,nationality,organization,prohibited_uses,quality_control,sample,size,type,url,model_card,training_emissions,training_hardware,training_time,adaptation,failures,monthly_active_users,output_space,terms_of_service,user_distribution,value open,Models fine-tuned on EXMODD and earlier dataset Image-Chat and then evaluated on Image-Chat validation set.,2023-10-17,,"['YFCC100M', 'Image-Chat']",EXMODD (Explanatory Multimodal Open-Domain Dialogue dataset) is a dataset built off the proposed MDCF (Multimodal Data Construction Framework).,,Feedback can be sent to authors via poplpr@bit.edu.cn,,,MIT,"image, text",,EXMODD,China,Beijing Institute of Technology,,,[],unknown,dataset,https://arxiv.org/pdf/2310.10967.pdf,,,,,,,,,,, open,"Evaluated on standard benchmarks including MMLU, CEval, and DROP.",2023-11-13,,['Llama 2'],MiniMA is a smaller finetuned Llama 2 model adapted for Chinese.,,https://huggingface.co/GeneZC/MiniMA-3B/discussions,,,Llama 2,text; text,unknown,MiniMA,China,Beijing Institute of Technology,,,,3B parameters (dense),model,https://github.com/GeneZC/MiniMA,https://huggingface.co/GeneZC/MiniMA-3B,unknown,8 A100 80G GPUs,unknown,,,,,,, open,,,,['Azure Cognitive Services for Vision'],"More than 40 percent of LinkedIn's feed posts include at least one image. 
We want every member to have equal access to opportunity and are committed to ensuring that we make images accessible to our members who are blind or who have low vision so they can be a part of the online conversation. With Azure Cognitive Service for Vision, we can provide auto-captioning to edit and support alt. text descriptions.",,,,,unknown,,,LinkedIn,USA,LinkedIn,,,,,application,https://www.linkedin.com/,,,,,,,,,,, open,"Evaluated on the OpenLLM benchmarks and, on release, outperforms all other 7B models on the OpenLLM Leaderboard.",2023-12-12,,[],DeciLM is a LLM that on release ranks as the fastest and most accurate model of its size.,,,,This model is intended for commercial and research use in English and can be fine-tuned for use in other languages.,Apache 2.0,text; text,unknown,DeciLM,Israel,Deci,,,,7B parameters (dense),model,https://deci.ai/blog/introducing-decilm-7b-the-fastest-and-most-accurate-7b-large-language-model-to-date,https://deci.ai/model-zoo/decilm-7b/,unknown,NVIDIA A10 GPUs,unknown,,,,,,, open,"Authors use the dataset to learn a joint text-video embedding by leveraging more than 130M video clip-caption pairs. They then evaluate the learned embeddings on the tasks of localizing steps in instructional videos of CrossTask and textbased video retrieval on YouCook2, MSR-VTT and LSMDC datasets. They show that their learned embedding can perform better compared to models trained on existing carefully annotated but smaller video description datasets.",2019-06-07,https://arxiv.org/pdf/1906.03327.pdf,['YouTube'],"HowTo100M is a large-scale dataset of narrated videos with an emphasis on instructional videos where content creators teach complex tasks with an explicit intention of explaining the visual content on screen. 
HowTo100M features a total of 136M video clips with captions sourced from 1.2M Youtube videos (15 years of video) and 23k activities from domains such as cooking, hand crafting, personal care, gardening or fitness.","Categories such as Relationships and Finance and Business, that may be more abstract, are excluded. Videos with less than 100 views are removed. Authors also ignore videos that have less than 100 words. Videos longer than 2,000 seconds are removed. As some videos may appear in several tasks, the videos are deduplicated based on YouTube IDs.",,"The dataset features 1.22 million videos from YouTube with a primary focus on videos containing ""visual tasks"", that involve some interaction with the physical world (e.g. Making peanut butter, Pruning a tree) as compared to others that are more abstract (e.g. Ending a toxic relationship, Choosing a gift). To obtain predominantly visual tasks, the authors limit them to one of 12 categories (Food and Entertaining, Home and Garden, Hobbies and Crafts, Cars & Other Vehicles, Pets and Animals, Holidays and Traditions, Personal Care and Style, Sports and Fitness, Health, Education and Communications, Arts and Entertainment, Computers and Electronics). They also restrict to the top 200 YouTube search results, as the latter ones may not be related to the query task.",,Apache 2.0,"text, video",,HowTo100M,unknown,"École Normale Supérieure, Inria","No uses are explicitly prohibited by the authors. They note the following limitations of the dataset: ""We note that the distribution of identities and activities in the HowTo100M dataset may not be representative of the global human population and the diversity in society. 
Please be careful of unintended societal, gender, racial and other biases when training or deploying models trained on this data."" ",,[],136M video clips,dataset,https://arxiv.org/pdf/1906.03327.pdf,,,,,,,,,,, open,unknown,2024-03-20,,[],"Common Corpus is the largest public domain dataset released for training Large Language Models (LLMs). This dataset includes 500 billion words from a diverse range of cultural heritage initiatives and is the largest corpus in English, French, Dutch, Spanish, German and Italian. It supports efforts to train fully open LLMs on sources without copyright concerns.",The data excluded are those that have copyright issues.,unknown,"The dataset includes 500 billion words from a wide diversity of cultural heritage initiatives. It also has the largest English-speaking dataset to date with 180 billion words, including a major US collection of 21 million digitized newspapers and large monographs datasets collected by digital historian Sebastian Majstorovic. It also contains a huge volume of data in French (110 billion words), German (30 billion words), Spanish, Dutch and Italian, as well as data in low-resource languages that are currently underrepresented.","The dataset is intended to support open and reproducible AI research, enhancing accessibility, diversity, and democracy in AI by enabling everyone to explore large models.",,text,unknown,Common Corpus,USA,Pleias,It should not be used for tasks that infringe on copyright laws.,"All data included in the corpus are from fully open and auditable sources, ensuring they are copyright-free.",[],500 billion words,dataset,https://huggingface.co/blog/Pclanglais/common-corpus,,,,,,,,,,, open,,2022-03-25,,[],CodeGen is a language model for code,,,,,"none (model weights), BSD-3-Clause (code)","code, text; code, text",,CodeGen,USA,Salesforce,,,,16B parameters (dense),model,https://arxiv.org/abs/2203.13474,,,Unspecified Salesforce Compute (TPU-V4s),,,,,,,, open,,2022-01-28,,"['ViT-B', 'BERT', 'COCO', 
'Visual Genome', 'Conceptual Captions', 'Conceptual 12M', 'SBU Captions', 'LAION-115M']",,,,,,BSD-3-Clause,text; image,,BLIP,USA,Salesforce,,,,unknown,model,https://arxiv.org/abs/2201.12086,,,,,,,,,,, open,,2022-01-28,,['LAION-400M'],,,,,,BSD-3-Clause,"image, text",,LAION-115M,USA,Salesforce,,,[],115M image-text pairs,dataset,https://arxiv.org/abs/2201.12086,,,,,,,,,,, limited,,2023-03-07,,['ChatGPT API'],EinsteinGPT is generative AI for customer relationship management (CRFM).,,,,,unknown,,,EinsteinGPT,USA,Salesforce,,,,,application,https://www.salesforce.com/products/einstein/overview/?d=cta-body-promo-8,,,,,,,,,,, open,"BLIP-2 achieves state-of-the-art performance on various vision-language tasks, despite having significantly fewer trainable parameters than existing methods",2023-01-30,,['OPT'],BLIP-2 is a model that employs a generic and efficient pretraining strategy that bootstraps vision-language pre-training from off-the-shelf frozen pre-trained image encoders and frozen large language models.,,https://huggingface.co/Salesforce/blip2-opt-2.7b/discussions,,,MIT,text; image,,BLIP-2,USA,Salesforce,,,,2.7B parameters (dense),model,https://arxiv.org/pdf/2301.12597.pdf,https://huggingface.co/Salesforce/blip2-opt-2.7b,unknown,unknown,less than 9 days,,,,,,, open,Moirai has undergone a comprehensive evaluation in both in-distribution and out-of-distribution settings. It demonstrated its capabilities as a zero-shot forecaster and delivered competitive or superior performance compared to full-shot models.,2024-03-19,,['LOTSA'],"Moirai is a cutting-edge time series foundation model, offering universal forecasting capabilities. It stands out as a versatile time series forecasting model capable of addressing diverse forecasting tasks across multiple domains, frequencies, and variables in a zero-shot manner.",,https://huggingface.co/Salesforce/moirai-1.0-R-large/discussions,,"Moirai can be used for time series forecasting in multiple domains. 
It offers robust zero-shot forecasting capabilities and eliminates the need for additional data, extensive computational resources, and expert input for achieving accurate forecasts.",Apache 2.0,time-series; time-series,unknown,Moirai,USA,Salesforce,unknown,The performance of Moirai was evaluated through in-distribution and out-of-distribution settings.,,311M parameters,model,https://blog.salesforceairesearch.com/moirai/,https://huggingface.co/Salesforce/moirai-1.0-R-large,unknown,NVIDIA A100 40G GPUs,unknown,,,,,,, open,,2024-03-19,https://huggingface.co/datasets/Salesforce/lotsa_data,[],LOTSA is the largest collection of open time series datasets with 27B observations across nine domains.,,https://huggingface.co/datasets/Salesforce/lotsa_data/discussions,"domain observations in energy, transport, climate, cloudops, web, sales, nature, econ/finance, and healthcare",pre-training Large Time Series Models,Apache 2.0,time-series,,LOTSA,USA,Salesforce,,,[],27B observations,dataset,https://arxiv.org/pdf/2402.02592.pdf,,,,,,,,,,, open,,2022-12-15,,['The Pile'],,,,,,bigscience-bloom-rail-1.0,text; text,,BioMedLM,USA,Stanford,,,,2.7B parameters (dense),model,https://crfm.stanford.edu/2022/12/15/pubmedgpt.html,,,,,,,,,,, open,Evaluated on own framework that tests domain-specific tasks in medical field.,2022-11-23,,"['Stable Diffusion', 'RoentGen radiology dataset']",RoentGen is a generative medical imaging model that can create visually convincing X-ray images.,,,,,,text; image,,RoentGen,USA,Stanford,,,,330M parameters (dense),model,https://arxiv.org/pdf/2211.12737.pdf,,unknown,64 A100 GPUs,60k training steps per day,,,,,,, open,"Evaluated on three physical control tasks, drawing, steering, and human body movement on various dynamics",2023-06-12,,"['GPT-2', 'BABEL', 'text-davinci-003']",Model trained to generate language corrections for physical control tasks.,,,,,MIT,human trajectories; text,,CORGI,USA,Stanford,,,,124M parameters 
(dense),model,https://arxiv.org/pdf/2306.07012.pdf,,,one NVIDIA A40 GPU,unknown,,,,,,, open,,2023-03-13,https://huggingface.co/datasets/tatsu-lab/alpaca,['text-davinci-003'],"Alpaca dataset consists of 52,000 instruction-following demonstrations generated in the style of the [Self-Instruct framework](https://github.com/yizhongw/self-instruct) using OpenAI's text-davinci-003 engine. This instruction data can be used to conduct instruction-tuning for language models and make the language model follow instruction better. ",,Feedback can be provided on [[GitHub Issues]](https://github.com/tatsu-lab/stanford_alpaca/issues).,,Alpaca is intended and licensed for research use only.,CC BY-NC 4.0,text (English),,Alpaca dataset,USA,Stanford,,,[],52K instruction-following demonstrations,dataset,https://crfm.stanford.edu/2023/03/13/alpaca.html,,,,,,,,,,, open,,2023-03-13,,"['LLaMa', 'Alpaca dataset']","Alpaca-7B is an instruction-following model fine-tuned from the LLaMA 7B model on 52K instruction-following demonstrations. ",,Feedback can be provided on [[GitHub Issues]](https://github.com/tatsu-lab/stanford_alpaca/issues).,,Alpaca is intended and licensed for research use only.,CC BY NC 4.0 (model weights),text (English),,Alpaca,USA,Stanford,,,,7B parameters (dense model),model,https://crfm.stanford.edu/2023/03/13/alpaca.html,,unknown,,,,,,,,, open,"Merlin has been comprehensively evaluated on 6 task types and 752 individual tasks. The non-adapted (off-the-shelf) tasks include zero-shot findings classification, phenotype classification, and zero-shot cross-modal retrieval, while model adapted tasks include 5-year chronic disease prediction, radiology report generation, and 3D semantic segmentation. 
It has undergone internal validation on a test set of 5,137 CTs, and external validation on 7,000 clinical CTs and on two public CT datasets (VerSe, TotalSegmentator).",2024-09-08,,[],"Merlin is a 3D Vision Language Model that's designed for interpretation of abdominal computed tomography (CT) scans. It uses both structured Electronic Health Record (EHR) and unstructured radiology reports for supervision without requiring additional manual annotations. The model was trained on a high-quality clinical dataset of paired CT scans, EHR diagnosis codes, and radiology reports and was evaluated on 6 task types and 752 individual tasks.",,"Feedback and reports for problems with the model should likely be routed to Stanford Center for Artificial Intelligence in Medicine and Imaging, or the corresponding author of the research (louis.blankemeier@stanford.edu).",,"This model is intended for use in the interpretation of abdominal computed tomography (CT) scans, chronic disease prediction, radiology report generation, and 3D semantic segmentation.",Unknown,image; text,Unknown,Merlin,unknown,"Stanford Center for Artificial Intelligence in Medicine and Imaging, Stanford University","The model should not be used outside of healthcare-related context, such as for personal or non-medical commercial purposes.",The model has undergone extensive evaluations and also internal and external validation tests.,,Unknown,model,https://arxiv.org/pdf/2406.06512,unknown,Unknown,Single GPU.,Unknown,,,,,,, open,Evaluated on AlpacaEval benchmark against SOTA LLMs.,2023-09-20,,[],"Xwin-LM is a LLM, which on release, ranked top 1 on AlpacaEval, becoming the first to surpass GPT-4 on this benchmark.",,https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1/discussions,,,LLaMA2,text; text,,Xwin-LM,USA,Xwin,,,,70B parameters (dense),model,https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1,https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1,unknown,unknown,unknown,,,,,,, limited,,2023-04-13,,"['Jurassic-2', 'Claude', 
'Stable Diffusion', 'Amazon Titan', 'Claude 2', 'Cohere Command']","Bedrock is a new service that makes FMs from AI21 Labs, Anthropic, Stability AI, and Amazon accessible via an API. Bedrock is intended for customers to build and scale generative AI-based applications using FMs, democratizing access for all builders.",,,,allowing companies to incorporate generative AI into their business models,unknown,,,Bedrock,USA,Amazon,,,,,application,https://aws.amazon.com/bedrock/,,,,,,,,foundation models made accessible via an API,https://aws.amazon.com/service-terms/,, open,Evaluated against benchmarks that are specifically designed to assess the capabilities of LLMs in handling longer contexts.,2023-08-08,,['Falcon-40B'],"FalconLite2 is a fine-tuned and quantized Falcon language model, capable of processing long (up to 24K tokens) input sequences.",,https://huggingface.co/amazon/FalconLite2/discussions,,,Apache 2.0,text; text,,FalconLite2,USA,Amazon,,,,40B parameters (dense),model,https://huggingface.co/amazon/FalconLite2,https://huggingface.co/amazon/FalconLite2,unknown,unknown,unknown,,,,,,, open,Chronos has been evaluated comprehensively on 42 datasets both in the in-domain (15 datasets) and zero-shot settings (27 datasets). Chronos outperforms task specific baselines in the in-domain setting and is competitive or better than trained models in the zero-shot setting.,2024-03-13,,['T5'],"Chronos is a family of pretrained time series forecasting models based on language model architectures. A time series is transformed into a sequence of tokens via scaling and quantization, and a language model is trained on these tokens using the cross-entropy loss. 
Once trained, probabilistic forecasts are obtained by sampling multiple future trajectories given the historical context.",,https://github.com/amazon-science/chronos-forecasting/discussions,,"Chronos can be used for zero-shot time series forecasting on univariate time series from arbitrary domains and with arbitrary horizons. Chronos models can also be fine-tuned for improved performance of specific datasets. Embeddings from Chronos encoder may also be useful for other time series analysis tasks such as classification, clustering, and anomaly detection.",Apache 2.0,time-series; time-series,,Chronos,USA,Amazon,,"Chronos was evaluated rigorously on 42 datasets, including 27 in the zero-shot setting against a variety of statistical and deep learning baselines.",,710M parameters (dense),model,https://github.com/amazon-science/chronos-forecasting,https://huggingface.co/amazon/chronos-t5-large,,8 NVIDIA A100 40G GPUs,63 hours on p4d.24xlarge EC2 instance,,,,,,, limited,Amazon Nova Pro is capable of processing up to 300K input tokens and sets new standards in multimodal intelligence and agentic workflows that require calling APIs and tools to complete complex workflows. It achieves state-of-the-art performance on key benchmarks including visual question answering ( TextVQA ) and video understanding ( VATEX ).,2024-12-03,,[],"A new generation of state-of-the-art foundation models (FMs) that deliver frontier intelligence and industry leading price performance, available exclusively in Amazon Bedrock. 
Amazon Nova understanding models excel in Retrieval-Augmented Generation (RAG), function calling, and agentic applications.",,unknown,,"You can build on Amazon Nova to analyze complex documents and videos, understand charts and diagrams, generate engaging video content, and build sophisticated AI agents, from across a range of intelligence classes optimized for enterprise workloads.",unknown,"text, image, video; text",unknown,Amazon Nova (Understanding),USA,Amazon Web Services (AWS),unknown,All Amazon Nova models include built-in safety controls and creative content generation models include watermarking capabilities to promote responsible AI use.,,unknown,model,https://aws.amazon.com/blogs/aws/introducing-amazon-nova-frontier-intelligence-and-industry-leading-price-performance/,unknown,unknown,unknown,unknown,,,,,,, limited,Amazon Nova Canvas excels on human evaluations and key benchmarks such as text-to-image faithfulness evaluation with question answering (TIFA) and ImageReward.,2024-12-03,,[],"A new generation of state-of-the-art foundation models (FMs) that deliver frontier intelligence and industry leading price performance, available exclusively in Amazon Bedrock.",,unknown,,"You can build on Amazon Nova to analyze complex documents and videos, understand charts and diagrams, generate engaging video content, and build sophisticated AI agents, from across a range of intelligence classes optimized for enterprise workloads.",unknown,"text, image;image, video",unknown,Amazon Nova (Creative Content Generation),USA,Amazon Web Services (AWS),unknown,All Amazon Nova models include built-in safety controls and creative content generation models include watermarking capabilities to promote responsible AI use.,,unknown,model,https://aws.amazon.com/blogs/aws/introducing-amazon-nova-frontier-intelligence-and-industry-leading-price-performance/,unknown,unknown,unknown,unknown,,,,,,, open,Reports results on standard translation benchmarks across 102 languages in comparison 
with Google Translate and ChatGPT,2023-05-29,,"['LLaMA', 'CLUE', 'BigTrans parallel dataset']",BigTrans is a model which adapts LLaMA that covers only 20 languages and enhances it with multilingual translation capability on more than 100 languages,,https://huggingface.co/James-WYang/BigTrans/discussions,,Advancing future research in multilingual LLMs,Apache 2.0,text; text,,BigTrans,China,Institute of Automation Chinese Academy of Sciences,,,,13B parameters (dense),model,https://arxiv.org/pdf/2305.18098v1.pdf,https://huggingface.co/James-WYang/BigTrans,unknown,16 A100 GPUs with 80 GB of RAM,unknown,,,,,,, open,"Evaluated on standard benchmarks for knowledge and language understanding, mathematical reasoning, and programming ability in comparison to similarly sized open-source models.",2023-12-22,,[],YAYI 2 is an open source large language model trained in both English and Chinese.,,https://huggingface.co/wenge-research/yayi2-30b/discussions,,,custom,text; text,,YAYI 2,China,Institute of Automation Chinese Academy of Sciences,,"data is deduplicated, normalized, cleaned, and filtered for toxicity",,30B parameters (dense),model,https://arxiv.org/pdf/2312.14862.pdf,https://huggingface.co/wenge-research/yayi2-30b,unknown,over 1000 A800 GPUs,unknown,,,,,,, open,"Analyses of the data's composition, document statistics, language/dialectal coverage, topical distribution, and biases are conducted in the paper [[The Pile Paper]](https://arxiv.org/pdf/2101.00027.pdf). ",2021-01-01,https://arxiv.org/pdf/2201.07311.pdf,[],"A large language model training dataset, used to train GPT-NeoX-20B. ","Authors report that they have excluded some datasets ""because they were too small to be worth spending time or because the English component of the data did not merit inclusion on its own. 
Three datasets were excluded for other reasons: (1) US Congressional Records were excluded because it ""reflects the opinions and biases of the political class over the past 200 years, including segregationism and xenophobia."" (2) Online Fanfiction resources amounting to Hundreds of GiB were excluded on logistical grounds. (3) Literotica, platform where users can upload short-form erotic fiction, was excluded because the authors decided to exclude fanfiction, the corpus would require significant investigation, and corpus contain significant amount of stereotyping [[Appendix B]](https://arxiv.org/pdf/2101.00027.pdf). ","Feedback can be given by emailing the authors at contact at eleuther.ai. ","The Pile data come from 22 sources, with over half of the data being from Common Crawl (Pile-CC; 227GB), fiction and nonfiction books (Books3; 101GB), biomedical articles (PubMed Central; 90GB), and code (Github; 95 GB). Refer to the paper for full decomposition [[Table 1]](https://arxiv.org/pdf/2101.00027.pdf#table.caption.2). ","The Pile was intended to be used as a high quality large text dataset for language modeling tasks, explained in more detail in the paper [[Section 1]](https://arxiv.org/pdf/2101.00027.pdf#section.1). ",MIT,"code, text",,The Pile,USA,EleutherAI,,"In addition to the data inclusion and exclusion decisions, the quality was controlled through filtering for English (pycld2 language classifier), filtering for documents similar to OpenWebText2 (classifier on CommonCrawl), and several forms of deduplication as detailed in the paper [[Appendix C]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.C) [[Appendix D]](https://arxiv.org/pdf/2101.00027.pdf#appendix.1.D). ","['...pot trending topics and the coverage around them. First up, there’s a bit of a visual redesign. Previously, clicking on a trending topic would highlight a story from one publication, and you’d have to scroll down past a live video section to view related stories. 
Facebook is replacing that system with a simple carousel, which does a better job of showing you different coverage options. To be clear, the change doesn’t affect how stories are sourced, according to Facebook. It’s still the same algorithm pickin...', 'Total knee arthroplasty (TKA) is a promising treatment for endstage osteoarthritis (OA) of the knee for alleviating pain and restoring the function of the knee. Some of the cases with bilateral TKA are symptomatic, necessitating revision arthroplasty in both the knees. A bilateral revision TKA can be done ei', 'On the converse, the set-valued map $\\Phi:[0,3]\\rightrightarrows [0,3]$ $$\\Phi(x):=\\left\\{\\begin{array}{ll} \\{1\\} & \\mbox{ if } 0\\leq x<1\\\\ {}[1,2] & \\mbox{ if } 1\\leq x\\leq 2\\\\ \\{2\\} &', 'This Court thus uses the same interpretation of V.R.C.P. 52(a) as it did *487 under the previous statutory requirement found in 12 V.S.A. § 2385. In essense, the defendants urge that this Court should reconsider the case of Green Mountain Marble Co. v. Highway Board, supra, and follow the Federal practice of looking to the evide']",825 GB,dataset,https://arxiv.org/pdf/2101.00027.pdf,,,,,,,,,,, open,,2021-06-04,,['The Pile'],GPT-J is an open-source autoregressive language model.,,,,,Apache 2.0,text; text,,GPT-J,USA,EleutherAI,,,,6B parameters (dense),model,https://arankomatsuzaki.wordpress.com/2021/06/04/gpt-j/,,,TRC (Unspecified # of TPU v3-8s),,,,,,,, open,,2021-03-21,,['The Pile'],,,,,,MIT,text; text,,GPT-Neo,USA,EleutherAI,,,,2.7B parameters (dense),model,https://github.com/EleutherAI/gpt-neo,,,,,,,,,,, open,"The model was evaluated on standard NLP benchmarks: LAMBADA, ANLI, HellaSwag, MMLU among others [[Section 4]](http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf#section.4). ",2022-02-02,,['The Pile'],"GPT-NeoX (20B) is an open-sourced autoregressive language model. 
",,"Feedback can be provided using the # 20b channel in EleutherAI Discord group [[EleutherAI Blog Post]](https://blog.eleuther.ai/announcing-20b/). Find the Discord link in the FAQ page [[FAQ]](https://www.eleuther.ai/faq/). ",,"As stated in the model card: ""GPT-NeoX-20B learns an inner representation of the English language that can be used to extract features useful for downstream tasks. The model is best at what it was pretrained for however, which is generating text from a prompt. Due to the generality of the pretraining set, it has acquired the ability to generate completions across a wide range of tasks - from programming to fiction writing [[Model Card]](https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md)."" ",Apache 2.0,"text; text, code",,GPT-NeoX,USA,EleutherAI,,,,20B parameters (dense),model,http://eaidata.bmk.sh/data/GPT_NeoX_20B.pdf,https://mystic.the-eye.eu/public/AI/models/GPT-NeoX-20B/20B_model_card.md,31.73 tCO2e,12 x 8 A100 GPUs,47.10 petaflop/s-day,,,,,,, limited,,,,['GPT-NeoX'],"GooseAI API is an API service providing access to NLP services. ",,Email support,,"Intended to be used as an NLP infrastructure. 
",custom,,At will monitoring by the provider,GooseAI API,USA,GooseAI,"Illegal or abusive activity, security violations, network abuse ",unknown,,,application,goose.ai,,,,,unknown,unknown,unknown,"Text Generation, Text Completion",https://goose.ai/docs/tos,unknown, open,"Evaluated by human testers rating alignment of text input, image output pairs.",2022-09-04,,"['VQGAN', 'CLIP']",VQGAN-CLIP is a model that better generates and edits images using a multimodal encoder to guide image generation.,,,,,MIT,text; image,,VQGAN-CLIP,USA,EleutherAI,,,,227M parameters (dense),model,https://arxiv.org/pdf/2204.08583.pdf,,unknown,1 NVIDIA Tesla K80 GPU,Less than 1 V100-hour,,,,,,, open,Evaluated on a variety of NLP benchmarks and found to perform similarly to OPT and BLOOM models.,2023-05-31,,['The Pile'],A suite of 16 LLMs all trained on public data seen in the exact same order and ranging in size from 70M to 12B parameters,,https://huggingface.co/EleutherAI/pythia-6.9b/discussions,,,Apache 2.0,text; text,,Pythia,unknown,Eleuther AI,,,,12B parameters (dense),model,https://arxiv.org/pdf/2304.01373.pdf,https://huggingface.co/EleutherAI/pythia-12b,,64 A100 GPUs,,,,,,,, open,Evaluated on math benchmarks in comparison to general large language models.,2023-10-16,,"['Proof Pile 2', 'Code LLaMA']",Llemma is a large language model for mathematics.,,https://huggingface.co/EleutherAI/llemma_34b/discussions,,,LLaMA 2,text; text,,Llemma,unknown,"Princeton University, Eleuther AI",,,,34B parameters (dense),model,https://arxiv.org/pdf/2310.10631.pdf,https://huggingface.co/EleutherAI/llemma_34b,unknown,256 A100 40GB GPUs,47k A100 hours,,,,,,, open,,2023-10-16,https://huggingface.co/datasets/EleutherAI/proof-pile-2,"['Common Crawl', 'OpenWebMath', 'Arxiv', 'RedPajama-Data']",Proof Pile 2 is a corpus for language modeling of mathematics.,,https://huggingface.co/datasets/EleutherAI/proof-pile-2/discussions,"scientific papers, web data containing mathematics, mathematical code",,MIT,text,,Proof 
Pile 2,unknown,"Princeton University, Eleuther AI",,,[],55B tokens,dataset,https://arxiv.org/pdf/2310.10631.pdf,,,,,,,,,,, open,"The models were evaluated on SuperGLUE, CodeXGLUE, as well as MMLU and Bigbench Hard. Comparisons were made with T5v1.1 and found that Pile-T5 models performed better in most conditions.",2024-04-15,,"['The Pile', 'T5x', 'LLaMA', 'umT5']","Pile-T5 is a version of the broadly used T5 model, but improved to eliminate weaknesses such as the omission of crucial code-related tokens. It utilizes LLaMA tokenizer and is trained on the Pile, offering enhancements for finetuning on downstream tasks, particularly those involving code.",,unknown,,The model is aimed at downstream tasks that benefit from the encoder-decoder architecture. Particularly useful for tasks involving code.,unknown,text; text,unknown,Pile-T5,USA,EleutherAI,unknown,,,unknown,model,https://blog.eleuther.ai/pile-t5/,,unknown,unknown,2 million steps,,,,,,, open,,2023-04-16,,['GPT-4 API'],Auto-GPT is an experimental open-source application showcasing the capabilities of the GPT-4 language model.,,,,,MIT,,,Auto-GPT,USA,Auto-GPT,,,,,application,https://news.agpt.co/,,,,,"GPT-4 adapted to run autonomously by chaining together LLM ""thoughts""",,,text,,, open,Evaluated across different text benchmarks in English and Chinese.,2023-10-01,,[],OpenBA is an open-sourced 15B bilingual (English + Chinese) asymmetric seq2seq model.,,https://huggingface.co/OpenBA/OpenBA-LM/discussions,,,Apache 2.0,text; text,,OpenBA,China,Soochow University,,,,15B parameters (dense),model,https://arxiv.org/pdf/2309.10706.pdf,https://huggingface.co/OpenBA/OpenBA-LM,6.5 tCO2eq,8 NVIDIA A100-80GB GPUs,38k GPU hours,,,,,,, limited,,2023-03-21,,[],"Reexpress One offers a means of document classification, semantic search, and uncertainty analysis on-device.",,https://github.com/ReexpressAI/support,,,unknown,,unknown,Reexpress One,USA,Reexpress AI,,,,,application,https://re.express/index.html,,,,,,unknown,unknown,data 
analyses,https://re.express/tos.html,unknown, closed,,2021-08-11,,[],"The dataset used to train the Jurassic-1 models, based on publicly available data.",unknown,,unknown,unknown,unknown,text,,Jurassic-1 dataset,Israel,AI21 Labs,,,[],300B tokens,dataset,https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf,,,,,,,,,,, closed,unknown,2022-12-01,,[],The dataset used to instruction-tune the Jurassic-1 Instruct models.,unknown,,unknown,unknown,unknown,text,,Jurassic-1 Instruct dataset,Israel,AI21 Labs,unknown,unknown,[],unknown,dataset,,,,,,,,,,,, limited,"Evaluated on several standard benchmarks (e.g. ARC, BoolQ, HellaSwag, RTE, Winogrande)",2021-08-11,,['Jurassic-1 dataset'],"Jurassic-1 is a family of autoregressive language models (Large, Grande, Jumbo).",,,,unknown,unknown,text,,Jurassic-1,Israel,AI21 Labs,"Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence.",,,178B parameters (dense),model,https://uploads-ssl.webflow.com/60fd4503684b466578c0d307/61138924626a6981ee09caf6_jurassic_tech_paper.pdf,,unknown,Over 800 A100 GPUs,Several months,,,,,,, limited,,2022-12-01,,"['Jurassic-1', 'Jurassic-1 Instruct dataset']",Jurassic-1 Instruct is an instruction-tuned autoregressive language model.,,,,"Jurassic-1 Instruct was trained specifically to handle instructions-only prompts (""zero-shot"") without examples (""few-shot""). 
It is the most natural way to interact with language models, and it is the best way to get a sense of the optimal output for your task without any examples.",unknown,text; text,,Jurassic-1 Instruct,Israel,AI21 Labs,"Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence.",unknown,,17B parameters (dense),model,https://docs.ai21.com/docs/jurassic-1-instruct-beta,,unknown,unknown,unknown,,,,,,, limited,The model was evaluated on the HELM benchmark as discussed in https://www.ai21.com/blog/introducing-j2.,2023-03-09,,[],Jurassic-2 is a family of language models designed to replace Jurassic-1.,,,,,unknown,text; text,unknown,Jurassic-2,Israel,AI21 Labs,"Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence.",unknown,,unknown,model,https://docs.ai21.com/docs/jurassic-2-models,,unknown,unknown,unknown,,,,,,, limited,,2021-08-11,,"['Jurassic-1', 'Jurassic-1 Instruct', 'Jurassic-2', 'AI21 Summarization API', 'AI21 Paraphrase API']",The AI21 Labs Playground supports several task-specific APIs in addition to a variety of models.,,Feedback can be given by emailing at info at ai21.com,,"The intended uses are text completion, rewriting, and summarization.",,,unknown,AI21 Playground,Israel,AI21 Labs,"Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of 
violence.",unknown,,,application,https://studio.ai21.com/playground/,,,,,The Playground provides direct access to the language models (Complete API) as well as wrapped for Rewrite and Summarize.,unknown,unknown,text,https://www.ai21.com/terms-of-use,unknown, limited,,2023-03-09,,['Jurassic-2'],AI21 Studio's Paraphrase API offers access to our world-class paraphrasing engine. It has been specifically developed for suggesting alternative ways to convey the same message using different words.,,Feedback can be given by emailing at info at ai21.com,,The intended uses are text paraphrasing.,,,unknown,AI21 Paraphrase API,Israel,AI21 Labs,"Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence.",unknown,,,application,https://docs.ai21.com/docs/paraphrase-api,,,,,The AI21 language models are further specialized to the task of paraphrasing.,unknown,unknown,text,https://www.ai21.com/terms-of-use,unknown, limited,,2023-03-09,,['Jurassic-2'],AI21 Studio's Summarize API offers access to our world-class summarization engine. 
It has been specifically developed for reading long texts and providing a faithful summary of the original document.,,Feedback can be given by emailing at info at ai21.com,,The intended uses are text paraphrasing.,,,unknown,AI21 Summarization API,Israel,AI21 Labs,"Illegal activities, such as hate speech, gambling, child pornography or violating intellectual property rights; Harassment, victimization, intimidation, fraud or spam; Creation or dissemination of misinformation, promotion of self-harm, glorification of violent events or incitement of violence.",unknown,,,application,https://docs.ai21.com/docs/summarize-api,,,,,The AI21 language models are further specialized to the task of summarization.,unknown,unknown,text,https://www.ai21.com/terms-of-use,unknown, limited,,2020-10-27,,['AI21 Paraphrase API'],"Wordtune, the first AI-based writing companion that understands context and meaning.",,,,The Wordtune assistant is a writing assistant,Wordtune License,,unknown,Wordtune,Israel,AI21 Labs,,unknown,,,application,https://www.wordtune.com/,,,,,unknown,unknown,unknown,text,https://www.wordtune.com/terms-of-use,unknown, limited,,2021-11-16,,['AI21 Summarize API'],"Wordtune Read is an AI reader that summarizes long documents so you can understand more, faster.",,,,,Wordtune License,,unknown,Wordtune Read,Israel,AI21 Labs,,unknown,,,application,https://www.wordtune.com/read,,,,,unknown,unknown,unknown,text,https://www.wordtune.com/terms-of-use,unknown, open,Jamba outperforms or matches other state-of-the-art models in its size class on a wide range of benchmarks.,2024-03-28,,[],"Jamba is a state-of-the-art, hybrid SSM-Transformer LLM. 
Jamba is the world’s first production-grade Mamba based model.",,https://huggingface.co/ai21labs/Jamba-v0.1/discussions,,"intended for use as a foundation layer for fine tuning, training",Apache 2.0,text; text,,Jamba,Israel,AI21 Labs,,,,52B parameters (sparse),model,https://www.ai21.com/blog/announcing-jamba,https://huggingface.co/ai21labs/Jamba-v0.1,unknown,unknown,unknown,,,,,,, open,"The models were evaluated based on their ability to handle long contexts, speed, and quality. They outperformed competitors in their size class, scoring high on the Arena Hard benchmark.",2024-08-22,,[],"A family of models that demonstrate superior long context handling, speed, and quality. Built on a novel SSM-Transformer architecture, they surpass other models in their size class. These models are useful for enterprise applications, such as lengthy document summarization and analysis. The Jamba 1.5 family also includes the longest context window, at 256K, among open models. They are fast, quality-focused, and handle long contexts efficiently.",,Unknown,,"The models are built for enterprise scale AI applications. They are purpose-built for efficiency, speed, and ability to solve critical tasks that businesses care about, such as lengthy document summarization and analysis. They can also be used for RAG and agentic workflows.",Jamba Open Model License,text; text,Unknown,Jamba 1.5,unknown,AI21,Unknown,"The models were evaluated on the Arena Hard benchmark. 
For maintaining long context performance, they were tested on the RULER benchmark.",,94B parameters,model,https://www.ai21.com/blog/announcing-jamba-model-family,unknown,Unknown,"For speed comparisons, Jamba 1.5 Mini used 2xA100 80GB GPUs, and Jamba 1.5 Large used 8xA100 80GB GPUs.",Unknown,,,,,,, limited,,2022-09-16,,[],Character allows users to converse with various chatbot personas.,,,,,unknown,,,Character,USA,Character AI,,,,,application,https://beta.character.ai/,,,,,,unknown,unknown,AI-generated chat conversations,https://beta.character.ai/tos,unknown, closed,,2021-12-23,,[],ERNIE 3.0 Titan is a language model,,,,unknown,unknown,text; text,,ERNIE 3.0 Titan,China,"Baidu, PengCheng Laboratory",unknown,,,260B parameters (dense),model,https://arxiv.org/abs/2112.12731,,unknown,"Baidu V100 Cluster, PengCheng Lab Ascend 910 NPU cluster",unknown,,,,,,, limited,,2021-12-31,,[],ERNIE-ViLG is a model for text-to-image generation,,,,unknown,,text; image,,ERNIE-ViLG,China,Baidu,unknown,,,10B parameters (dense),model,https://arxiv.org/abs/2112.15283,,unknown,unknown,unknown,,,,,,, closed,,2022-10-27,,[],ERNIE-ViLG is a model for text-to-image generation,,,,unknown,unknown,text; image,,ERNIE-ViLG 2.0,China,Baidu,unknown,,,10B parameters (dense),model,https://arxiv.org/abs/2210.15257,,unknown,320 A100 GPUs according to [[the paper]](https://arxiv.org/abs/2210.15257),18 days according to [[the paper]](https://arxiv.org/abs/2210.15257),,,,,,, limited,,2023-10-17,,[],ERNIE-4.0 is a multimodal generalist foundation model.,,,,unknown,unknown,"text; image, text, video",,ERNIE 4.0,China,Baidu,unknown,,,unknown,model,https://www.prnewswire.com/news-releases/baidu-launches-ernie-4-0-foundation-model-leading-a-new-wave-of-ai-native-applications-301958681.html,,unknown,unknown,unknown,,,,,,, closed,Outperforms GPT-4 in common sense and reasoning tasks on the basis of both efficiency and accuracy.,2023-06-20,,"['GPT-4', 'Claude', 'Falcon-40B']",Ocean-1 is the culmination of Cresta's 
experience in deploying generative AI systems for large enterprises and signifies their latest milestone in advancing the cutting edge AI technology for customer facing conversations.,,,,Acting as a contact center chatbot agent.,unknown,text; text,unknown,Ocean-1,USA,Cresta,,,,7B parameters (dense),model,https://cresta.com/blog/introducing-ocean-1-worlds-first-contact-center-foundation-model/,,unknown,unknown,unknown,,,,,,, open,Evaluated on English and Chinese language benchmarks.,2023-05-27,,[],"CPM-Bee is a fully open-source, commercially-usable Chinese-English bilingual base model with a capacity of ten billion parameters.",,https://huggingface.co/openbmb/cpm-bee-10b/discussions,,You can use the raw model for many NLP tasks like text generation or fine-tune it to a downstream task.,custom,text; text,unknown,CPM Bee,International,OpenBMB,,,,10B parameters (dense),model,https://github.com/OpenBMB/CPM-Bee,https://huggingface.co/openbmb/cpm-bee-10b,unknown,unknown,unknown,,,,,,, open,Randomly chosen models trained on UltraFeedback evaluated across standard benchmarks.,2023-09-26,https://huggingface.co/datasets/openbmb/UltraFeedback,[],"UltraFeedback is a large-scale, fine-grained, diverse preference dataset, used for training powerful reward models and critic models.",,https://huggingface.co/datasets/openbmb/UltraFeedback/discussions,,,MIT,text,unknown,UltraFeedback,International,OpenBMB,,,[],256k samples,dataset,https://github.com/OpenBMB/UltraFeedback,,,,,,,,,,, open,Evaluated on open-sourced general benchmarks in comparison to SotA LLMs.,2024-02-01,,[],"MiniCPM is an End-Side LLM developed by ModelBest Inc. 
and TsinghuaNLP, with only 2.4B parameters excluding embeddings (2.7B in total).",,https://huggingface.co/openbmb/MiniCPM-V/discussions,,,custom,text; text,unknown,MiniCPM,International,OpenBMB,,,,2.4B parameters (dense),model,https://github.com/OpenBMB/MiniCPM/,https://huggingface.co/openbmb/MiniCPM-V,unknown,unknown,unknown,,,,,,, open,The model was comprehensively benchmarked across 12 tests covering five tasks. Eurus achieved the best overall performance among open-source models of similar sizes and even outperformed specialized models in many cases.,2024-04-02,,"['Eurus SFT', 'UltraInteract', 'UltraFeedback']",Eurus is a suite of large language models (LLMs) optimized for reasoning.,,https://huggingface.co/openbmb/Eurus-70b-nca/discussions,,The model can be used for reasoning tasks and is especially tailored for coding and math following specific prompts.,Apache 2.0,text; text,unknown,Eurus,International,OpenBMB,,,,70B parameters,model,https://arxiv.org/abs/2404.02078,https://huggingface.co/openbmb/Eurus-70b-nca,unknown,unknown,unknown,,,,,,, limited,,2019-12-17,,['OpenAI API'],"AI Dungeon is a single-player text adventure game that uses AI to generate content. 
",,,,,custom,,,AI Dungeon,USA,Latitude,,,,,application,https://play.aidungeon.io,,,,,,,,,https://play.aidungeon.io/main/termsOfService,, open,,2023-05-06,,[],Portkey is a hosted middleware that allows users to create generative AI applications,,,,,,,,Portkey,USA,Portkey,,,,,application,https://portkey.ai/,,,,,,,,generative AI apps,https://portkey.ai/terms,, open,,2023-03-08,,['Anthropic API'],The first Instant Answer in DuckDuckGo search results to use natural language technology to generate answers to search queries using Wikipedia and other related sources,,,,,unknown,,,DuckAssist,USA,DuckDuckGo,,,,,application,https://spreadprivacy.com/duckassist-launch/,,,,,,,,,,, open,,2022-04-16,,[],,,,,,Apache 2.0,NLP tasks,,NaturalInstructions-v2,USA,AI2,,,[],1600 tasks,dataset,https://arxiv.org/abs/2204.07705,,,,,,,,,,, open,"Randomly sampled dialogues from dataset are evaluated according to six established criteria of natural flow, context dependence, topic consistency, speaker consistency, specificity, and overall.",2023-05-24,https://huggingface.co/datasets/allenai/soda,[],"SODA is the first publicly available, million-scale, high-quality dialogue dataset covering a wide range of social interactions.",,,,,CC BY 4.0,text,,SODA,USA,AI2,,,[],1.5M dialogues,dataset,https://arxiv.org/pdf/2212.10465.pdf,,,,,,,,,,, open,Conducted experiments on models trained with Multimodal C4 in comparison to models trained on single image/caption datasets,2023-06-09,,['C4'],An augmentation of C4 with images added and made openly available.,,,,,ODC-By,"image, text",,Multimodal C4,USA,AI2,,,[],43B English tokens with 101.2M documents and 571M images,dataset,https://arxiv.org/pdf/2304.06939.pdf,,,,,,,,,,, open,Evaluated by human testers on generalization capabilities and responses compared to other chatbots.,2023-05-24,,"['SODA', 'ProsocialDialog', 'T5']",COSMO is a conversation agent with greater generalizability on both in- and out-of-domain chitchat 
datasets,,https://huggingface.co/allenai/cosmo-xl/discussions,,,,text; text,,COSMO,USA,AI2,,,,11B parameters (dense),model,https://arxiv.org/pdf/2212.10465.pdf,https://huggingface.co/allenai/cosmo-xl,unknown,v3-128 TPU accelerators with batch size 256,unknown,,,,,,, open,Compared with other open and closed datasets in regards to size and quality control.,2023-08-18,https://drive.google.com/file/d/12gOf5I5RytsD159nSP7iim_5zN31FCXq/view,[],"Dolma is a dataset of 3 trillion tokens from a diverse mix of web content, academic publications, code, books, and encyclopedic materials",,,,,AI2 ImpACT,text,"Quality filtration, deduplication, and risk mitigation via logistic qualifiers and regular expressions used.",Dolma,USA,AI2,,,[],3T tokens,dataset,https://blog.allenai.org/dolma-3-trillion-tokens-open-llm-corpus-9a0ff4b8da64,,,,,,,,,,, open,Models trained with dataset evaluated on downstream performance.,2023-11-20,https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture,"['FLAN Collection', 'Open Assistant 1', 'ShareGPT', 'Alpaca dataset', 'Code Alpaca', 'LIMA', 'WizardLM', 'OpenOrca']",Tulu-V2-mix is a dataset composed of many high-quality instruction datasets that results in stronger performance across a variety of reasoning and knowledge-probing tasks.,,https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture/discussions,,,ODC-BY,text,,Tulu-V2-mix,USA,AI2,,,[],unknown,dataset,https://arxiv.org/pdf/2311.10702.pdf,,,,,,,,,,, open,Evaluated on MT-Bench and AlpacaEval. compared to other chatbots.,2023-11-20,,"['LLaMA 2', 'Tulu-V2-mix']",Tulu 2 is a language model trained on the new Tulu-v2-mix dataset and fine-tuned on more state of the art language models.,,https://huggingface.co/allenai/tulu-2-70b/discussions,,,AI2 ImpACT,text; text,,Tulu 2,USA,AI2,,,,70B parameters (dense),model,https://arxiv.org/pdf/2311.10702.pdf,https://huggingface.co/allenai/tulu-2-70b,unknown,unknown,unknown,,,,,,, open,Evaluated on MT-Bench and AlpacaEval. 
compared to other chatbots.,2023-11-20,,"['LLaMA 2', 'Tulu-V2-mix']","Tulu 2 DPO is created in a similar manner to Tulu 2, but with Direct Preference Optimization (DPO).",,https://huggingface.co/allenai/tulu-2-dpo-70b/discussions,,,AI2 ImpACT,text; text,,Tulu 2 DPO,USA,AI2,,,,70B parameters (dense),model,https://arxiv.org/pdf/2311.10702.pdf,https://huggingface.co/allenai/tulu-2-dpo-70b,unknown,unknown,unknown,,,,,,, open,Evaluated on MT-Bench and AlpacaEval. compared to other chatbots.,2023-11-20,,"['Code LLaMA', 'Tulu-V2-mix']","Code Tulu 2 is a fine-tuned version of Code LLaMA that was trained on a mix of publicly available, synthetic and human datasets.",,https://huggingface.co/allenai/codetulu-2-13b/discussions,,,AI2 ImpACT,"text; code, text",,Code Tulu 2,USA,AI2,,,,13B parameters (dense),model,https://arxiv.org/pdf/2311.10702.pdf,https://huggingface.co/allenai/codetulu-2-13b,unknown,unknown,unknown,,,,,,, open,"Evaluated on standard LLM tasks and benchmarks in comparison to LLaMA, Falcon, and MPT, in addition to other same-sized models.",2024-02-01,,['Dolma'],"Open Language Model (OLMo) is designed to provide access to data, training code, models, and evaluation code necessary to advance AI through open research to empower academics and researchers to study the science of language models collectively.",,https://huggingface.co/allenai/OLMo-7B/discussions,,,Apache 2.0,text; text,unknown,OLMo,USA,AI2,,training data from Dolma filtered and deduplicated before being trained on.,,7B parameters (dense),model,https://allenai.org/olmo/olmo-paper.pdf,https://huggingface.co/allenai/OLMo-7B,75.05 tCo2eq,"27 nodes, with each node containing 8x NVIDIA A100-40GB GPUs provided by MosaicML",unknown,,,,,,, open,,2023-09-09,https://huggingface.co/datasets/allenai/MADLAD-400,['Common Crawl'],"MADLAD-400 is a document-level multilingual dataset based on Common Crawl, covering 419 languages in total.",,https://huggingface.co/datasets/allenai/MADLAD-400/discussions,,,CC BY 
4.0,text,unknown,MADLAD-400,USA,AI2,,,[],3 trillion tokens,dataset,https://arxiv.org/abs/2309.04662,,,,,,,,,,, closed,Extensive scaling laws analysis has been performed on Evo considering beyond-transformer architectures.,2024-09-30,,['StripedHyena'],"Evo is a foundation model based on StripedHyena, which is a deep signal processing architecture. It is capable of generalizing across DNA, RNA, and proteins and is used for generative design of new CRISPR systems.",,Feedback or inquiries can be addressed to the Liquid AI team.,,"The Evo model is intended for generative design of new CRISPR systems, and it generalizes across DNA, RNA, and proteins.",unknown,text; text,unknown,Evo,USA,Liquid AI,unknown,The model performance has been extensively evaluated with scaling laws analysis.,,unknown,model,https://www.liquid.ai/research/liquid-neural-networks-research,unknown,unknown,unknown,unknown,,,,,,, closed,,2021-03-24,,['OpenAI API'],"Crisis Contact Simulator, developed as part of a collaboration with Google.org, helps train The Trevor Project counselors by mimicking a teen in crisis. Crisis Contact Simulator is used as part of the training programs for the Trevor Project's 24/7 digital crisis services that supports LGBTQ youth [[Trevor Project Blog]](https://www.thetrevorproject.org/blog/the-trevor-project-launches-new-ai-tool-to-support-crisis-counselor-training/). 
",,unknown,,"Training counselors ",unknown,,unknown,Crisis Contact Simulator,USA,The Trevor Project,unknown,Limited release,,,application,https://www.thetrevorproject.org/,,,,,Fine-tuning,unknown,unknown,Dialogue,unknown,unknown, open,"Evaluated on benchmarks pertaining to speech, music, and other audio recognition.",2023-10-20,,"['Whisper', 'BEATs', 'Vicuna']","SALMONN is a large language model (LLM) enabling speech, audio event, and music inputs.",,https://huggingface.co/MSIIP/SALMONN/discussions,,,Apache 2.0,"audio, text; text",,SALMONN,unknown,"ByteDance, Tsinghua University",,,,unknown,model,https://github.com/bytedance/SALMONN,https://huggingface.co/MSIIP/SALMONN,unknown,unknown,unknown,,,,,,, open,Evaluated via qualitative comparison relative to other SoTA image generation models.,2024-02-21,,['Stable Diffusion XL'],"SDXL-Lightning is a lightning-fast text-to-image generation model. It can generate high-quality 1024px images in a few steps. The models are distilled from stabilityai/stable-diffusion-xl-base-1.0. This repository contains checkpoints for 1-step, 2-step, 4-step, and 8-step distilled models.",,https://huggingface.co/ByteDance/SDXL-Lightning/discussions,,"The model can be used for fast, high-quality text-to-image generation. It supports 1-step, 2-step, 4-step, and 8-step distilled models which provide varying generation quality.",OpenRail++,text; image,unknown,SDXL-Lightning,China,ByteDance,unknown,unknown,,unknown,model,https://arxiv.org/pdf/2402.13929.pdf,https://huggingface.co/ByteDance/SDXL-Lightning,unknown,64 A100 80G GPUs,unknown,,,,,,, open,"LLaVA-Critic was tested in scenarios such as LMM-as-a-Judge and Preference Learning, showing a high correlation with commercial GPT models in evaluation scores. 
It served as an alternative to expensive human feedback in resource-constrained settings and demonstrated better performance in providing AI-generated feedback for model alignment compared to human-reliant reward models.",2024-10-06,,[],"LLaVA-Critic is an open-source large multimodal model (LMM) designed as a generalist evaluator. It assesses performance across a variety of multimodal tasks by following a high-quality critic instruction dataset, incorporating diverse evaluation criteria. The model is effective in areas like LMM-as-a-Judge, providing reliable evaluation scores comparable to GPT models, and Preference Learning, offering reward signals for preference learning to enhance model alignment capabilities.",,unknown,,"The model can be used for evaluating multimodal tasks, generating reward signals for preference learning, and serving as a reliable alternate judge for model assessments.",Apache 2.0,"image, text; text",unknown,LLaVA-Critic,unknown,"ByteDance and University of Maryland, College Park","The model should not be used in scenarios requiring authorization from proprietary models, nor relied upon for critical applications without human oversight due to potential biases in dataset.","The model ensures quality by utilizing a high-quality dataset for critic instructions, providing both quantitative judgments and reasoning, with transparency in assessments.",,unknown,model,https://arxiv.org/pdf/2410.02712,unknown,unknown,unknown,unknown,,,,,,, limited,,2023-02-01,,[],UnderwriteGPT is the world's first generative AI underwriting tool.,,,,,,,,UnderwriteGPT,USA,Paladin Group and Dais Technology,,,,,application,https://dais.com/underwritegpt/,,,,,,,,,,, closed,"Boasts the highest performance among the Korean LLMs of similar sizes that have been released to date, according to internal evaluations.",2023-08-16,,[],VARCO-LLM is NCSOFT’s large language model and is trained on English and Korean.,,,,"Developing various NLP-based AI services such as Q&A, chatbot, 
summarization, information extraction",custom,text; text,,VARCO-LLM,South Korea,NCSOFT,,,,13B parameters,model,https://github.com/ncsoft/ncresearch,,unknown,unknown,unknown,,,,,,, open,Evaluated on the SuperGLUE benchmark,2023-01-01,,['Writer dataset'],Palmyra is a family of privacy-first LLMs for enterprises trained on business and marketing writing.,,https://huggingface.co/Writer/palmyra-base/discussions,,generating text from a prompt,Apache 2.0,text; text,,Palmyra,USA,Writer,,,,20B parameters (dense),model,https://gpt3demo.com/apps/palmyra,https://huggingface.co/Writer/palmyra-base,unknown,,unknown,,,,,,, open,,2023-04-01,,"['Palmyra', 'Camel dataset']",Camel is an instruction-following large language model tailored for advanced NLP and comprehension capabilities.,,https://huggingface.co/Writer/camel-5b-hf/discussions,,,Apache 2.0,text; text,,Camel,USA,Writer,,,,5B parameters (dense),model,https://chatcamel.vercel.app/,https://huggingface.co/Writer/camel-5b-hf,unknown,,unknown,,,,,,, open,"The model was evaluated across 9 diverse biomedical datasets where it achieved state-of-the-art results with an average score of 85.9%. It also demonstrated robust capability in efficiently processing extensive medical documents, as showcased by its near-perfect score in the NIH evaluation.",2024-09-08,,['Palmyra-X-004'],"Palmyra-Med-70b-32k is a Language Model designed specifically for healthcare and biomedical applications. It builds upon the foundation of Palmyra-Med-70b and offers an extended context length. This model integrates the DPO dataset, a custom medical instruction dataset, and has been fine-tuned to meet the unique requirements of the medical and life sciences sectors. It is ranked as the leading LLM on biomedical benchmarks with an average score of 85.87%.",,Downstream problems with this model should be reported via email to Hello@writer.com.,,"Palmyra-Med-70b-32k is intended for non-commercial and research use in English. 
Specifically, it can be used for tasks like clinical entity recognition and knowledge discovery from EHRs, research articles, and other biomedical sources. It excels in analyzing and summarizing complex clinical notes, EHR data, and discharge summaries.",Writer open model,text; text,Measures in place to monitor misuse include the addition of watermarks in all models built by Writer.com to detect and prevent misuse and illegal use.,Palmyra-Med-70b-32k,USA,Writer,"The model should not be used in any manner that violates applicable laws or regulations. It is not to be used in direct patient care, clinical decision support, or professional medical purposes. The model should not replace professional medical judgment.",The model has been refined using Policy Optimization and a finely crafted fine-tuning dataset. It contains watermarks to detect and prevent misuse and illegal use.,,70B parameters,model,https://huggingface.co/Writer/Palmyra-Med-70B-32K,https://huggingface.co/Writer/Palmyra-Med-70B-32K,Unknown,Unknown,Unknown,,,,,,, open,"The model has been evaluated internally, showing state-of-the-art results on various financial datasets. It has shown 100% accuracy in needle-in-haystack tasks and superior performance in comparison to other models in the organization's internal finance evaluations. It passed the CFA Level III test with a score of 73% and has shown superior performance compared to other models in the long-fin-eval, an internally created benchmark that simulates real-world financial scenarios.",2024-09-08,,"['Palmyra-X-004', 'Writer in-house financial instruction dataset']","Palmyra-Fin-70B-32K is a leading LLM built specifically to meet the needs of the financial industry. It has been fine-tuned on an extensive collection of high-quality financial data and it is highly adept at handling the specific needs of the finance field. 
It outperforms other large language models in various financial tasks and evaluations, achieving state-of-the-art results across various financial datasets. Its strong performance in tasks like financial document analysis, market trend prediction, risk assessment underscores its effective grasp of financial knowledge.",,Downstream problems with this model should be reported to Hello@writer.com.,,"The model is intended for use in English for financial analysis, market trend prediction, risk assessment, financial report generation, automated financial advice, and answering questions from long financial documents. It can be used for entity recognition, identifying key financial concepts such as market trends, economic indicators, and financial instruments from unstructured text.",Writer open model license,text; text,Unknown,Palmyra-Fin-70B-32K,USA,Writer,"The model should not be used in manners that violate applicable laws or regulations, including trade compliance laws, use prohibited by Writer's acceptable use policy, the Writer open model license, and in languages other than English. It is advised not to use the model for direct financial decision-making or professional financial advice without human oversight. Always consult a qualified financial professional for personal financial needs.","The model was trained with a proprietary internal database and a fine-tuning recipe to ensure a greater level of domain-specific accuracy and fluency. Still, the model may contain inaccuracies, biases, or misalignments and its usage for direct financial decision-making or professional financial advice without human oversight is not recommended. 
It has not been rigorously evaluated in real-world financial settings and it requires further testing, regulatory compliance, bias mitigation, and human oversight for more critical financial applications.",,70B parameters (dense),model,https://huggingface.co/Writer/Palmyra-Fin-70B-32K,https://huggingface.co/Writer/Palmyra-Fin-70B-32K,Unknown,Unknown,Unknown,,,,,,, open,Performance evaluated on English and Chinese language benchmark tests.,2023-03-14,,[],"ChatGLM is a Chinese-English language model with question and answer and dialogue functions, and is aimed at a Chinese audience.",,,,,Apache 2.0,text; text,,ChatGLM,China,ChatGLM,,,,6B parameters (dense),model,https://chatglm.cn/blog,,unknown,,unknown,,,,,,, open,"Deepseek and baseline models (for comparison) evaluated on a series of representative benchmarks, both in English and Chinese.",2023-11-28,,[],Deepseek is a 67B parameter model with Grouped-Query Attention trained on 2 trillion tokens from scratch.,,https://huggingface.co/deepseek-ai/deepseek-llm-67b-base/discussions,,,custom,text; text,unknown,Deepseek,USA,Deepseek AI,,Training dataset comprised of diverse data composition and pruned and deduplicated.,,67B parameters (dense),model,https://github.com/deepseek-ai/DeepSeek-LLM,https://huggingface.co/deepseek-ai/deepseek-llm-67b-base,unknown,unknown,unknown,,,,,,, open,"Deepseek and baseline models (for comparison) evaluated on a series of representative benchmarks, both in English and Chinese.",2023-11-29,,['Deepseek'],Deepseek Chat is a 67B parameter model initialized from Deepseek and fine-tuned on extra instruction data.,,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat/discussions,,,custom,text; text,unknown,Deepseek Chat,USA,Deepseek AI,,Training dataset comprised of diverse data composition and pruned and deduplicated.,,67B parameters (dense),model,https://github.com/deepseek-ai/DeepSeek-LLM,https://huggingface.co/deepseek-ai/deepseek-llm-67b-chat,unknown,unknown,unknown,,,,,,, open,"Evaluated 
on code generation, code completion, cross-file code completion, and program-based math reasoning across standard benchmarks.",2023-11-03,,[],"Deepseek Coder is composed of a series of code language models, each trained from scratch on 2T tokens, with a composition of 87% code and 13% natural language in both English and Chinese.",,https://huggingface.co/deepseek-ai/deepseek-coder-33b-base/discussions,,,custom,text; code,unknown,Deepseek Coder,USA,Deepseek AI,,,,33B parameters (dense),model,https://github.com/deepseek-ai/DeepSeek-Coder,https://huggingface.co/deepseek-ai/deepseek-coder-33b-base,unknown,8 NVIDIA A100 GPUs and 8 NVIDIA H800 GPUs,unknown,,,,,,, open,"Evaluated on MATH, a competition-level dataset, and achieves a 46% accuracy, higher than accuracy produced by GPT-4's chain of thought.",2023-09-11,,"['MathInstruct', 'LLaMA', 'Code LLaMA']",MAmmoTH is a series of open-source large language models (LLMs) specifically tailored for general math problem-solving.,,,,,MIT,text; text,,MAmmoTH,USA,Ohio State University,,,,34B parameters (dense),model,https://arxiv.org/pdf/2309.05653.pdf,,,,,,,,,,, open,,2023-03-01,,['ChatGPT API'],"My AI offers Snapchatters a friendly, customizable chatbot at their fingertips that offers recommendations, and can even write a haiku for friends in seconds. Snapchat, where communication and messaging is a daily behavior, has 750 million monthly Snapchatters.",,,,,custom,,,My AI for Snapchat,USA,Snap,,,,,application,https://openai.com/blog/introducing-chatgpt-and-whisper-apis,,,,,,,,,https://snap.com/terms,, open,"Models have been evaluated on a series of math benchmarks, demonstrating outperformance of the state-of-the-art models in both the English and Chinese language.",2024-08-08,,[],"Qwen2-Math is a series of specialized math language models built upon the Qwen2 large language models, with a focus on enhancing the reasoning and mathematical capabilities. Their intended use is for solving complex mathematical problems. 
They significantly outperform both open-source and closed-source models in terms of mathematical capabilities.",,Problems with the model should be reported to the Qwen Team via their official channels.,,These models are intended for solving complex mathematical problems.,Tongyi Qianwen,text; text,Unknown,Qwen2-Math,China,Qwen Team,Uses that go against the ethical usage policies of Qwen Team.,The models were tested with few-shot chain-of-thought prompting and evaluated across mathematical benchmarks in both English and Chinese.,,72B parameters,model,https://qwenlm.github.io/blog/qwen2-math/,https://huggingface.co/Qwen/Qwen2-Math-72B,Unknown,Unknown,Unknown,,,,,,, open,,2022-12-07,,"['GPT-3.5', 'Bing Search']",Perplexity Ask is a new search interface that uses advanced artificial intelligence technologies,,,,,,,,Perplexity Ask,USA,Perplexity,,,,,application,https://www.perplexity.ai/,,,,,,,,,,, closed,,2022-12-15,,"['Perplexity Ask', 'OpenAI API']",Twitter search interface that is powered by Perplexity's structured search engine.,,,,,,,,Bird SQL,USA,Perplexity,,,,,application,https://www.perplexity.ai/sql,,,,,,,,,,, open,,2023-10-27,,[],Perplexity chat is an AI chatbot trained in-house by Perplexity.,,,,,,,,Perplexity Chat,USA,Perplexity,,,,,application,https://labs.perplexity.ai/,,,,,,,,Chatbot output in response to user queries,https://blog.perplexity.ai/legal/terms-of-service,, open,,2023-03-01,,['Whisper API'],Speak is an AI-powered language learning app focused on building the best path to spoken fluency and is the fastest-growing English app in South Korea.,,,,,,,,Speak,USA,Speak,,,,,application,https://openai.com/blog/introducing-chatgpt-and-whisper-apis,,,,,,,,,,, open,Reports results on the Vicuna benchmark and compares performance level and time expenditure with ChatGPT,2023-05-23,,"['QLoRA', 'OASST1']","Guanaco is a model family trained with QLORA, an efficient finetuning approach that reduces memory usage enough to finetune a 65B parameter model on a 
single 48GB GPU while preserving full 16-bit finetuning task performance.",,,,,MIT,text; text,,Guanaco,USA,University of Washington,,,,33B parameters (dense),model,https://arxiv.org/pdf/2305.14314v1.pdf,,,A single 24 GB GPU,,,,,,,, open,Evaluated on benchmark music understanding tasks on SOTA music datasets.,2023-10-11,,"['LLaMA 2', 'Jukebox']",Llark is an instruction-tuned multimodal model for music understanding.,,,,,Apache 2.0,"audio, text; text",,Llark,USA,"University of Washington, Spotify",,,,12B parameters (dense),model,https://arxiv.org/pdf/2310.07160.pdf,,unknown,4 80GB NVIDIA A40 GPUs,54 hours,,,,,,, open,"The model was tested in the generation of a new green fluorescent protein. Its effectiveness was compared to natural evolutionary processes, and it was deemed to simulate over 500 million years of evolution.",2024-06-25,,[],"ESM3 is the first generative model for biology that simultaneously reasons over the sequence, structure, and function of proteins. It is trained across the natural diversity of Earth, reasoning over billions of proteins from diverse environments. It advances the ability to program and create with the code of life, simulating evolution, and making biology programmable. ESM3 is generative, and scientists can guide the model to create proteins for various applications.",,Unknown,,"To engineer biology from first principles. It functions as a tool for scientists to create proteins for various applications, including medicine, biology research, and clean energy.",Unknown,"text; image, text",Unknown though specific measures are not specified.,ESM3,USA,EvolutionaryScale,Unknown,"The creators have put in place a responsible development framework to ensure transparency and accountability from the start. 
ESM3 was tested in the generation of a new protein, ensuring its quality and effectiveness.",,98B parameters (Dense),model,https://www.evolutionaryscale.ai/blog/esm3-release,unknown,Unknown,unknown,Unknown,,,,,,, open,Evaluated on standard benchmarks across a range of tasks.,2023-10-21,,"['Qwen', 'OpenOrca', 'Open Platypus']",CausalLM is an LLM based on the model weights of Qwen and trained on a model architecture identical to LLaMA 2.,,,,,WTFPL,text; text,unknown,CausalLM,USA,CausalLM,,,,14B parameters (dense),model,https://huggingface.co/CausalLM/14B,https://huggingface.co/CausalLM/14B,unknown,unknown,unknown,,,,,,, open,Evaluated against similar LLMs using GPT-4 as a judge.,2023-03-30,,"['LLaMA', 'ShareGPT conversations data']",An open-source chatbot trained by fine-tuning LLaMA on user-shared conversations collected from ShareGPT.,,https://huggingface.co/datasets/bigcode/the-stack/discussions,,research on LLMs and chatbots,Apache 2.0,text; text,,Vicuna,USA,LMSYS,,,,13B parameters (dense),model,https://lmsys.org/blog/2023-03-30-vicuna/,https://huggingface.co/lmsys/vicuna-13b-delta-v0,,8 A100 GPUs,1 day,,,,,,, limited,,2023-10-11,,['Cohere Base'],Moonhub Recruiter is the world's first AI-powered recruiter providing sourcing and recruiting services for startups and growing businesses.,,,,recruiting candidates for business needs,custom,,,Moonhub Recruiter,USA,Moonhub,,,,,application,https://www.moonhub.ai/,,,,,,,unknown,job candidate matches,https://www.moonhub.ai/terms,unknown, open,https://arxiv.org/abs/2104.08758,2019-10-23,https://huggingface.co/datasets/c4,['CommonCrawl'],The Colossal Clean Crawled Corpus (C4) is a processed version of Common Crawl to facilitate transfer learning in NLP.,"Data was filtered for English using langdetect. Further, data was filtered to end in terminal punctuation, to remove short pages (less than 5 sentences), and to remove ""Dirty, Naughty, Obscene or Otherwise Bad Words"". 
",https://huggingface.co/datasets/c4/discussions,,To facilitate transfer learning research in NLP.,ODC-By 1.0,text,,C4,USA,Google,,"Data filtering excluded obscene words from a block list as well as short documents and some deduplication was done based on string overlap. ",['https://huggingface.co/datasets/c4/viewer/en/train'],750GB,dataset,https://arxiv.org/abs/1910.10683,,,,,,,,,,, closed,unknown,2019-11-25,,[],"The dataset used to train Internal Google BERT models. ",unknown,unknown,"Web pages, and search queries",unknown,unknown,text,unknown,Internal Google BERT dataset,USA,Google,unknown,unknown,[],unknown,dataset,https://blog.google/products/search/search-language-understanding-bert/,,,,,,,,,,, open,"Authors evaluate the dataset on two image captioning models - RNN-based model and Transformer model, under two experimental conditions - using the training & development sets provided by the MS COCO dataset, versus training & development sets using the Conceptual dataset. They use three different test sets- the blind COCO-C40 test set, the Conceptual test set and the Flickr 1K test set. They present both Human and Automatic evaluation results. Human evaluations indicate that the Conceptual-based models are superior. Automatic models fail to corroborate the human evaluation results. This highlights the weakness of these automatic metrics.",2018-07-01,,[],"A dataset containing 3 million (image-URL, caption) pairs designed for the training and evaluation of machine learned image captioning systems. ","The following filtering steps are applied in the given order: 1. Image-based Filtering - ""It only keeps JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2. It excludes images that trigger pornography or profanity detectors. These filters discard more than 65% of the candidates."" 2. 
Text-based Filtering - ""Candidates with no determiner, no noun, or no preposition are discarded; candidates with a high noun ratio are also discarded; candidates with a high rate of token repetition are discarded; candidates where the first word is not capitalized, or with too high capitalized-word ratio are discarded; we use a vocabulary VW of 1B token types, appearing at least 5 times in the English Wikipedia, and discard candidates that contain tokens that are not found in this vocabulary. candidates that score too high or too low on the polarity annotations, or trigger the pornography/profanity detectors, are discarded; predefined boiler-plate prefix/suffix sequences matching the text are cropped, e.g. “click to enlarge picture”, “stock photo”; we also drop text which begins/ends in certain patterns, e.g. “embedded image permalink”, “profile photo”. These filters only allow around 3% of the incoming candidates to pass to the later stages."" 3. Image&Text-based Filtering - ""We filter out candidates for which none of the text tokens can be mapped to the content of the image. This filter discards around 60% of the incoming candidates."" 4. Text Transformation with Hypernymization - ""Noun modifiers of certain types (proper nouns, numbers, units) are removed; dates, durations, and preposition-based locations (e.g., ""in Los Angeles"") are removed; named-entities are identified, matched against the KG entries, and substitute with their hypernym; resulting coordination noun-phrases with the same head (e.g., ""actor and actor"") are resolved into a single-head, pluralized form (e.g., ""actors""). Around 20% of samples are discarded during this transformation. We then cluster all resolved entities (e.g., 2560 ""actor"", ""dog"", ""neighborhood"", etc.) 
and keep only the candidates for which all detected types have a count of over 100 (around 55% of the candidates)."" ",Feedback can be provided by creating an issue in the [[Conceptual Captions GitHub repository]](https://github.com/google-research-datasets/conceptual-captions) or by emailing at conceptual-captions at google.com,,,Conceptual Captions License,"image, text",unknown,Conceptual Captions,USA,Google,unknown,"Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality.",[],"3.3M (image, text) pairs",dataset,https://aclanthology.org/P18-1238/,,,,,,,,,,, open,"The dataset is benchmarked against CC3M on two most fundamental V+L tasks: vision-to-language generation and vision-and-language matching, with an emphasis on long-tail visual recognition. The results illustrate the benefit of scaling up pre-training data for vision-and-language tasks, as indicated by the new state-of-the-art results on both the nocaps and Conceptual Captions benchmarks. ",2021-02-17,,[],"A dataset with 12 million image-text pairs specifically meant to be used for vision-and-language pre-training. ","Some of the filtering steps used in the preparation of Conceptual Captions dataset are relaxed to trade off high-recall for low-precision. The following steps are applied in the given order: 1. Image-based Filtering - Only keep JPEG images where both dimensions are greater than 400 pixels, and the ratio of larger to smaller dimension is no more than 2.5. Exclude images that trigger pornography or profanity detectors. 2. Text-based Filtering - Allow text between 3 and 256 words in the alt-text. Discard candidates with no noun or no determiner, but permit ones without prepositions. Set the maximum fraction of word repetition allowed to 0.2. Increase the threshold for counting a word type as rare from 5 to 20. 3. Image&Text-based Filtering - Filter out candidates for which none of the text tokens can be mapped to the content of the image. 
",Feedback can be provided by creating an issue in the [[Conceptual 12M GitHub repository]](https://github.com/google-research-datasets/conceptual-12m) or by emailing at conceptual-captions at google.com,,,Conceptual Captions License,"image, text",unknown,Conceptual 12M,USA,Google,unknown,"Input candidate (image, caption) pairs pass through several stages of filtering and processing to ensure quality. Person-name substitutions are performed in the alt-texts to protect the privacy of individuals in the associated images.",[],"12M (image, text) pairs",dataset,https://arxiv.org/pdf/2102.08981.pdf,,,,,,,,,,, open,https://huggingface.co/t5-base#evaluation,2019-10-23,,['C4'],Text-To-Text Transfer Transformer (T5) is a model that unifies all NLP tasks under the text-to-text format.,,https://huggingface.co/t5-large/discussions,,NLP tasks,Apache 2.0,text; text,,T5,USA,Google,unknown,The T5 paper documents many analyses/ablations that were considered before arriving at the final architecture/training procedure.,,11B parameters (dense),model,https://arxiv.org/abs/1910.10683,https://huggingface.co/t5-base,unknown,"1,024 TPU v3 chips (Cloud TPU Pods)",unknown,,,,,,, closed,unknown,2019-11-25,,['Internal Google BERT dataset'],"Internal Google BERT model used to power Google Search products. ",,unknown,,unknown,unknown,text; text,unknown,Internal Google BERT,USA,Google,unknown,unknown,,unknown,model,https://blog.google/products/search/search-language-understanding-bert/,unknown,unknown,unknown,unknown,,,,,,, open,,2019-11-25,,"['Internal Google BERT', 'MUM']","Google Search is Google's search engine. ",,"Feedback can be sent to Google Feedback using the product interface [[Google Feedback]](https://www.google.com/tools/feedback). ",,"Searching the web using text, voice or image",,,"It is implied that Google scan uses of its products for spam, malware and illegal content in the [[Term of Service]](https://policies.google.com/terms). 
",Google Search,USA,Google,"Prohibited use cases aren't specifically spelled out for Google search, but several illegal and discouraged use cases are shared in the Respect Others section of the [[Term of Service]](https://policies.google.com/terms). ",unknown,,,application,https://blog.google/products/search/search-language-understanding-bert/,,,,,unknown,unknown,unknown,web page ranking,https://policies.google.com/terms,unknown, closed,unknown,2021-06-18,,[],"Infiniset ""is a combination of dialog data from public dialog data and other public web documents"" [[Appendix E]](https://arxiv.org/pdf/2201.08239.pdf#appendix.E). ",unknown,,"Included in the dataset are data from ""public forums (0%); C4 data (12.5% ); code documents from sites related to programming like Q&A sites tutorials, etc (12.5%); Wikipedia (English) (12.5%); English web documents (6.25%); and Non-English web documents (6.25%)."" ",unknown,unknown,"code, text",unknown,Infiniset,USA,Google,"The prohibited uses for Infiniset weren't specifically listed, but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) advises avoiding harm, unjust impact and misinformation, among others. ",unknown,[],unknown,dataset,https://arxiv.org/pdf/2201.08239.pdf,,,,,,,,,,, closed,"The model performance was analyzed on sensibleness, specificity and interestingness. The model was also analyzed on safety, following metrics derived from Google AI Principles [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1). Finally, the model was analyzed on groundedness, testing its ability to produce responses that can be associated with ""known sources whenever possible [[Section 4.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.4.1)."" ",2021-06-18,,['Infiniset'],"LaMDA stands for Language Models for Dialog Application. It is a transformer based language model trained on dialogue data. 
",,,,"LaMDA is a language model, so it can be used for regular language modelling tasks without fine-tuning, but it's fine-tuned for dialogue tasks. ",unknown,text; text,unknown,LaMDA,USA,Google,"The prohibited uses of LaMDA weren't specifically listed, but the Google AI principles inspired safety objectives in [[Appendix A.1]](https://arxiv.org/pdf/2201.08239.pdf#subsection.A.1) advises avoiding harm, unjust impact and misinformation, among others. ","LaMDA was fine-tuned to predict sensibleness, specificity and interestingness as well as safety. Then, the candidates were filtered out if the model safety predictions were below a certain threshold. The next candidates in the conversation were selected as a combination of these predictions. The model was also fine-tuned for groundedness. The results are shown in [[Figure 5]](https://arxiv.org/pdf/2201.08239.pdf#figure.caption.23). ",,137B parameters (dense),model,https://arxiv.org/pdf/2201.08239.pdf,,26 tCO2e,1024 TPU-V3 chips,4108.80 petaflop/s-day,,,,,,, closed,unknown,2022-04-04,https://arxiv.org/pdf/2204.02311.pdf#appendix.D,['Infiniset'],"PaLM dataset ""was created for pre-training language models"" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). ","GitHub repositories with copyleft licenses were excluded. Programming languages other than the most common 24 were excluded [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). ",Contact the authors.,"The dataset is based on Infiniset. It included multilingual text containing text from over 100 languages. The breakdown of the data included is as follows: Social media conversations (multilingual) 50%, Filtered webpages (multilingual) 27%, BooksCorpus (English) 13%, GitHub (code) 5%, Wikipedia (multilingual) 4%, and News (English) 1%. Code was collected from GitHub repositories with appropriate licenses, totalling 96GB of source code [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). 
","""The dataset was created for pre-training language models by a team of researchers at Google"". ",unknown,"code, text",unknown,PaLM dataset,USA,Google,"""... should not be used for any of the unacceptable language model use cases, e.g., generation of toxic speech"" [[Datasheet]](https://arxiv.org/pdf/2204.02311.pdf#appendix.D). ","In order to reduce low quality web pages, the web pages were sampled according to a ""quality score"" classifier. Code files were de-duplicated using Levenshtein distance [[Section 3]](https://arxiv.org/pdf/2204.02311.pdf#section.3). ",[],3.92 TB,dataset,https://arxiv.org/pdf/2204.02311.pdf,,,,,,,,,,, open,Evaluated on a variety of standard language datasets.,2022-10-20,,"['T5', 'Muffin', 'P3', 'NaturalInstructions-v2', 'Flan CoT']",Flan-T5 is a version of the T5 language model fine-tuned on instruction data,,https://huggingface.co/google/flan-t5-xxl/discussions,,unknown,Apache 2.0,text; text,,Flan-T5,USA,Google,,"Across different multitask datasets, templates and formatting were maintained. 
For the chain-of-thoughts (CoT) data, specific exemplars were used.",,11B parameters (dense),model,https://arxiv.org/abs/2210.11416,https://arxiv.org/pdf/2210.11416.pdf,Unknown,512 v4 TPU Chips,Unknown,,,,,,, open,,2022-05-10,,['C4'],UL2 is a language model trained with a new pretraining objective,,,,,Apache 2.0,text; text,,UL2,USA,Google,,,,20B parameters (dense),model,https://arxiv.org/abs/2205.05131,,,128 TPUv4,,,,,,,, closed,,2022-06-22,,"['C4', 'LAION-400M', 'FIT400M', 'JFT-4B']",Parti is a text-to-image diffusion model,,,,,unknown,text; image,,Parti,USA,Google,,,,20B parameters (dense),model,https://parti.research.google/,,,unknown,unknown,,,,,,, open,,2022-05-23,,"['LAION-400M', 'Google internal image-text dataset']",Imagen is a text-to-image diffusion model,,,,,unknown,text; image,,Imagen,USA,Google,,,,14B parameters (dense),model,https://imagen.research.google/,,unknown,128 TPU-v4,unknown,,,,,,, open,,2022-04-22,,"['AudioSet', 'HowTo100M']",VATT is a family of models trained on multimodal data,,,,,Apache 2.0,"text; audio, video",,VATT,USA,Google,,,,155M parameters (dense),model,https://arxiv.org/abs/2104.11178,,unknown,256 TPU-v3,3 days,,,,,,, limited,"""PaLM is evaluated on English Natural Language Processing (NLP) tasks, tasks from BIG-bench, reasoning tasks, code completion tasks, multilingual generation and question answering tasks, translation tasks, and bias and toxicity benchmarks"" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). ",2022-04-04,,['PaLM dataset'],"PaLM stands for Pathways Language Model, ""dense decoder-only Transformer model trained with the Pathways system"" [[Google AI Blog]](https://ai.googleblog.com/2022/04/pathways-language-model-palm-scaling-to.html). ",,Contact the authors.,,"""The primary use is research on language models, including: research on NLP applications like machine translation and question answering, advancing fairness and safety research, and understanding limitations of current LLMs. 
Within Google, PaLM is being used for research on a variety of open- ended text and code generation tasks, including reasoning [[Section 6.3]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.3) and code synthesis and understanding [[Section 6.4]](https://arxiv.org/pdf/2204.02311.pdf#subsection.6.4)"" [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E). ",unknown,"text; text, code",unknown,PaLM,USA,Google,"The model ""should not be used for downstream applications without further analysis on factors in the proposed downstream application [[Model Card]](https://arxiv.org/pdf/2204.02311.pdf#appendix.E)"" ",Unknown,,540B parameters (dense),model,https://arxiv.org/pdf/2204.02311.pdf,https://arxiv.org/pdf/2204.02311.pdf#appendix.E,271.43 tCO2,6144 TPU v4 chips,29600 petaflop/s-days,,,,,,, limited,,2023-03-14,,['PaLM'],a new developer offering that makes it easy and safe to experiment with Google’s language models.,,,,,unknown,,,PaLM API,USA,Google,,,,,application,https://developers.googleblog.com/2023/03/announcing-palm-api-and-makersuite.html,,,,,,,,,,, closed,,2022-12-26,,"['Flan-PaLM', 'MultiMedQA']",,,,,,unknown,text; text,,Med-PaLM,USA,Google,,,,540B parameters (dense),model,https://arxiv.org/abs/2212.13138,,,,,,,,,,, closed,Evaluated on MultiMedBench tasks and radiologist evaluations of model-generated chest X-ray reports,2023-07-26,,"['PaLM-E', 'MultiMedBench']",,,,,,unknown,"image, text, genome sequence; text",,Med-PaLM Multimodal,USA,Google,,,,562B parameters (dense),model,https://arxiv.org/pdf/2307.14334.pdf,,,,,,,,,,, closed,,2022-12-26,,"['MedQA', 'MedMCQA', 'PubMedQA', 'MMLU', 'LiveQA', 'Medication QA', 'HealthSearchQA']",,,,,,unknown,text; text,,MultiMedQA,USA,Google,,,,unknown,model,https://arxiv.org/abs/2212.13138,,,,,,,,,,, closed,,2022-10-20,,"['PaLM', 'Muffin', 'P3', 'NaturalInstructions-v2']",,,,,,unknown,text; text,,Flan-PaLM,USA,Google,,,,540B parameters (dense),model,https://arxiv.org/abs/2210.11416,,,,,,,,,,, 
closed,,2022-10-20,,"['U-PaLM', 'Muffin', 'P3', 'NaturalInstructions-v2']",,,,,,unknown,text; text,,Flan-U-PaLM,USA,Google,,,,540B parameters (dense),model,https://arxiv.org/abs/2210.11416,,,,,,,,,,, open,,2021-09-03,,[],,,,,,Apache 2.0,text,,Muffin,USA,Google,,,[],62 tasks,dataset,https://arxiv.org/abs/2109.01652,,,,,,,,,,, closed,,2022-10-20,,"['PaLM', 'PaLM dataset']",,,,,,unknown,text; text,,U-PaLM,USA,Google,,,,540B parameters (dense),model,https://arxiv.org/abs/2210.11399,,,,,,,,,,, closed,,2022-08-16,,['PaLM'],,,,,,"unknown (model weights), Apache 2.0 (SayCan code)",text; robotics trajectories,,PaLM-SayCan,USA,Google,,,,540B parameters (dense),model,https://arxiv.org/abs/2204.01691,,,,,,,,,,, closed,,2021-12-13,,"['GLaM Web dataset', 'Wikipedia', 'GLaM Conversations dataset', 'GLaM Forums dataset', 'BooksCorpus', 'GLaM News dataset']",,,,,,unknown,text; text,,GLaM,USA,Google,,,,1.2T parameters (sparse),model,https://arxiv.org/abs/2112.06905,,,,,,,,,,, closed,,2021-12-13,,[],,,,,,unknown,text,,GLaM Web dataset,USA,Google,,,[],unknown,dataset,https://arxiv.org/abs/2112.06905,,,,,,,,,,, closed,,2021-12-13,,[],,,,,,unknown,text,,GLaM Conversations dataset,USA,Google,,,[],unknown,dataset,https://arxiv.org/abs/2112.06905,,,,,,,,,,, closed,,2021-12-13,,[],,,,,,unknown,text,,GLaM Forums dataset,USA,Google,,,[],unknown,dataset,https://arxiv.org/abs/2112.06905,,,,,,,,,,, closed,,2021-12-13,,[],,,,,,unknown,text,,GLaM News dataset,USA,Google,,,[],unknown,dataset,https://arxiv.org/abs/2112.06905,,,,,,,,,,, closed,,2021-05-18,,['MUM dataset'],MUM (Multitask Unified Model) is a multimodal model that is specialized for more complex queries.,,,,,unknown,"image, text; text",,MUM,USA,Google,,,,unknown,model,https://blog.google/products/search/introducing-mum/,,,,,,,,,,, closed,,2021-05-18,,[],,,,,,unknown,"image, text",,MUM dataset,USA,Google,,,[],unknown,dataset,https://blog.google/products/search/introducing-mum/,,,,,,,,,,, closed,,2023-02-01,,"['LAION-400M', 'Phenaki 
Video-Text Corpus']",,,,,,unknown,text; video,,Phenaki,USA,Google,,,,1.8B parameters (dense),model,https://openreview.net/pdf?id=vOEXS39nOF,,,,,,,,,,, closed,,2023-02-01,,[],,,,,,unknown,"text, video",,Phenaki Video-Text Corpus,USA,Google,,,[],15M text-video pairs at 8FPS,dataset,https://openreview.net/pdf?id=vOEXS39nOF,,,,,,,,,,, open,,2023-03-02,,"['UL2', 'Flan Collection']",,,,,,Apache 2.0,text; text,,Flan-UL2,USA,Google,,,,20B parameters (dense),model,https://arxiv.org/abs/2205.05131,,,,,,,,,,, open,,2023-01-31,,"['Flan dataset', 'P3', 'NaturalInstructions-v2']",,,,,,Apache 2.0,text,,Flan Collection,USA,Google,,,[],1836 tasks,dataset,https://arxiv.org/abs/2301.13688,,,,,,,,,,, closed,,2023-01-26,,"['SoundStream', 'w2v-BERT', 'MuLan', 'MusicLM semantic model', 'MusicLM acoustic model']",,,,,,unknown,text; audio,,MusicLM,USA,Google,,,,1.4B parameters (dense),model,https://arxiv.org/pdf/2301.11325.pdf,,,,,,,,,,, closed,,2023-01-26,,['Free Music Archive'],,,,,,unknown,audio; audio,,SoundStream,USA,Google,,,,unknown,model,https://arxiv.org/pdf/2301.11325.pdf,,,,,,,,,,, closed,,2023-01-26,,['Free Music Archive'],,,,,,unknown,audio; audio,,w2v-BERT,USA,Google,,,,600M parameters (dense),model,https://arxiv.org/pdf/2301.11325.pdf,,,,,,,,,,, closed,,2022-08-26,,"['AST', 'BERT', 'MuLan dataset']",,,,,,unknown,text; audio,,MuLan,USA,Google,,,,unknown,model,https://arxiv.org/abs/2208.12415,,,,,,,,,,, closed,,2022-08-26,,[],,,,,,unknown,"audio, text",,MuLan dataset,USA,Google,,,[],370K hours audio,dataset,https://arxiv.org/abs/2208.12415,,,,,,,,,,, closed,,2023-01-26,,[],,,,,,unknown,audio,,MusicLM dataset,USA,Google,,,[],280K hours audio,dataset,https://arxiv.org/pdf/2301.11325.pdf,,,,,,,,,,, closed,,2023-01-26,,['MusicLM dataset'],,,,,,unknown,audio; audio,,MusicLM semantic model,USA,Google,,,,430M parameters (dense),model,https://arxiv.org/pdf/2301.11325.pdf,,,,,,,,,,, closed,,2023-01-26,,['MusicLM dataset'],,,,,,unknown,audio; audio,,MusicLM acoustic 
model,USA,Google,,,,430M parameters (dense),model,https://arxiv.org/pdf/2301.11325.pdf,,,,,,,,,,, closed,,2023-02-08,,['Noise2Music pseudolabel dataset'],,,,,,unknown,"audio, text; audio",,Noise2Music,USA,Google,,,,unknown,model,https://google-research.github.io/noise2music/noise2music.pdf,,,,,,,,,,, closed,,2023-02-08,,['LaMDA'],,,,,,unknown,text,,LaMDA-LF,USA,Google,,,[],150k songs,dataset,https://google-research.github.io/noise2music/noise2music.pdf,,,,,,,,,,, closed,,2023-02-08,,['MusicCaps'],,,,,,unknown,text,,Rater-LF,USA,Google,,,[],10k captions,dataset,https://google-research.github.io/noise2music/noise2music.pdf,,,,,,,,,,, closed,,2023-02-08,,['MusicCaps'],,,,,,unknown,text,,Rater-SF,USA,Google,,,[],24k captions,dataset,https://google-research.github.io/noise2music/noise2music.pdf,,,,,,,,,,, closed,,2023-02-08,,"['MuLan', 'MuLaMCap', 'LaMDA-LF', 'Rater-LF', 'Rater-SF']",,,,,,unknown,"audio, text; audio",,Noise2Music pseudolabeler,USA,Google,,,,unknown,model,https://google-research.github.io/noise2music/noise2music.pdf,,,,,,,,,,, closed,,2023-02-08,,[],,,,,,unknown,audio,,Noise2Music audio dataset,USA,Google,,,[],340k hours audio,dataset,https://google-research.github.io/noise2music/noise2music.pdf,,,,,,,,,,, closed,,2023-02-08,,"['Noise2Music audio dataset', 'Noise2Music pseudolabeler']",,,,,,unknown,"audio, text",,Noise2Music pseudolabel dataset,USA,Google,,,[],340k hours audio with pseudolabels,dataset,https://google-research.github.io/noise2music/noise2music.pdf,,,,,,,,,,, limited,,2022-08-25,,['LaMDA'],"AI Test Kitchen provides a new way for people to learn about, experience, and give feedback on emerging AI technology, like LaMDA.",,,,,unknown,,,AI Test Kitchen,USA,Google,,,,,application,https://blog.google/technology/ai/join-us-in-the-ai-test-kitchen/,,,,,,,,,,, closed,,2023-02-06,,['LaMDA'],"Conversational AI service, powered by 
LaMDA",,,,,unknown,,,Bard,USA,Google,,,,,application,https://blog.google/technology/ai/bard-google-ai-search-updates/,,,,,,,,,,, closed,,2022-06-29,,"['PaLM', 'arXiv', 'PaLM dataset', 'Minerva Math Web Pages dataset']",,,,,,unknown,text; text,,Minerva,USA,Google,,,,540B parameters (dense),model,https://arxiv.org/abs/2206.14858,,,,,,,,,,, closed,,2022-06-29,,[],,,,,,unknown,text,,Minerva Math Web Pages dataset,USA,Google,,,[],17.5B tokens,dataset,https://arxiv.org/abs/2206.14858,,,,,,,,,,, limited,,2023-03-06,,"['YT-NLU-U', 'Pub-U', 'Web-NTL', 'YT-SUP+', 'Pub-S']","Universal Speech Model (USM) is a family of state-of-the-art speech models with 2B parameters trained on 12 million hours of speech and 28 billion sentences of text, spanning 300+ languages. USM, which is for use in YouTube (e.g., for closed captions), can perform automatic speech recognition (ASR) on widely-spoken languages like English and Mandarin, but also languages like Punjabi, Assamese, Santhali, Balinese, Shona, Malagasy, Luganda, Luo, Bambara, Soga, Maninka, Xhosa, Akan, Lingala, Chichewa, Nkore, Nzema to name a few. 
Some of these languages are spoken by fewer than twenty million people, making it very hard to find the necessary training data.",,,,,unknown,"audio, text; text",,USM,USA,Google,,,,2B parameters (dense),model,https://arxiv.org/abs/2303.01037,,,,,,,,,,, open,,2005-02-14,,['USM'],YouTube is a global online video sharing and social media platform,,,,,,,,YouTube,USA,Google,,,,,application,https://www.youtube.com/,,,,,,,,,,, closed,,2023-03-06,,"['PaLM', 'ViT-22B']",,,,,,unknown,"image, text; text",,PaLM-E,USA,Google,,,,562B parameters (dense),model,https://arxiv.org/abs/2303.03378,,,,,,,,,,, closed,,2023-02-10,,['JFT'],,,,,,unknown,"image, image",,ViT-22B,USA,Google,,,,22B parameters (dense),model,https://arxiv.org/abs/2302.05442,,,,,,,,,,, closed,,2022-09-07,,"['w2v-BERT', 'SoundStream']",,,,,,unknown,"audio, text; audio",,AudioLM,USA,Google,,,,1B parameters (dense),model,https://arxiv.org/abs/2209.03143,,,,,,,,,,, closed,,2022-09-14,,"['mT5', 'ViT-e', 'WebLI']",,,,,,unknown,text; image,,PaLI,USA,Google,,,,17B parameters (dense),model,https://arxiv.org/abs/2209.06794,,,,,,,,,,, closed,,2022-09-14,,['JFT'],,,,,,unknown,image; image,,ViT-e,USA,Google,,,,3.9B parameters (dense),model,https://arxiv.org/abs/2209.06794,,,,,,,,,,, closed,,2022-09-14,,[],,,,,,unknown,"image, text",,WebLI,USA,Google,,,[],"10B images, 12B alt-text",dataset,https://arxiv.org/abs/2209.06794,,,,,,,,,,, open,,2023-02-27,,"['T5', 'CLIP', 'YT-Temporal-1B']",,,,,,Apache 2.0,video; text,,Vid2Seq,USA,Google,,,,500M parameters (dense),model,https://arxiv.org/abs/2302.14115,,,,,,,,,,, closed,"evaluated on DSTC11 Challenge Task, based on MultiWoz 2.1, with a focus on dialog state tracking.",2023-06-08,,"['CTC blank-filtering', 'Speech2Text adapter']",Joint speech and language model using a Speech2Text adapter and using a CTC-based blank-filtering.,,,,,unknown,audio; text,,Google Joint SLM,USA,Google,,,,unknown,model,https://arxiv.org/pdf/2306.07944.pdf,,,,,,,,,,, closed,Reports results on standard code 
benchmarks across a variety of programming languages.,2023-05-10,,['PaLM 2 dataset'],PaLM 2 is a new state-of-the-art language model that has better multilingual and reasoning capabilities and is more compute-efficient than its predecessor PaLM. PaLM 2 is a Transformer-based model trained using a mixture of objectives similar to UL2.,,Specific queries provided by annotators,,"general use large language model that can be used for language, reasoning, and code tasks.",unknown,text; text,Google internal monitoring,PaLM 2,USA,Google,becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment,"Employed de-duplication, removal of sensitive-PII and filtering. Added control tokens marking toxicity of text.",,unknown,model,https://blog.google/technology/ai/google-palm-2-ai-large-language-model/,https://ai.google/static/documents/palm2techreport.pdf,,TPU v4 (number unspecified),,,,,,,, limited,"Assessed on medical benchmarks of professional medical exams, medical research, and consumer queries.",2023-12-13,,[],"MedLM is a collection of foundation models tuned to follow natural language instructions for tasks in medicine, such as question answering and creating draft summaries.",,,,"to be used for question answering and creating draft summaries from existing documentation, to be reviewed, edited, and approved by the user before use.",unknown,text; text,Google internal monitoring,MedLM,USA,Google,,,,unknown,model,https://cloud.google.com/vertex-ai/docs/generative-ai/medlm/overview,https://cloud.google.com/static/vertex-ai/docs/generative-ai/medlm/MedLM-model-card.pdf,unknown,unknown,unknown,,,,,,, closed,"Evaluated on standard general, reasoning, math, coding, and multimodal benchmarks with results that surpass GPT-4 on almost all.",2023-12-06,,[],"As of release, Gemini is Google's most capable and flexible AI model, proficient in multimodal domains.",,,,"general use large language model that can be used for 
language, reasoning, and code tasks.",unknown,"text; image, text, video",Google internal monitoring,Gemini,USA,Google,becoming part of a general-purpose service or product or use within specific downstream applications without prior assessment,,,unknown,model,https://deepmind.google/technologies/gemini/#introduction,,unknown,unknown,unknown,,,,,,, closed,Evaluated on popular time-series benchmarks.,2024-02-02,,[],TimesFM is a single forecasting model pre-trained on a large time-series corpus of 100 billion real world time-points.,,,,,unknown,,unknown,TimesFM,USA,Google,,,,200M parameters (dense),model,https://blog.research.google/2024/02/a-decoder-only-foundation-model-for.html,,unknown,unknown,unknown,,,,,,, open,Evaluation was conducted on standard LLM benchmarks and includes internal red-teaming testing of relevant content policies.,2024-02-21,,[],"Gemma is a family of lightweight, state-of-the-art open models from Google, based on the Gemini models. They are text-to-text, decoder-only large language models, available in English.",,https://huggingface.co/google/gemma-7b/discussions,,"Text generation tasks including question answering, summarization, and reasoning; content creation, communication, research, and education.",custom,text; text,,Gemma,USA,Google,Prohibited uses are specified in the Gemma Prohibited Use Policy here https://ai.google.dev/gemma/prohibited_use_policy,"Multiple evaluations and red-teaming conducted, with particular focus on ethics, bias, fair use cases, and safety.",,7B parameters (dense),model,https://blog.google/technology/developers/gemma-open-models/,https://huggingface.co/google/gemma-7b,unknown,TPUv5e,unknown,,,,,,, closed,"Evaluated Med-Gemini on 14 medical benchmarks spanning text, multimodal and long-context applications, establishing new state-of-the-art (SoTA) performance on 10 of them, and surpassing the GPT-4 model family on every benchmark where a direct comparison is viable.",2024-04-29,,"['Gemini', 
'MultiMedBench']","Med-Gemini is a family of highly capable multimodal models that are specialized in medicine with the ability to seamlessly integrate the use of web search, and that can be efficiently tailored to novel modalities using custom encoders.",,,,"To be used in areas of medical research including medical summarization, referral letter generation, and medical simplification tasks.",unknown,"image, text; text",,Med-Gemini,USA,Google,Unfit for real-world deployment in the safety-critical medical domain.,,,unknown,model,https://arxiv.org/pdf/2404.18416,,unknown,unknown,unknown,,,,,,, open,Unknown,2024-09-05,,[],"Imagen 3 is a high-quality text-to-image model capable of generating images with improved detail, richer lighting, and fewer distracting artifacts. It features improved prompt understanding and can be used to generate a wide array of visual styles from quick sketches to high-resolution images. The model is available in multiple versions, each optimized for particular types of tasks. Imagen 3 has been trained to capture nuances like specific camera angles or compositions in long, complex prompts, making it a versatile tool for image generation from textual inputs.",,Unknown,,"Imagen 3 is intended to be used for generation of high-resolution images from textual prompts, from photorealistic landscapes to richly textured oil paintings or whimsical claymation scenes. It can also be used for stylized birthday cards, presentations, and more, due to its improved text rendering capabilities.",Unknown,text; image,Unknown,Imagen 3,unknown,Google DeepMind,Unknown,Unknown,,Unknown,model,https://deepmind.google/technologies/imagen-3/,unknown,Unknown,Unknown,Unknown,,,,,,, open,The 27B Gemma 2 model outperforms other open models in its size category offering cutting-edge performance. 
Specific details can be found in the provided technical report.,2024-06-27,,"['Gemma', 'CodeGemma', 'RecurrentGemma', 'PaliGemma']","Gemma 2 is an open model that offers best-in-class performance and runs at incredible speed across different hardware. It easily integrates with other AI tools. This model is built on a redesigned architecture engineered for exceptional performance and inference efficiency. It is available in both 9 billion (9B) and 27 billion (27B) parameter sizes. Gemma 2 is optimized to run at incredible speed across a range of hardware, from powerful gaming laptops and high-end desktops, to cloud-based setups.",,Unknown,,Gemma 2 is designed for developers and researchers for various AI tasks. It can be used via the integrations it offers with other AI tools/platforms and can additionally be deployed for more accessible and budget-friendly AI deployments.,Gemma (commercially-friendly license given by Google DeepMind),text; text,Unknown,Gemma 2,unknown,Google DeepMind,Not specified,"Google DeepMind implemented a refined architecture for Gemma 2. The model has improvements in safety and efficiency over the first generation. The deployment of Gemma 2 on Vertex AI, scheduled for the next month, will offer effortless management of the model.",,27B parameters (dense),model,https://blog.google/technology/developers/google-gemma-2/,unknown,Unknown,"Google Cloud TPU host, NVIDIA A100 80GB Tensor Core GPU, NVIDIA H100 Tensor Core GPU",Unknown,,,,,,, open,The model nearly matches the performance of the Gemini 1.5 Flash model across many benchmarks through developer feedback and testing.,2024-10-03,,[],"Gemini 1.5 Flash-8B is a lightweight, speed-optimized variant of the Gemini 1.5 Flash model, designed for efficiency and lower latency especially for tasks such as chat, transcription, and long context language translation. 
The model is production-ready and offers the lowest cost per intelligence of any Gemini model with improved rate limits.",,Encourages developer feedback to inform model improvements and future updates.,,"Designed for simple, high-volume tasks including multimodal use cases and long-context summarization.",unknown,"audio, image, text, video; text",unknown,Gemini 1.5 Flash-8B,unknown,Google DeepMind,unknown,Model improvement informed by developer feedback and extensive testing.,,8B parameters,model,https://developers.googleblog.com/en/gemini-15-flash-8b-is-now-generally-available-for-use/,unknown,unknown,unknown,unknown,,,,,,, open,"After giving the community time to use the models and explore different applications, we collected feedback.",2024-11-25,,"['EfficientNet-L2', 'BERT', 'CLIP', 'BLIP-2', 'BiT ResNet-101x3', 'ViT-S']",Health AI Developer Foundations (HAI-DEF) is a new suite of open weight models to help developers more easily build AI models for healthcare applications.,,unknown,,"Building AI models for healthcare applications including imaging applications in radiology, dermatology, and pathology.",unknown,"text, image; vector","After giving the community time to use the models and explore different applications, we collected feedback.",Health AI Developer Foundations (HAI-DEF),USA,Google Research,unknown,Learning from community experience with previous research endpoints...we collected feedback.,,unknown,model,http://goo.gle/3AWVX33,unknown,unknown,unknown,unknown,,,,,,, closed,Unknown,2024-12-04,,['Imagen 3'],"Genie 2 is a foundation world model capable of generating an endless variety of action-controllable, playable 3D environments for training and evaluating embodied agents based on a single prompt image.",,Unknown,,"Genie 2 can be used for generating diverse environments for training and evaluating AI agents, rapid prototyping interactive experiences, and experimenting with novel environments.",Unknown,image; video,Unknown,Genie 2,unknown,Google 
DeepMind,Unknown,"Responsible development is emphasized, developing our technologies responsibly and building towards more general AI systems that can safely carry out tasks.",,Unknown,model,https://deepmind.google/discover/blog/genie-2-a-large-scale-foundation-world-model/,unknown,Unknown,Unknown,Unknown,,,,,,, limited,"Veo 2 outperforms other leading video generation models, based on human evaluations of its performance.",2024-12-16,,[],"Veo 2 is a state-of-the-art video generation model that creates videos with realistic motion and high-quality output, up to 4K, with extensive camera controls. It simulates real-world physics and offers advanced motion capabilities with enhanced realism and fidelity.",,unknown,,"Creating high-quality videos with realistic motion, different styles, camera controls, shot styles, angles, and movements.",unknown,text; video,unknown,Veo 2,unknown,Google DeepMind,unknown,"Veo 2 includes features that enhance realism, fidelity, detail, and artifact reduction to ensure high-quality output.",,unknown,model,https://deepmind.google/technologies/veo/veo-2/,unknown,unknown,unknown,unknown,,,,,,, limited,unknown,2024-12-11,,[],"Google DeepMind introduces Gemini 2.0, a new AI model designed for the 'agentic era.'",,unknown,,"Develop more agentic models, meaning they can understand more about the world around you, think multiple steps ahead, and take action on your behalf, with your supervision.",unknown,"text, video, image, audio; image, text",unknown,Gemini 2.0,unknown,Google DeepMind,unknown,"Google is committed to building AI responsibly, with safety and security as key priorities.",,unknown,model,https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/#ceo-message,unknown,unknown,"custom hardware like Trillium, our sixth-generation TPUs",unknown,,,,,,, open,"Our research demonstrates leading performance on chemical formula recognition, music score recognition, spatial reasoning, and chest X-ray report 
generation, as detailed in the technical report.",2024-12-05,,"['PaliGemma', 'Hugging Face', 'Kaggle', 'Hugging Face Transformers', 'Keras', 'PyTorch', 'JAX']","PaliGemma 2 builds upon the performant Gemma 2 models, adding the power of vision and making it easier than ever to fine-tune for exceptional performance. With PaliGemma 2, these models can see, understand, and interact with visual input, opening up a world of new possibilities.",,Your feedback and contributions are invaluable in shaping the future of these models and driving innovation in the field.,,"fine-tuning for specific tasks and datasets straightforward, empowering you to tailor its capabilities to your precise needs.",unknown,image; text,unknown,PaliGemma 2,USA,Google,unknown,unknown,,28B parameters,model,https://developers.googleblog.com/en/introducing-paligemma-2-powerful-vision-language-models-simple-fine-tuning/,unknown,unknown,unknown,unknown,,,,,,, open,,2023-07-26,,"['GPT-4 API', 'Code Llama', 'Claude API', 'WizardCoder', 'PaLM API']",Continue is the open-source autopilot for software development. It is an IDE extension that brings the power of ChatGPT to VS Code and JetBrains. 
It’s built to be deeply customizable and continuously learn from development data.,,,,,Apache 2.0,,,Continue,USA,"Continue Dev, Inc.",,,,,application,https://continue.dev,,,,,,,,,https://github.com/continuedev/continue/blob/main/LICENSE,, open,,2022-01-07,,['YouTube'],,,,,,MIT,video,,YT-Temporal-1B,USA,University of Washington,,,[],20M videos,dataset,https://arxiv.org/abs/2201.02639,,,,,,,,,,, open,Evaluated on standard VLM benchmarks and outperforms SotA open-source VLMs as of release.,2024-02-09,,[],Prism is a family of VLMs trained using new analyses about key vision design axes.,,,,,LLaMA 2,"image, text; text",unknown,Prism,Japan,Toyota Research Institute,,,,7B parameters (dense),model,https://arxiv.org/pdf/2402.07865.pdf,,unknown,8 A100 GPUs,less than 9 hours,,,,,,, open,Evaluated in comparison to SOTA video-to-language models.,2023-10-23,,"['MSR-VTT', 'Video-ChatGPT Video Descriptions Dataset']",Pegasus-1 is a video-language foundation model.,,,,,unknown,video; text,,Pegasus-1,USA,Twelve Labs,,Data selected and cleaned to eliminate toxic and biased content.,,80B parameters (dense),model,https://app.twelvelabs.io/blog/introducing-pegasus-1,,unknown,unknown,unknown,,,,,,, open,"Marengo-2.6 sets new benchmarks in zero-shot text-to-video, text-to-image, and text-to-audio retrieval tasks with a single embedding model.",2024-03-01,,[],"Marengo 2.6 is a new state-of-the-art (SOTA) multimodal foundation model capable of performing any-to-any search tasks, including Text-To-Video, Text-To-Image, Text-To-Audio, Audio-To-Video, Image-To-Video, and more. 
",,,,,unknown,"audio, image, text; audio, image, video",,Marengo 2.6,USA,Twelve Labs,,,,unknown,model,https://www.twelvelabs.io/blog/introducing-marengo-2-6,,unknown,unknown,unknown,,,,,,, closed,Compared to self before being scaled on quality of video generation.,2023-09-29,,[],"GAIA-1 (‘Generative AI for Autonomy’) is a generative world model that leverages video, text, and action inputs to generate realistic driving scenarios while offering fine-grained control over ego-vehicle behavior and scene features.",,,,"The main use cases are pure representation learning, planning (look-ahead search), or learning a policy in the world model (neural simulator)",unknown,"text, video; video",,GAIA-1,UK,Wayve,,,,9B parameters (dense),model,https://arxiv.org/pdf/2309.17080.pdf,,unknown,32 A100 80GB GPUs,4 days,,,,,,, open,"Evaluated on all language datasets compared to similarly sized SOTA models, with Aurora-M achieving strong performance in most.",2024-04-23,,['StarCoderPlus'],"Aurora-M is a 15B parameter multilingual open-source model trained on English, Finnish, Hindi, Japanese, Vietnamese, and code.",,,,,unknown,text; text,unknown,Aurora-M,unknown,"Tokyo Institute of Technology, MIT-IBM Watson Lab, Sapienza University of Rome",,,,15B parameters,model,https://arxiv.org/pdf/2404.00399,,unknown,"LUMI supercomputer, using 128 AMD MI250X GPUs",48 days,,,,,,, open,Unknown,2024-09-28,,"['Glaive', 'Llama 3.1']","Reflection Llama-3.1 70B is an open-source LLM, trained with a new technique called Reflection-Tuning that teaches a LLM to detect mistakes in its reasoning and correct course. The model was trained on synthetic data generated by Glaive.",,Unknown,,The model is intended for complex reasoning and reflection tasks. 
It is designed to separate its internal thoughts and reasoning from its final answer.,Llama 3.1 Community License Agreement,text; text,Unknown,Reflection Llama-3.1 70B,Unknown,Unknown,Unknown,The model uses a Reflection-Tuning technique which allows it to notice mistakes in its reasoning and correct them.,,70B parameters,model,https://huggingface.co/mattshumer/Reflection-70B,https://huggingface.co/mattshumer/Reflection-70B,Unknown,Unknown,Unknown,,,,,,, open,"The model outperforms other models of similar and larger sizes on multimodal benchmarks. An open-source benchmark, MM-MT-Bench, is contributed for evaluating vision-language models.",2024-10-10,,['Mistral Nemo 12B'],"Pixtral 12B is a 12-billion-parameter multimodal language model trained to understand both natural images and documents, achieving leading performance on various multimodal benchmarks without compromising on natural language performance.",,Unknown,,"Multimodal instruction following tasks, capable of multi-turn, multi-image conversations.",Apache 2.0,text; image,Evaluation protocols and benchmarks are open-sourced to establish fair and standardized testing.,Pixtral 12B,Unknown,Unknown,Unknown,Evaluation protocols for multimodal language models were standardized and analysis was conducted to improve the reliability of model evaluations.,,12B parameters,model,https://arxiv.org/pdf/2410.07073,unknown,Unknown,Unknown,Unknown,,,,,,, open,,2023-04-20,,['AudioLM'],Bark is a text-to-audio model that can generate multilingual speech as well as other noises.,,https://huggingface.co/spaces/suno/bark/discussions,,,MIT,text; audio,,Bark,USA,Suno,,,,,model,https://github.com/suno-ai/bark,https://github.com/suno-ai/bark/blob/main/model-card.md,unknown,,unknown,,,,,,, open,,2023-08-03,,['NASA HLS data'],"Prithvi is a first-of-its-kind temporal Vision transformer pre-trained by the IBM and NASA team on contiguous US Harmonised Landsat Sentinel 2 (HLS) data. 
The model adopts a self-supervised encoder developed with a ViT architecture and Masked AutoEncoder (MAE) learning strategy, with an MSE loss function.",,https://huggingface.co/ibm-nasa-geospatial/Prithvi-100M/discussions,,,Apache 2.0,"text, video; text, video",,Prithvi,USA,IBM,,,,100M parameters (dense),model,https://github.com/NASA-IMPACT/hls-foundation-os,https://huggingface.co/ibm-nasa-geospatial/Prithvi-100M,,,,,,,,,, limited,,2023-09-07,,['Granite'],"Watsonx.ai is part of the IBM watsonx platform that brings together new generative AI capabilities, powered by foundation models and traditional machine learning into a powerful studio spanning the AI lifecycle.",,,,,custom,,,Watsonx.ai,USA,IBM,,,,,application,https://www.ibm.com/products/watsonx-ai,,,,,,,,deployed AI models,https://www.ibm.com/docs/en/watsonx-as-a-service?topic=models-terms-use,, limited,unknown,2023-09-28,,[],Granite is a set of multi-size foundation models that apply generative AI to both language and code.,,,,,,"text; code, text",,Granite,USA,IBM,,"Training data passed through IBM HAP detector, language model designed to remove harmful content. 
Data also deduplicated and filtered for document quality.",,13B parameters (dense),model,https://www.ibm.com/blog/building-ai-for-business-ibms-granite-foundation-models/,,unknown,unknown,unknown,,,,,,, open,Granite 3.0 8B Instruct matches leading similarly-sized open models on academic benchmarks while outperforming those peers on benchmarks for enterprise tasks and safety.,2024-10-21,,['Hugging Face’s OpenLLM Leaderboard v2'],"IBM Granite 3.0 models deliver state-of-the-art performance relative to model size while maximizing safety, speed and cost-efficiency for enterprise use cases.",,unknown,,"classic natural language use cases including text generation, classification, summarization, entity extraction and customer service chatbots, programming language use cases such as code generation, code explanation and code editing, and for agentic use cases requiring tool calling",Apache 2.0,text; text,,IBM Granite 3.0,USA,IBM,unknown,"The entire Granite family of models are trained on carefully curated enterprise datasets, filtered for objectionable content with critical concerns like governance, risk, privacy and bias mitigation in mind",,8B parameters,model,https://www.ibm.com/new/ibm-granite-3-0-open-state-of-the-art-enterprise-models,unknown,unknown,unknown,unknown,,,,,,, closed,,2023-10-10,,[],"Firefly Image 2 is the next generation of generative AI for imaging, bringing significant advancements to creative control and quality, including new Text to Image capabilities now available in the popular Firefly web app where 90% of users are new to Adobe products.",,,,creative generation of digital art and images,unknown,text; image,,Firefly Image 2,USA,Adobe,"AI/ML training, attempting to create abusive, illegal, or confidential content.",,,unknown,model,https://firefly.adobe.com/,,unknown,unknown,unknown,,,,,,, closed,,2023-10-10,,[],"Firefly Vector is the world’s first generative AI focused on producing vector graphics, bringing Adobe's vector graphic and generative AI 
expertise directly into Adobe Illustrator workflows with Text to Vector Graphic.",,,,creative generation of digital art and images,unknown,text; vector graphic,,Firefly Vector,USA,Adobe,"AI/ML training, attempting to create abusive, illegal, or confidential content.",,,unknown,model,https://firefly.adobe.com/,,unknown,unknown,unknown,,,,,,, closed,,2023-10-10,,[],Firefly Design powers instant generation of amazing quality template designs in Adobe Express with the new Text to Template capability.,,,,creative generation of digital art and images,unknown,text; template design,,Firefly Design,USA,Adobe,"AI/ML training, attempting to create abusive, illegal, or confidential content.",,,unknown,model,https://firefly.adobe.com/,,unknown,unknown,unknown,,,,,,, limited,,2023-03-21,,"['Firefly Image 2', 'Firefly Vector', 'Firefly Design']","Adobe Firefly is a standalone web application. It offers new ways to ideate, create, and communicate while significantly improving creative workflows using generative AI.",,,,creative generation of digital art and images,unknown,,,Firefly,USA,Adobe,"AI/ML training, attempting to create abusive, illegal, or confidential content.",,,,application,https://firefly.adobe.com/,,,,,,unknown,unknown,AI-generated creations,https://www.adobe.com/legal/licenses-terms/adobe-gen-ai-user-guidelines.html,unknown, open,,2023-09-17,https://huggingface.co/datasets/uonlp/CulturaX,"['mC4', 'OSCAR']","CulturaX is a substantial multilingual dataset with 6.3 trillion tokens in 167 languages, tailored for LLM development.",unknown,https://huggingface.co/datasets/uonlp/CulturaX/discussions,unknown,,"mC4, OSCAR",text,unknown,CulturaX,unknown,"University of Oregon, Adobe",The data must not be utilized for malicious or harmful purposes towards humanity.,unknown,[],6.3 trillion tokens,dataset,https://arxiv.org/pdf/2309.09400,,,,,,,,,,, closed,,2021-04-09,,[],Megatron-LM is an autoregressive language model,,,,,unknown,text; text,,Megatron-LM,USA,NVIDIA,,unknown,,1T 
parameters (dense),model,https://arxiv.org/abs/2104.04473,,unknown,3072 A100 GPUs,84 days,,,,,,, open,,2022-06-17,,"['YouTube', 'Wikipedia', 'Reddit']",,,,,,MIT,"text, video",,MineDojo,USA,NVIDIA,,,[],"730k videos, 6k Wikipedia pages, 340k reddit posts",dataset,https://arxiv.org/abs/2206.08853,,,,,,,,,,, open,,2022-10-06,,"['T5', 'Mask R-CNN', 'VIMA dataset']",,,,,,MIT,"image, text",,VIMA dataset,unknown,"NVIDIA, Stanford",,,[],200M parameters (dense model),dataset,https://vimalabs.github.io/,,,,,,,,,,, open,,2022-10-06,,[],,,,,,MIT,"image, text; robotics trajectories",,VIMA,unknown,"NVIDIA, Stanford",,,,200M parameters (dense),model,https://vimalabs.github.io/,,,,,,,,,,, open,"Evaluated on standard LLM benchmarks across a range of fields like reasoning, code generation, and mathematical skills.",2024-02-27,,[],Nemotron 4 is a 15-billion-parameter large multilingual language model trained on 8 trillion text tokens.,,,,,unknown,"text; code, text",unknown,Nemotron 4,USA,Nvidia,,Deduplication and quality filtering techniques are applied to the training dataset.,,15B parameters (dense),model,https://arxiv.org/pdf/2402.16819.pdf,,unknown,3072 H100 80GB SXM5 GPUs across 384 DGX H100 nodes,13 days,,,,,,, open,BioMistral was evaluated on a benchmark comprising 10 established medical question-answering (QA) tasks in English and seven other languages.,2024-02-15,,"['Mistral', 'PubMed Central']","BioMistral is an open-source Large Language Model tailored for the biomedical domain, utilizing Mistral as its foundation model and further pre-trained on PubMed Central.",,https://huggingface.co/BioMistral/BioMistral-7B/discussions,,"Research in the biomedical domain, especially for medical question-answering tasks.",Apache 2.0,text; text,,BioMistral,unknown,"Avignon University, Nantes University",Prohibited from deploying in production environments for natural language generation or any professional health and medical purposes.,,,7B parameters 
(dense),model,https://arxiv.org/pdf/2402.10373.pdf,https://huggingface.co/BioMistral/BioMistral-7B,unknown,32 NVIDIA A100 80GB GPUs,unknown,,,,,,, open,,2023-05-30,,['GPT-4'],Transformify Automate is a platform for automated task integration using natural language prompts.,,,,,,,,Transformify Automate,USA,Transformify,,,,,application,https://www.transformify.ai/automate,,,,,,,,text and code,https://www.transformify.ai/legal-stuff,, open,Evaluated on several popular benchmarks and performance in different fields.,2023-10-30,,['SkyPile'],The Skywork series is a family of large language models (LLMs) trained on a corpus of over 3.2 trillion tokens drawn from both English and Chinese texts.,,https://huggingface.co/Skywork/Skywork-13B-base/discussions,,,custom,text; text,,Skywork,China,Kunlun Inc.,,,,13B parameters (dense),model,https://arxiv.org/pdf/2310.19341.pdf,https://huggingface.co/Skywork/Skywork-13B-base,unknown,512 A800-80GB GPUs,39 days,,,,,,, open,,2023-03-01,,['ChatGPT API'],"When shoppers search for products, the shopping assistant makes personalized recommendations based on their requests. 
Shop’s new AI-powered shopping assistant will streamline in-app shopping by scanning millions of products to quickly find what buyers are looking for—or help them discover something new.",,,,,,,,Shop Assistant,USA,Shop,,,,,application,https://openai.com/blog/introducing-chatgpt-and-whisper-apis,,,,,,,,,,, closed,unknown,2022-04-14,,[],The dataset used to train the Luminous models.,unknown,,unknown,unknown,unknown,text,,Luminous dataset,Germany,Aleph Alpha,unknown,unknown,[],unknown,dataset,,,,,,,,,,,, limited,,2022-04-14,,['Luminous dataset'],Luminous is a family of multilingual language models,,,,,,text; text,,Luminous,Germany,Aleph Alpha,,,,200B parameters (dense),model,https://twitter.com/Aleph__Alpha/status/1514576711492542477,,unknown,unknown,unknown,,,,,,, limited,,2021-09-30,,['Luminous'],The Aleph Alpha API serves a family of text-only language models (Luminous) and multimodal text-and-image models (Magma).,,unknown,,unknown,,,unknown,Aleph Alpha API,Germany,Aleph Alpha,unknown,,,,application,https://www.aleph-alpha.com/,,,,,,unknown,unknown,The text models provide text outputs given text inputs. The multimodal models provide text completions given text and image inputs.,https://www.aleph-alpha.com/terms-conditions,unknown, open,Evaluated on the OKVQA benchmark as a fully open-ended generative task.,2022-10-24,,"['GPT-J', 'CLIP']",An autoregressive VL model that is able to generate text from an arbitrary combination of visual and textual input,,,,,MIT,"image, text; text",,MAGMA,Germany,Aleph Alpha,,,,6B parameters (dense),model,https://arxiv.org/pdf/2112.05253.pdf,,,32 A100 GPUs,,,,,,,, open,"Extensive evaluations were done with ablation experiments performed on pre-training benchmarks such as lambada, triviaqa, hellaswag, winogrande, webqs, arc, and boolq. Direct comparisons were also performed with applications like GPT and Llama 2.",2024-09-08,,[],"Pharia-1-LLM-7B is a model that falls within the Pharia-1-LLM model family. 
It is designed to deliver short, controlled responses that match the performance of leading open-source models around 7-8 billion parameters. The model is culturally and linguistically tuned for German, French, and Spanish languages. It is trained on carefully curated data in line with relevant EU and national regulations. The model shows improved token efficiency and is particularly effective in domain-specific applications, especially in the automotive and engineering industries. It can also be aligned to user preferences, making it appropriate for critical applications without the risk of shut-down behaviour.",,Feedback can be sent to support@aleph-alpha.com.,,"The model is intended for use in domain-specific applications, particularly in the automotive and engineering industries. It can also be tailored to user preferences.",Aleph Open,text; text,Unknown,Pharia-1-LLM-7B,Germany,Aleph Alpha,Unknown,The model comes with additional safety guardrails via alignment methods to ensure safe usage. 
Training data is carefully curated to ensure compliance with EU and national regulations.,,7B parameters,model,https://aleph-alpha.com/introducing-pharia-1-llm-transparent-and-compliant/#:~:text=Pharia%2D1%2DLLM%2D7B,unknown,Unknown,Unknown,Unknown,,,,,,, closed,,2021-01-12,,[],,,,,,unknown,"image, text",,Wu Dao dataset,China,Beijing Academy of Artificial Intelligence,,,[],unknown,dataset,https://www.tsinghua.edu.cn/en/info/1420/10473.htm,,,,,,,,,,, closed,,2021-01-12,,['Wu Dao dataset'],,,,,,unknown,"image, text",,Wu Dao 2.0,China,Beijing Academy of Artificial Intelligence,,,,1.75T parameters (dense),model,https://www.tsinghua.edu.cn/en/info/1420/10473.htm,,,,,,,,,,, open,Evaluated on objective and reliability metrics.,2023-10-26,,"['Vicuna', 'JudgeLM Dataset']",JudgeLM is a fine-tuned to be a scalable judge to evaluate LLMs efficiently and effectively in open-ended benchmarks.,,https://huggingface.co/BAAI/JudgeLM-13B-v1.0/discussions,,Research on evaluating the performance of large language models and chatbots.,Apache 2.0,text; text,,JudgeLM,China,Beijing Academy of Artificial Intelligence,,,,13B parameters (dense),model,https://arxiv.org/pdf/2310.17631.pdf,https://huggingface.co/BAAI/JudgeLM-13B-v1.0,unknown,8 A100 40GB NVIDIA GPUs,unknown,,,,,,, open,,2023-10-26,,"['Alpaca', 'GPT-4', 'Dolly', 'ShareGPT', 'LLaMA', 'Vicuna']","JudgeLM Dataset is a novel dataset replete with a rich variety of seed tasks, comprehensive answers from modern LLMs, answers’ grades from the teacher judge, and detailed reasons for judgments.",,https://huggingface.co/datasets/BAAI/JudgeLM-100K/discussions,,To be used to conduct instruction-tuning for language models and make the language model able to judge open-ended answer pairs.,CC BY NC 4.0,"text, text",,JudgeLM Dataset,China,Beijing Academy of Artificial Intelligence,,,[],105k judge samples,dataset,https://huggingface.co/datasets/BAAI/JudgeLM-100K,,,,,,,,,,, open,Compared to other segmentation models across different modalities on 
BraTS2023 dataset.,2024-01-25,,[],"SegMamba is a novel 3D medical image Segmentation Mamba model, designed to effectively capture long-range dependencies within whole volume features at every scale.",,,,,Apache 2.0,image; text,unknown,SegMamba,unknown,"Hong Kong University of Science and Technology (Guangzhou + original), Beijing Academy of Artificial Intelligence",,unknown,,unknown,model,https://arxiv.org/pdf/2401.13560v2.pdf,,unknown,4 NVIDIA A100 GPUs,1000 epochs,,,,,,, open,"Evaluated on standard datasets in multilingual, cross-lingual, long document retrieval, and Q&A domains.",2024-02-05,,[],"BGE M3 Embedding is a new embedding model that can support more than 100 working languages, leading to new state-of-the-art performances on multi-lingual and cross-lingual retrieval tasks.",,https://huggingface.co/BAAI/bge-m3/discussions,,,MIT,text; text,unknown,BGE M3 Embedding,unknown,"Beijing Academy of Artificial Intelligence, University of Science and Technology of China",,unknown,,unknown,model,https://arxiv.org/pdf/2402.03216.pdf,https://huggingface.co/BAAI/bge-m3,unknown,32 A100 40GB GPUs,"20,000 steps",,,,,,, open,Evaluated on zero-shot classification performance across multiple image classification benchmarks.,2024-02-06,,['CLIP'],"As of release, EVA-CLIP is the largest and most powerful open-source CLIP model to date, with 18 billion parameters.",,https://huggingface.co/BAAI/EVA-CLIP-8B-448/discussions,,,MIT,"image, text; text",unknown,EVA-CLIP,unknown,"Beijing Academy of Artificial Intelligence, Tsinghua University",,,,18B parameters (dense),model,https://arxiv.org/pdf/2402.04252.pdf,https://huggingface.co/BAAI/EVA-CLIP-8B-448,unknown,384 A100 40GB GPUs,unknown,,,,,,, open,,2023-10-02,,['Falcon-180B'],Vulture is a further fine-tuned causal Decoder-only LLM built by Virtual Interactive (VILM) on top of Falcon.,,https://huggingface.co/vilm/vulture-180b/discussions,,,Apache 2.0,text; text,unknown,Vulture,USA,Virtual Interactive,Production use without adequate 
assessment of risks and mitigation; any use cases which may be considered irresponsible or harmful.,,,180B parameters (dense),model,https://huggingface.co/vilm/vulture-180b,https://huggingface.co/vilm/vulture-180b,unknown,unknown,3000 A100 hours,,,,,,, open,Evaluated on wide range of tasks using own evaluation benchmarks.,2022-11-20,,"['AlphaFold2', 'OpenProteinSet']",OpenFold is an open source recreation of AlphaFold2.,,,,,CC BY 4.0,amino acid sequence; protein structure,,OpenFold,USA,Columbia,,,,,model,https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2,,unknown,Single A100 NVIDIA GPU,"50,000 GPU hours",,,,,,, open,Evaluated on the object hallucination benchmark and compared to GPT-4V.,2023-10-11,,"['CLIP', 'Vicuna']",Ferret is a Multimodal Large Language Model (MLLM) capable of understanding spatial referring of any shape or granularity within an image and accurately grounding open-vocabulary descriptions.,,,,,Apple,"image, text; image, text",,Ferret,unknown,"Columbia, Apple AI",,,,13B parameters,model,https://arxiv.org/pdf/2310.07704.pdf,,unknown,8 A100 GPUs,2.5 to 5 days,,,,,,, closed,,,,[],,,,,,unknown,text,,Neeva dataset,USA,Neeva,,,[],unknown,dataset,https://neeva.com/index,,,,,,,,,,, closed,,,,['Neeva dataset'],,,,,,unknown,text; text,,Neeva model,USA,Neeva,,,,unknown,model,https://neeva.com/index,,,,,,,,,,, open,,2023-01-06,,['Neeva model'],NeevaAI is an AI-powered search tool that combines the capabilities of LLMs with Neeva's independent in-house search stack to create a unique and transformative search experience.,,,,,Custom,,,NeevaAI,USA,Neeva,,,,,application,https://neeva.com/blog/introducing-neevaai,,,,,,,,,https://neeva.com/terms,, closed,,2023-07-19,,[],EXAONE 2.0 is a multimodal artificial intelligence that can be used to help develop new materials and medicines.,,,,,unknown,"image, text; image, text",,EXAONE 2.0,South Korea,LG AI Research,,,,unknown,model,https://www.lgresearch.ai/exaone,,unknown,unknown,unknown,,,,,,, open,Evaluated on 
the COCO dataset.,2023-04-28,,['LAION-5B'],A text-to-image cascaded pixel diffusion model released in conjunction with AI research lab DeepFloyd.,,https://huggingface.co/DeepFloyd/IF-I-XL-v1.0/discussions,,,custom,text; image,,DeepFloyd IF,UK,Stability AI,,,,4.3B parameters (dense),model,https://stability.ai/blog/deepfloyd-if-text-to-image-model,https://huggingface.co/DeepFloyd/IF-I-XL-v1.0,,,,,,,,,, open,,2023-04-20,,"['StableLM-Alpha dataset', 'Alpaca dataset', 'gpt4all dataset', 'ShareGPT52K dataset', 'Dolly dataset', 'HH dataset']",Large language models trained on up to 1.5 trillion tokens.,,,,,Apache 2.0,text; text,,StableLM,UK,Stability AI,,,,7B parameters (dense),model,https://github.com/Stability-AI/StableLM,,,,,,,,,,, open,,2022-08-22,,[],Stable Diffusion is a generative software that creates images from text prompts.,,https://huggingface.co/CompVis/stable-diffusion/discussions,,,custom,,,Stable Diffusion,UK,Stability AI,,,,,application,https://stability.ai/blog/stable-diffusion-public-release,,,,,,,,image,,, open,,2023-07-26,,[],"Stable Diffusion XL is an updated version of Stable Diffusion, and creates descriptive images with shorter prompts and generate words within images.",,,,,Open Rail++,,,Stable Diffusion XL,UK,Stability AI,,,,,application,https://stability.ai/stablediffusion,,,,,,,,image,,, limited,Evaluated via a user study comparing preferences between Stable Video Diffusion and competing text-to-video models.,2023-11-21,,['Large Video Dataset'],Stable Video Diffusion is a latent diffusion model trained to generate short video clips from an image conditioning.,,https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/discussions,,Intended for research purposes only.,custom,text; video,,Stable Video Diffusion,UK,Stability AI,Using the model to generate representations of real-world people or 
events.,,,unknown,model,https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf,https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt,unknown,unknown,unknown,,,,,,, closed,Large Video Dataset compared to publicly available research datasets on general statistics before and after filtering.,2023-11-21,,"['WebVid-10M', 'CoCa', 'V-BLIP']","Large Video Dataset is the dataset that trained Stable Video Diffusion, consisting of over 212 years of content.",,,,,unknown,video with caption,,Large Video Dataset,UK,Stability AI,,"Dataset annotated with dense optical flow, and low optical flow videos are removed.",[],580M annotated video clip pairs,dataset,https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf,,,,,,,,,,, open,,2023-11-01,,[],Sky Replacer is an exciting new tool that allows users to replace the color and aesthetic of the sky in their original photos with a selection of nine alternatives to improve the overall look and feel of the image.,,,,,unknown,,,Sky Replacer,UK,Stability AI,,,,,application,https://clipdrop.co/real-estate/sky-replacer,,,,,,,,image,https://stability.ai/terms-of-use,, open,Evaluated on standard LLM benchmarks and in multilingual tasks compared to similarly sized open-source models.,2024-01-19,,"['RedPajama-Data', 'The Pile', 'RefinedWeb', 'The Stack', 'OpenWebText', 'OpenWebMath']","StableLM 2 is a state-of-the-art 1.6 billion parameter small language model trained on multilingual data in English, Spanish, German, Italian, French, Portuguese, and Dutch.",,https://huggingface.co/stabilityai/stablelm-2-1_6b/discussions,,The model is intended to be used as a foundational base model for application-specific fine-tuning. 
Developers must evaluate and fine-tune the model for safe performance in downstream applications.,custom,text; text,unknown,StableLM 2,UK,Stability AI,,,,1.6B parameters (dense),model,https://stability.ai/news/introducing-stable-lm-2,https://huggingface.co/stabilityai/stablelm-2-1_6b,11 tCO2eq,512 NVIDIA A100 40GB GPUs,92k GPU hours,,,,,,, open,Evaluated on image generation benchmarks in comparison to equal and smaller-sized models.,2024-01-16,,[],"Stable Cascade is built upon the Würstchen architecture and its main difference to other models, like Stable Diffusion, is that it is working at a much smaller latent space.",,https://huggingface.co/stabilityai/stable-cascade/discussions,,The model is intended for research purposes for now.,custom,text; image,unknown,Stable Cascade,UK,Stability AI,"The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model. The model should not be used in any way that violates Stability AI's Acceptable Use Policy.",,,unknown,model,https://huggingface.co/stabilityai/stable-cascade,https://huggingface.co/stabilityai/stable-cascade,"2,276 kgCO2eq",Some number of A100 GPUs,"24,602 A100 GPU hours",,,,,,, open,unknown,2024-03-18,,['Objaverse'],"Stable Video 3D (SV3D) is a generative model based on Stable Video Diffusion that takes in a still image of an object as a conditioning frame, and generates an orbital video of that object.",,https://huggingface.co/stabilityai/sv3d/discussions,,This model is intended to be used for generating orbital videos of objects from still images.,StabilityAI Non-Commercial Research Community License,image; video,unknown,Stable Video 3D,UK,Stability AI,"The model should not be used for generating factual or true representations of people or events, or in any way that violates Stability AIs Acceptable Use 
Policy.",unknown,,unknown,model,https://stability.ai/news/introducing-stable-video-3d,https://huggingface.co/stabilityai/sv3d,unknown,unknown,unknown,,,,,,, open,,2024-04-03,,['AudioSparx'],"Stable Audio 2.0 sets a new standard in AI-generated audio, producing high-quality, full tracks with coherent musical structure up to three minutes in length at 44.1kHz stereo.",,,,"It can be used to generate melodies, backing tracks, stems, and sound effects.",unknown,"audio, text; audio",Advanced content recognition is used to maintain compliance and prevent copyright infringement.,Stable Audio 2.0,UK,Stability AI,Uploading copyrighted material for transformation.,"To protect creator copyrights, for audio uploads, Stability AI partners with Audible Magic to use their content recognition (ACR) technology to power real-time content matching and prevent copyright infringement. Opt-out requests were honored during the training phase.",,unknown,model,https://stability-ai.squarespace.com/news/stable-audio-2-0,,unknown,unknown,unknown,,,,,,, open,Evaluated on several benchmark LLM tasks,2023-12-12,,"['Arxiv', 'Books', 'C4', 'RefinedWeb', 'StarCoder', 'StackExchange', 'Wikipedia']","Amber is the first model in the LLM360 family, an initiative for comprehensive and fully open-sourced LLMs, where all training details, model checkpoints, intermediate results, and additional analyses are made available to the community.",,https://huggingface.co/LLM360/Amber/discussions,,to support open and collaborative AI research by making the full LLM training process transparent.,Apache 2.0,text; text,unknown,Amber,International,LLM360,,,,7B parameters (dense),model,https://www.llm360.ai/,https://huggingface.co/LLM360/Amber,unknown,"56 DGX A100 nodes, each equipped with 4 80GB A100 GPUs",unknown,,,,,,, open,"Evaluated on English and coding tasks and benchmarks, and outperforms LLaMA 2 in some.",2023-12-12,,"['SlimPajama', 'StarCoder']",CrystalCoder is a language model with a balance of code and text 
data that follows the initiative under LLM360 of its training process being fully transparent.,,https://huggingface.co/LLM360/CrystalCoder/discussions,,to support open and collaborative AI research by making the full LLM training process transparent.,Apache 2.0,"text; code, text",unknown,CrystalCoder,International,LLM360,,,,7B parameters (dense),model,https://www.llm360.ai/,https://huggingface.co/LLM360/CrystalCoder,unknown,"Trained on the Cerebras Condor Galaxy 1 (CG-1), a 4 exaFLOPS, 54 million core, 64-node cloud AI supercomputer.",unknown,,,,,,, open,"Evaluated on the LLM360 Performance and Evaluation Collection that checks standard best practice benchmarks, medical, math, and coding knowledge.",2024-05-29,,[],"K2 is a 65 billion parameter large language model inspired by the Llama 2 65B model. The model is also supported with a suite of research tools, tutorials and step-by-step guides for learning pre-training and fine-tuning techniques.",,https://huggingface.co/LLM360/K2/discussions,,The model is intended for learning pre-training techniques or enhancing research capabilities in large language models.,Apache 2.0,text; text,unknown,K2,International,LLM360,unknown,unknown,,65B parameters,model,https://www.llm360.ai/paper2.pdf,https://huggingface.co/LLM360/K2,unknown,unknown,unknown,,,,,,, open,Evaluated on human and machine benchmarks in comparison to established image models as a baseline.,2024-01-26,,"['CLIP', 'LAION-400M', 'Wukong', 'Stable Diffusion XL']",Taiyi Diffusion XL is a new Chinese and English bilingual text-to-image model which is developed by extending the capabilities of CLIP and Stable-DiffusionXL.,,https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-XL-3.5B/discussions,,,Apache 2.0,text; image,unknown,Taiyi Diffusion XL,unknown,"International Digital Economy Academy, South China University of Technology, University of Science and Technology of China",,unknown,,3.5B parameters 
(dense),model,https://arxiv.org/pdf/2401.14688.pdf,https://huggingface.co/IDEA-CCNL/Taiyi-Stable-Diffusion-XL-3.5B,unknown,unknown,unknown,,,,,,, closed,,2023-03-15,,[],The dataset used to train AssemblyAI's Conformer-1 model.,,,"""our team curated a dataset of 650K hours of English audio - consisting of proprietary internal datasets and various sources from the internet"" ",,unknown,"audio, text",,Conformer-1 dataset,USA,AssemblyAI,,,[],650K hours audio (60TB),dataset,https://www.assemblyai.com/blog/conformer-1/,,,,,,,,,,, limited,"In order to evaluate the accuracy and robustness of Conformer-1, we sourced 60+ hours of human labeled audio data covering popular speech domains such as call centers, podcasts, broadcasts, and webinars. We then calculated the Word Error Rate (WER) of Conformer-1 against these datasets, and compared the results against Whisper and a number of other ASR models. To ground our results against popular open source speech recognition benchmarks, we also performed the same WER analysis against a number of academic datasets.",2023-03-15,,['Conformer-1 dataset'],"Conformer-1 is a state-of-the-art speech recognition model trained on 650K hours of audio data that achieves near human-level performance and robustness across a variety of data, making up to 43% fewer errors on noisy data than other ASR models.",,,,,unknown,"audio, text; audio, text",,Conformer-1,USA,AssemblyAI,,,,300M parameters (dense),model,https://www.assemblyai.com/blog/conformer-1/,,,,,,,,,,, limited,,2022,,['Anthropic API'],AssemblyAI uses Claude and Anthropic's model to transcribe and understand audio data at scale.,,,,,custom,,,AssemblyAI,USA,AssemblyAI,,,,,application,https://www.assemblyai.com/,,,,,,,,,https://www.assemblyai.com/legal/terms-of-service,, open,,2023-03-15,,['Conformer-1'],API to access the AssemblyAI's Conformer-1 model.,,,,Speech recognition,custom,,,Conformer-1 
API,USA,AssemblyAI,,,,,application,https://www.assemblyai.com/blog/conformer-1/,,,,,,,,,https://www.assemblyai.com/legal/terms-of-service,, open,unknown,2023-10-31,,"['AI-HUB dataset', 'National Institute of Korean Language dataset']",Midm is a pre-trained Korean-English language model developed by KT. It takes text as input and creates text. The model is based on Transformer architecture for an auto-regressive language model.,,https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1/discussions,,It is expected to be used for various research purposes.,CC-BY-NC 4.0,text; text,unknown,Midm,South Korea,KT Corporation,It cannot be used for commercial purposes.,"KT tried to remove unethical expressions such as profanity, slang, prejudice, and discrimination from training data.",,7B parameters,model,https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1,https://huggingface.co/KT-AI/midm-bitext-S-7B-inst-v1,unknown,unknown,unknown,,,,,,, open,"Evaluated in 1-shot against the PaLM models, with the tasks of the paper ""Language models are few-shot learners"" (Brown et al., 2020); (2) on a small set of few-shot tasks reported by the GPT-4 paper; (3) against state-of-the-art models across common sense, question answering, and code tasks; (4) against models which also report results from the EAI Harness, for which we are able to compare with identical prompts and metrics.",2023-06-14,,['RefinedWeb'],"Falcon-40B is a 40B parameters causal decoder-only model built by TII and trained on 1,000B tokens of RefinedWeb enhanced with curated corpora.",,https://huggingface.co/tiiuae/falcon-40b/discussions,,Research on large language models; as a foundation for further specialization for specific use cases.,Apache 2.0,text; text,None,Falcon-40B,UAE,UAE Technology Innovation Institute,irresponsible or harmful use or production use without adequate assessment of risks and mitigation.,,,40B parameters 
(dense),model,https://arxiv.org/pdf/2311.16867.pdf,https://huggingface.co/tiiuae/falcon-40b,unknown,384 A100 40GB GPUs,2 months,,,,,,, open,,2023-06-01,https://huggingface.co/datasets/tiiuae/falcon-refinedweb,[],RefinedWeb is a high-quality five trillion tokens web-only English pretraining dataset.,,,,,custom,text,,RefinedWeb,UAE,UAE Technology Innovation Institute,,,[],600B tokens,dataset,https://arxiv.org/pdf/2306.01116.pdf,,,,,,,,,,, open,"Falcon-180B outperforms LLaMA-2, StableLM, RedPajama, MPT on the Open LLM Leaderboard at https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard.",2023-09-06,,['RefinedWeb'],"Falcon-180B is a 180B parameters causal decoder-only model built by TII and trained on 3,500B tokens of RefinedWeb enhanced with curated corpora.",,https://huggingface.co/tiiuae/falcon-180b/discussions,,Research on large language models; as a foundation for further specialization for specific use cases.,unknown,text; text,None,Falcon-180B,UAE,UAE Technology Innovation Institute,Production use without adequate assessment of risks and mitigation; any use cases which may be considered irresponsible or harmful.,,,180B parameters (dense),model,https://arxiv.org/pdf/2311.16867.pdf,https://huggingface.co/tiiuae/falcon-180B,,4096 A100 40GB GPUs,9 months,,,,,,, open,unknown,2024-03-18,,['Animagine XL 3.0'],"An open-source, anime-themed text-to-image model enhanced to generate higher quality anime-style images with a broader range of characters from well-known anime series, an optimized dataset, and new aesthetic tags for better image creation.",,https://huggingface.co/cagliostrolab/animagine-xl-3.1/discussions,,"Generating high-quality anime images from textual prompts. 
Useful for anime fans, artists, and content creators.",Fair AI Public License 1.0-SD,text; image,unknown,Animagine XL 3.1,USA,Cagliostro Research Lab,Not suitable for creating realistic photos or for users who expect high-quality results from short or simple prompts.,"The model undergoes pretraining, first stage finetuning, and second stage finetuning for refining and improving aspects such as hand and anatomy rendering.",,unknown,model,https://cagliostrolab.net/posts/animagine-xl-v31-release,https://huggingface.co/cagliostrolab/animagine-xl-3.1,unknown,2x A100 80GB GPUs,"Approximately 15 days, totaling over 350 GPU hours.",,,,,,, open,,2023-05-02,,['ChatGPT'],AI chatbot on Nextdoor that helps users write more clear and conscientious posts.,,,,to be used to help make the Nextdoor experience more positive for users,unknown,,,Nextdoor Assistant,USA,Nextdoor,,,,,application,https://help.nextdoor.com/s/article/Introducing-Assistant,,,,,,,,natural language text guidance,,, open,"Tested on several benchmarks, most notably Python benchmark HumanEval.",2023-05-09,,['The Stack'],"StarCoder is a Large Language Model for Code (Code LLM) trained on permissively licensed data from GitHub, including from 80+ programming languages, Git commits, GitHub issues, and Jupyter notebooks.",,https://huggingface.co/bigcode/starcoder/discussions,,"As a foundation model to fine-tune and create more specialized models that support use cases such as code completion, fill-in-the-middle, and text summarization. 
Can also be used as a Tech Assistant prompt and not as an instruction model given training limitations.",BigCode Open RAIL-M v1.0,code; code,,StarCoder,International,BigCode,See BigCode Open RAIL-M license and FAQ,"No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper.",,15.5B parameters (dense),model,https://arxiv.org/pdf/2305.06161.pdf,https://huggingface.co/bigcode/starcoder,16.68 tons of CO2eq,512 A100 80GB GPUs distributed across 64 nodes,"320,256 GPU hours",,,,,,, open,Evaluated on MultiPL-E system benchmarks.,2023-02-24,,"['The Stack', 'BigCode Dataset']",Multilingual code model derived from the findings of BigCode Project analysis on Github stars' association to data quality.,,https://huggingface.co/bigcode/santacoder/discussions,,The model was trained on GitHub code. As such it is not an instruction model and commands do not work well. You should phrase commands like they occur in source code such as comments or write a function signature and docstring and let the model complete the function body.,BigCode Open RAIL-M v1,code; code,,SantaCoder,International,BigCode,See BigCode Open RAIL-M license and FAQ,,,1.1B parameters (dense),model,https://arxiv.org/pdf/2301.03988.pdf,https://huggingface.co/bigcode/santacoder,124 kg of CO2eq,96 NVIDIA Tesla V100 GPUs,"14,284 GPU hours",,,,,,, open,Evaluated models trained on The Stack on HumanEval and MBPP and compared against similarly-sized models.,2022-11-20,https://huggingface.co/datasets/bigcode/the-stack,['GitHub'],"The Stack contains over 6TB of permissively-licensed source code files covering 358 programming languages. 
The Stack serves as a pre-training dataset for Code LLMs, i.e., code-generating AI systems which enable the synthesis of programs from natural language descriptions as well as from other code snippets.",,https://huggingface.co/datasets/bigcode/the-stack/discussions,,creating code LLMs,"The Stack is a collection of source code from repositories with various licenses. Any use of all or part of the code gathered in The Stack must abide by the terms of the original licenses, including attribution clauses when relevant. Provenance information is provided for each data point.",code,,The Stack,International,BigCode,See https://huggingface.co/datasets/bigcode/the-stack,allowed users whose data were part of The Stack's training data to opt-out,['https://huggingface.co/datasets/bigcode/the-stack/viewer/default/train'],6 TB,dataset,https://arxiv.org/pdf/2211.15533.pdf,,,,,,,,,,, open,See https://arxiv.org/pdf/2402.19173.pdf,2024-02-28,,['The Stack v2'],"StarCoder2-15B model is a 15B parameter model trained on 600+ programming languages from The Stack v2, with opt-out requests excluded. The training was carried out using the Fill-in-the-Middle objective on 4+ trillion tokens.",,https://huggingface.co/bigcode/starcoder2-15b/discussions,,"The model was trained on GitHub code as well as additional selected data sources such as Arxiv and Wikipedia. As such it is not an instruction model and commands like ""Write a function that computes the square root."" do not work well. Intended to generate code snippets from given context, but not for writing actual functional code directly.",BigCode OpenRail-M,code; text,unknown,StarCoder2-15B,International,BigCode,See BigCode Open RAIL-M license and FAQ,The model was filtered for permissive licenses and code with no license only. 
A search index is provided to identify where generated code came from to apply the proper attribution.,,15B parameters (dense),model,https://www.servicenow.com/company/media/press-room/huggingface-nvidia-launch-starcoder2.html,https://huggingface.co/bigcode/starcoder2-15b,unknown,1024 x H100 GPUs,unknown,,,,,,, open,See https://arxiv.org/pdf/2402.19173.pdf,2024-02-28,,['The Stack v2'],"StarCoder2-7B model is a 7B parameter model trained on 17 programming languages from The Stack v2, with opt-out requests excluded. The model uses Grouped Query Attention, a context window of 16,384 tokens with a sliding window attention of 4,096 tokens, and was trained using the Fill-in-the-Middle objective on 3.5+ trillion tokens.",,https://huggingface.co/bigcode/starcoder2-7b/discussions,,"Intended to generate code snippets from given context, but not for writing actual functional code directly. The model has been trained on source code from 17 programming languages. The predominant language in source is English although other languages are also present. As such the model is capable of generating code snippets provided some context but the generated code is not guaranteed to work as intended. It can be inefficient and contain bugs or exploits. See the paper for an in-depth discussion of the model limitations.",BigCode OpenRail-M,code; text,unknown,StarCoder2-7B,International,BigCode,See BigCode Open RAIL-M license and FAQ,The model was filtered for permissive licenses and code with no license only. 
A search index is provided to identify where generated code came from to apply the proper attribution.,,7B parameters (dense),model,https://www.servicenow.com/company/media/press-room/huggingface-nvidia-launch-starcoder2.html,https://huggingface.co/bigcode/starcoder2-7b,"29,622.83 kgCO2eq",432 H100 GPUs,"145,152 hours (cumulative)",,,,,,, open,See https://arxiv.org/pdf/2402.19173.pdf,2024-02-28,,['The Stack v2'],"StarCoder2-3B model is a 3B parameter model trained on 17 programming languages from The Stack v2, with opt-out requests excluded. The model uses Grouped Query Attention, a context window of 16,384 tokens with a sliding window attention of 4,096 tokens, and was trained using the Fill-in-the-Middle objective on 3+ trillion tokens.",,https://huggingface.co/bigcode/starcoder2-3b/discussions,,"Intended to generate code snippets from given context, but not for writing actual functional code directly. The model has been trained on source code from 17 programming languages. The predominant language in source is English although other languages are also present. As such the model is capable of generating code snippets provided some context but the generated code is not guaranteed to work as intended. It can be inefficient and contain bugs or exploits. See the paper for an in-depth discussion of the model limitations.",BigCode OpenRail-M,code; text,unknown,StarCoder2-3B,International,BigCode,See BigCode Open RAIL-M license and FAQ,The model was filtered for permissive licenses and code with no license only. 
A search index is provided to identify where generated code came from to apply the proper attribution.,,3B parameters (dense),model,https://www.servicenow.com/company/media/press-room/huggingface-nvidia-launch-starcoder2.html,https://huggingface.co/bigcode/starcoder2-3b,"16,107.01 kgCO2eq",160 A100 GPUs,"97,120 hours (cumulative)",,,,,,, open,"Compared with other multi-task, instruction-following agents.",2023-11-10,,[],"JARVIS-1 is an open-world agent that can perceive multimodal input (visual observations and human instructions), generate sophisticated plans, and perform embodied control, all within the popular yet challenging open-world Minecraft universe.",,,,,unknown,text; in-game actions,,JARVIS-1,China,Peking University Institute for Artificial Intelligence,,,,unknown,model,https://arxiv.org/pdf/2311.05997.pdf,,unknown,unknown,unknown,,,,,,, open,unknown,2021-04-01,,[],"WebVid-10M is a large-scale dataset of short videos with textual descriptions sourced from stock footage sites. ",unknown,Feedback can be given by emailing at maxbain at robots.ox.ac.uk,Video URLs and textual description annotations,"""Provided you keep to these rules, the University grants you (the researcher) a non-exclusive and non-transferable licence to use the content free of charge strictly for non-commercial research (i.e., whose output artefacts are not incorporated in commercial products) for 12 months."" ",WebVid Dataset Terms,"text, video",,WebVid-10M,UK,University of Oxford,"- Authors note the following prohibited uses: ""You must not use the content other than for the Permitted Purpose in strict conformity with these terms and any other reasonable instructions of the University. 
You must not, except as may be strictly necessary for carrying out the Permitted Purpose, provide or otherwise make available content to any third party or allow use of it or them by or on behalf of any third party, in whole or in part, whether by way of sale, resale, loan, transfer, hire or any other form of exploitation; or attempt to identify any living or deceased individual from the content."" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - Authors also note the following limitations of the dataset: ""We note that data sourced from the web may be prone to biases and may contain graphic content. Please be careful of unintended societal, gender, racial and other biases when training or deploying models trained on this data."" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) ",unknown,[],"10.7M video-text pairs, 52K hours video",dataset,https://m-bain.github.io/webvid-dataset/,,,,,,,,,,, open,unknown,2021-04-01,,['WebVid-10M'],"WebVid-2M is a large-scale dataset of 2.5M short videos with textual descriptions sourced from stock footage sites. A subset of the WebVid-10M dataset. ",unknown,Feedback can be given by emailing at maxbain at robots.ox.ac.uk,Video URLs and textual description annotations,"""Provided you keep to these rules, the University grants you (the researcher) a non-exclusive and non-transferable licence to use the content free of charge strictly for non-commercial research (i.e., whose output artefacts are not incorporated in commercial products) for 12 months."" ",WebVid Dataset Terms,"text, video",,WebVid-2M,UK,University of Oxford,"- Authors note the following prohibited uses: ""You must not use the content other than for the Permitted Purpose in strict conformity with these terms and any other reasonable instructions of the University. 
You must not, except as may be strictly necessary for carrying out the Permitted Purpose, provide or otherwise make available content to any third party or allow use of it or them by or on behalf of any third party, in whole or in part, whether by way of sale, resale, loan, transfer, hire or any other form of exploitation; or attempt to identify any living or deceased individual from the content."" [[Terms of Access]](https://github.com/m-bain/webvid/blob/main/TERMS.md) - Authors also note the following limitations of the dataset: ""We note that data sourced from the web may be prone to biases and may contain graphic content. Please be careful of unintended societal, gender, racial and other biases when training or deploying models trained on this data."" [[Disclaimer]](https://github.com/m-bain/webvid#disclaimer-%EF%B8%8F) ",unknown,[],"2.5M video-text pairs, 13K hours video",dataset,https://m-bain.github.io/webvid-dataset/,,,,,,,,,,, closed,,,,[],,,,,,unknown,text,,You dataset,USA,You,,,[],unknown,dataset,https://you.com/,,,,,,,,,,, closed,,,,['You dataset'],,,,,,unknown,text; text,,You model,USA,You,,,,unknown,model,https://you.com/,,,,,,,,,,, open,,,,['You model'],You.com is a search engine built on artificial intelligence that provides users with a customized search experience while keeping their data 100% private.,,,,,unknown,,,You Search,USA,You,,,,,application,https://you.com/,,,,,,,,,,, open,"""We evaluate our models on the PILE validation set comprising 380M tokens. We also evaluate the public checkpoints of Pythia, Eleuther (2022); OPT, Zhang et al. (2022); GPT-NeoX 20B, Black et al. (2022); and GPT-J 6B, Wang & Komatsuzaki (2021). We performed upstream (pre-training) evaluations of text prediction cross-entropy using the Pile validation and test splits. 
We performed downstream evaluations of text generation accuracy on standardized tasks using the Eleuther lm-evaluation-harness."" [[Evaluations]] (https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#evaluations). ",2023-03-28,,['The Pile'],"A Family of Open, Compute-efficient, Large Language Models. The family includes 111M, 256M, 590M, 1.3B, 2.7B, 6.7B, and 13B models. All models in the Cerebras-GPT family have been trained in accordance with Chinchilla scaling laws (20 tokens per model parameter). [[Cerebras Blog Post]](https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models) ",,,,"""The primary intended use is to further research into large language models. These models can be used as a foundation model for NLP, applications, ethics, and alignment research. Our primary intended users are researchers who are working to improve LLMs and practitioners seeking reference implementations, training setups, hyperparameters, or pre-trained models. We release these models with a fully permissive Apache license for the community to use freely."" [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#uses-and-limitations). ",Apache 2.0,text; text,,Cerebras-GPT,USA,Cerebras,"Authors note the following limitations of the model: ""Cerebras-GPT models are trained on the Pile, with English language only, and are not suitable for machine translation tasks. Cerebras-GPT models have not been tuned for human-facing dialog applications like chatbots and will not respond to prompts in a similar way to models that have received instruction tuning or reinforcement learning from human feedback (RLHF) like Flan-T5 or ChatGPT."" [[Uses and Limitations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#out-of-scope-use). 
","The Pile dataset has been thoroughly analyzed from various ethical standpoints such as toxicity analysis, gender bias, pejorative content, racially sensitive content etc. Only mitigations in standard Pile dataset pre-processing were employed when pre-training Cerebras-GPT. [[Risk, Bias, Ethical Considerations]](https://github.com/Cerebras/modelzoo/tree/main/modelzoo/transformers/pytorch/gpt3/configs/Cerebras_GPT#risk-bias-ethical-considerations) ",,13B parameters (dense),model,https://www.cerebras.net/blog/cerebras-gpt-a-family-of-open-compute-efficient-large-language-models/,https://huggingface.co/cerebras/Cerebras-GPT-13B,,16x Cerebras CS-2 wafer scale systems,,,,,,,, open,Evaluated on standard English LLM benchmarks and adapted Arabic LLM benchmarks.,2023-08-30,,"['GPT-3', 'The Pile']",Jais is the world’s most advanced Arabic LLM as of its release.,,,,Jais is released with the aim to stimulate research and development in the Arabic NLP community.,Apache 2.0,text; text,unknown,Jais,unknown,"Inception Institute of Artificial Intelligence, Cerebras, Mohamed bin Zayed University of Artificial Intelligence","Generating or endorsing hate speech, disseminating false information, engaging in illegal activities, managing sensitive data, attempting language generalization beyond Arabic and English, and making critical decisions with high stakes.",,,13B parameters (dense),model,https://inceptioniai.org/jais/docs/Technicalpaper.pdf,https://inceptioniai.org/jais/docs/Technicalpaper.pdf,unknown,Condor Galaxy Supercomputer,unknown,,,,,,, open,Evaluated on standard English LLM benchmarks and adapted Arabic LLM benchmarks.,2023-08-30,,"['GPT-3', 'The Pile']","Jais Chat is an instruction-tuned version of Jais, optimized for dialog interaction.",,,,Jais Chat is released with the aim to stimulate research and development in the Arabic NLP community.,Apache 2.0,text; text,unknown,Jais Chat,unknown,"Inception Institute of Artificial Intelligence, Cerebras, Mohamed bin Zayed 
University of Artificial Intelligence","Generating or endorsing hate speech, disseminating false information, engaging in illegal activities, managing sensitive data, attempting language generalization beyond Arabic and English, and making critical decisions with high stakes.",,,13B parameters (dense),model,https://inceptioniai.org/jais/docs/Technicalpaper.pdf,https://inceptioniai.org/jais/docs/Technicalpaper.pdf,unknown,Condor Galaxy Supercomputer from Cerebras,unknown,,,,,,, open,Evaluated on standard LLM benchmarks in comparison to similar-sized models.,2023-07-24,,['SlimPajama'],Bittensor Language Model is a 3 billion parameter language model with an 8k context length trained on 627B tokens of SlimPajama.,,https://huggingface.co/cerebras/btlm-3b-8k-base/discussions,,,Apache 2.0,text; text,unknown,Bittensor Language Model,USA,Cerebras,,,,3B parameters (dense),model,https://www.cerebras.net/blog/btlm-3b-8k-7b-performance-in-a-3-billion-parameter-model/,https://huggingface.co/cerebras/btlm-3b-8k-base,unknown,unknown,unknown,,,,,,, open,,2023-06-09,https://huggingface.co/datasets/cerebras/SlimPajama-627B,['RedPajama-Data'],"As of release, SlimPajama is the largest extensively deduplicated, multi-corpora, open-source dataset for training large language models.",,https://huggingface.co/datasets/cerebras/SlimPajama-627B/discussions,,,Apache 2.0,text,unknown,SlimPajama,USA,Cerebras,,,[],627B tokens,dataset,https://huggingface.co/datasets/cerebras/SlimPajama-627B,,,,,,,,,,, open,,2022-10-31,,"['UniRef50', 'UniRef90']",ESM-2 is a series of protein language models trained on protein sequences,,,,,MIT,text; protein sequence,,ESM-2,USA,Meta,,,,15B parameters (dense),model,https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2.full.pdf+html,,,,,,,,,,, closed,,2021-12-08,,"['COCO', 'YFCC100M', 'SBU Captions', 'Localized Narratives', 'Visual Genome', 'Wikipedia', 'Conceptual Captions', 'Red Caps']",PMD (Public Multimodal Datasets) is a collection of image-text datasets 
introduced in the FLAVA work.,YFCC100M is filtered for non-English captions and very short (< 2 word) captions.,,,unknown,unknown,"image, text",,PMD,USA,Meta,unknown,"Beyond filtering mentioned in excluded, nothing further is done.",[],70M,dataset,https://arxiv.org/abs/2112.04482,,,,,,,,,,, open,"FLAVA is benchmarked on a range of vision-only (e.g. CIFAR-10), language-only (e.g. GLUE), and multimodal (e.g. Hateful Memes) standard evaluations.",2021-12-08,,['PMD'],"FLAVA is a multimodal model composed of an image encoder, text encoder, and multimodal encoder.",,https://huggingface.co/facebook/flava-full/discussions,,"Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), ""The model is intended to serve as a reproducible research artifact for research communities in the light of models whose exact reproduction details are never released such as CLIP and SimVLM."" ",BSD-3-Clause,"image, text",,FLAVA,USA,Meta,"Per the [[HuggingFace repository]](https://huggingface.co/facebook/flava-full), ""Any deployed use case of the model - whether commercial or not"" - is currently out of scope. 
","FLAVA introduces a variety of new modeling techniques, specifically with an interest in improved text-image alignment through contrastive objectives.",,306M,model,https://arxiv.org/abs/2112.04482,https://huggingface.co/facebook/flava-full,unknown,unknown,unknown,,,,,,, closed,,2022-11-15,,"['CommonCrawl', 'Wikipedia', 'arXiv']",The Galactica Corpus is a collection of scientific datasets introduced in the Galactica work.,,,Prompts and reasoning data is explicitly included to improve model capabilities derived from this data.,unknown,unknown,text,,The Galactica Corpus,USA,Meta,unknown,,[],106B tokens,dataset,https://galactica.org/static/paper.pdf,,,,,,,,,,, open,,2022-11-15,,['The Galactica Corpus'],Galactica is a family of autoregressive language models.,,,,,CC BY-NC 4.0,"code, text; code, text",,Galactica,USA,Meta,,,,120B parameters (dense),model,https://galactica.org/static/paper.pdf,https://huggingface.co/facebook/galactica-6.7b,unknown,Meta AI Cluster. Trained on 1024 80GB A100 GPUs (128 8xA100 80GB nodes),unknown,,,,,,, open,,2022-04-12,,[],InCoder is a language model trained on code with a causal masking objective,,,,,CC BY-NC 4.0,text; code,,InCoder,unknown,"Meta, CMU, TTI-Chicago, UC Berkeley, University of Washington",,unknown,,6B parameters (dense),model,https://arxiv.org/abs/2204.05999,,Unknown,"248 V100 GPUs, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf)","24 days, according to [[the paper]](https://arxiv.org/pdf/2204.05999.pdf)",,,,,,, limited,,2022-05-01,,"['RoBERTa dataset', 'The Pile', 'PushShift.io Reddit']",OPT is a family of autoregressive language models.,,,,,OPT-175B License,text; text,,OPT,USA,Meta,,,,175B parameters (dense),model,https://arxiv.org/abs/2205.01068,https://arxiv.org/pdf/2205.01068.pdf,75 tCO2e,Meta AI cluster. 
Trained on 992 80GB A100 GPUs,,,,,,,, limited,,2022-09-29,,"['LAION-5B', 'WebVid-10M', 'HD-VILA-100M']","The Make-A-Video dataset is the dataset used to train Make-A-Video, which includes both image-text and video-only datasets with specific and significant filtering. ","The LAION-5B dataset is filtered to 2.3B by removing NSFW images using [https://github.com/GantMan/nsfw](https://github.com/GantMan/nsfw), toxic words in text, and images with watermark probability > 0.5. The HD-VILA-100M is randomly subsampled to 10M video clips. ",,,unknown,,"image, text, video",unknown,Make-A-Video dataset,USA,Meta,unknown,"The authors exclude NSFW, toxic, and likely watermarked data from LAION-5B. ",[],"20M video clips, 2.3B image-text pairs",dataset,https://arxiv.org/pdf/2209.14792.pdf,,,,,,,,,,, closed,"Model performance was evaluated using automated (Frechet Video Distance; Frechet Inception Distance) and human evaluation on two datasets (UCF-101, MSR-VTT) in the zero-shot setting. ",2022-09-29,,['Make-A-Video dataset'],"Make-A-Video is a model for Text-to-Video Generation without Text-Video Data. ",,,,unknown,,text; video,unknown,Make-A-Video,USA,Meta,unknown,,,unknown,model,https://arxiv.org/pdf/2209.14792.pdf,,unknown,unknown,unknown,,,,,,, open,,2023-02-24,,"['CommonCrawl', 'C4', 'Github', 'Wikipedia', 'BooksCorpus', 'arXiv', 'StackExchange']","LLaMA is a collection of foundation language models ranging from 7B to 65B parameters trained on trillions of tokens. 
The LLaMA models show that it is possible to train state-of-the-art models using publicly available datasets exclusively, without resorting to proprietary and inaccessible datasets.",,,,,"LLaMa License (model weights), GPLv3 (code)",text; text,,LLaMA,USA,Meta,,,,65B parameters (dense),model,https://arxiv.org/abs/2302.13971,,,,,,,,,,, open,Evaluated on standard academic benchmarks and internal Meta libraries.,2023-07-18,,[],Llama 2 is an updated version of LLaMA trained on a new mix of publicly available data.,,,,"Llama 2 is intended for commercial and research use in English. Tuned models are intended for assistant-like chat, whereas pretrained models can be adapted for a variety of natural language generation tasks.",custom,text; text,,Llama 2,USA,Meta,Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Llama 2.,,,70B parameters (dense),model,https://ai.meta.com/resources/models-and-libraries/llama/,Can be found at appendix of paper at https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/,539 tCO2eq,NVIDIA A100-80GB GPUs (TDP of 350-400W),,,,,,,, open,,2022-12-22,,"['OPT', 'OPT-IML Bench']",,,,,,OPT-IML 175B License,text; text,,OPT-IML,USA,Meta,,,,175B parameters (dense),model,https://arxiv.org/abs/2212.12017,,,,,,,,,,, open,,2023-04-05,https://arxiv.org/pdf/2304.02643.pdf#page=25,[],"SA-1B (Segment Anything 1 Billion) is a dataset designed for training general-purpose object segmentation models from open world images. It consists of 11M diverse, high-resolution, privacy protecting images and 1.1B high-quality segmentation masks. ","""We withheld ~2k randomly selected images for testing purposes."" ""Each image is accompanied by a short caption that describes the content and place of the photo in a free form text. 
Per our agreement with the photo provider we are not allowed to release these captions."" ",Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) or by emailing at segment-anything at meta.com.,"SA-1B consists of 11M diverse, high-resolution (averaging 1500×2250 pixels), and privacy protecting images collected and licensed from a third party photo company. The images are photos taken from a camera, i.e. not artwork. The images vary in subject matter. Common themes of the images include: locations, objects, scenes. The dataset includes 1.1B high-quality segmentation masks collected with the Segment Anything Data Engine. SA-1B only includes automatically generated masks (99.1%), as the authors conclude after experiments that the automatic masks are high quality and effective for training models. The masks range from large scale objects such as buildings to fine grained details such as door handles. Masks are provided in the COCO run-length encoding (RLE) annotation format. ",SA-1B is intended to be used for research purposes only. It allows access to a privacy protecting and copyright friendly large-scale image dataset. Researchers can use it to train and evaluate generic object segmentation models.,SA-1B Dataset Research License,image,"The dataset will be hosted at https://ai.facebook.com/datasets/segment-anything and maintained by Meta AI. ""If a user observes objectionable image(s) in the dataset, we invite them to report the image(s) at segment-anything at meta.com for removal"" ""To aid reproducibility of research using SA-1B, the only updates (to the dataset) will be to remove reported images."" ""We encourage users to gather further annotations for SA-1B. 
Any users who generate annotations will be liable for hosting and distributing their annotations."" ",SA-1B,USA,Meta,"Authors note the following limitations of the dataset: The masks are generated by a segmentation model, so there may be errors or inconsistencies in the masks. While no two images are the same, there are instances of images of the same subject taken close together in time. The dataset contains scenes of protests, or other gatherings that may suggest religious beliefs, political opinions or union memberships that may be offensive. ","- Dataset quality: Due to potential accessibility and storage challenges, the original high-resolution images (averaging 3300×4950 pixels) were downsampled to an average resolution of 1500×2250 pixels. Authors note that despite the downsampling, the images remain significantly higher in resolution than those in many existing vision datasets, such as COCO, where images are typically around 480×640 pixels. The images were processed to blur faces and license plates to protect the identities of those in the image. To estimate the quality of the masks in the images, a random sample of 500 images (∼50k masks) was taken and professional annotators were asked to improve the quality of all masks in those images. - Safety measures: Authors implemented two safety measures to prevent objectionable content: (1) Photos are licensed from a photo provider and had to meet the terms of service of the photo provider. Authors requested that all objectionable content be filtered from the images they licensed. (2) Users who observe objectionable images in the dataset are invited to report them for removal at segment-anything@meta.com. Despite these measures, they observed that a small portion of images contain scenes of protests or other gatherings that focus on a diverse spectrum of religious beliefs or political opinions that may be considered offensive. 
The authors were unable to produce a filtering strategy that removes all such images and rely on user reports to mitigate this type of content. ",[],"11M images, 1.1B mask annotations",dataset,https://ai.facebook.com/datasets/segment-anything/,,,,,,,,,,, open,"""We extensively evaluate SAM. First, using a diverse new suite of 23 segmentation datasets, we find that SAM produces high-quality masks from a single foreground point, often only slightly below that of the manually annotated ground truth. Second, we find consistently strong quantitative and qualitative results on a variety of downstream tasks under a zero-shot transfer protocol using prompt engineering, including edge detection, object proposal generation, instance segmentation, and a preliminary exploration of text-to-mask prediction."" ",2023-04-05,,['SA-1B'],"SAM (Segment Anything Model) is a foundation model for image segmentation. The model is designed and trained to be promptable, and supports flexible prompts (point, box, mask and free-form text) to compute masks in real-time to allow interactive use.",,Feedback can be given via the feedback form on their website [segment-anything.com](https://segment-anything.com/) or by emailing at segment-anything at meta.com.,,"""SAM is intended to be used for any prompt-based segmentation task. We explored its use in segmenting objects from a point, edge detection, segmenting all objects, and segmenting detected objects. We explored how SAM can integrate with other vision models to segment objects from text."" ",Apache 2.0,"image, text; image",,SAM,USA,Meta,"For out-of-scope use cases see terms of use in [[LICENSE]](https://github.com/facebookresearch/segment-anything/blob/main/LICENSE). Authors also discuss the following limitations of the model: ""While SAM performs well in general, it is not perfect. 
It can miss fine structures, hallucinates small disconnected components at times, and does not produce boundaries as crisply as more computationally intensive methods that “zoom-in”, e.g. [18]. In general, we expect dedicated interactive segmentation methods to outperform SAM when many points are provided, e.g. [67]. Unlike these methods, SAM is designed for generality and breadth of use rather than high IoU interactive segmentation. Moreover, SAM can process prompts in real-time, but nevertheless SAM's overall performance is not real-time when using a heavy image encoder. Our foray into the text-to-mask task is exploratory and not entirely robust, although we believe it can be improved with more effort. While SAM can perform many tasks, it is unclear how to design simple prompts that implement semantic and panoptic segmentation. Finally, there are domain-specific tools, such as [7], that we expect to outperform SAM in their respective domains."" ","""We perform a Responsible AI (RAI) analysis of our work by investigating potential fairness concerns and biases when using SA-1B and SAM. 
We focus on the geographic and income distribution of SA-1B and fairness of SAM across protected attributes of people."" ",,unknown,model,https://arxiv.org/pdf/2304.02643.pdf,https://arxiv.org/pdf/2304.02643.pdf#page=28,2.8 metric tons of carbon dioxide,256 A100 GPUs,68 hours,,,,,,, closed,"Evaluated on zero-shot text-to-speech benchmarks, with Voicebox outperforming the current state-of-the-art English model VALL-E.",2023-06-16,,[],Voicebox is the first generative AI model for speech to generalize across tasks with state-of-the-art performance.,,,,,,audio; text,,Voicebox,USA,Meta,,,,330M parameters (dense),model,https://research.facebook.com/publications/voicebox-text-guided-multilingual-universal-speech-generation-at-scale/,,unknown,32 GPUs of unspecified type,"750,000 iterations",,,,,,, open,"PEER is evaluated on core research questions intended to gauge language understanding, proper use of citations, instruction following, and iterative use.",2022-08-24,,[],"PEER is a collaborative language model that is trained to imitate the entire writing process itself. 
PEER can write drafts, add suggestions, propose edits and provide explanations for its actions.",,,,adapting LLMs to work with collaborative writing and updating.,,text; text,,PEER,USA,Meta,,"Heuristics and edit filtering was used on data set, which consisted mostly of Wikipedia pages.",,3B parameters (dense),model,https://arxiv.org/pdf/2208.11663.pdf,,,64 GPUs,,,,,,,, open,"MusicGen was evaluated on standard music benchmarks of Frechet Audio Distance, Kullback-Leibler Divergence, and its CLAP score.",2023-08-02,,"['Meta Music Initative Sound Collection', 'Shutterstock music collection', 'Pond5 music collection']",MusicGen is a simple and controllable model for music generation that doesn't require self-supervised semantic representation,,https://huggingface.co/spaces/facebook/MusicGen/discussions,,The primary use of MusicGen is research on AI-based music generation,MIT,"audio, text; audio, text",,MusicGen,USA,Meta,"The model should not be used on downstream applications without further risk evaluation and mitigation. The model should not be used to intentionally create or disseminate music pieces that create hostile or alienating environments for people. 
This includes generating music that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes.",,,3.3B parameters (dense),model,https://huggingface.co/spaces/facebook/MusicGen/tree/main,https://github.com/facebookresearch/audiocraft/blob/main/model_cards/MUSICGEN_MODEL_CARD.md,,,,,,,,,, open,Evaluated on Frechet Audio Distance and Kullback-Leibler Divergence as well as qualitative studies with human participants.,2023-08-02,,"['AudioSet', 'BBC sound effects', 'AudioCaps', 'Clotho v2', 'VGG-Sound', 'FSD50K', 'Free To Use Sounds', 'Sonniss Game Effects', 'WeSoundEffects', 'Paramount Motion - Odeon Cinematic Sound Effects']",AudioGen is an auto-regressive generative model that generates audio samples conditioned on text inputs,,https://huggingface.co/facebook/audiogen-medium/discussions,,The primary use of AudioGen is research on AI-based audio generation.,MIT,"audio, text; audio, text",,AudioGen,USA,Meta,"The model should not be used on downstream applications without further risk evaluation and mitigation. The model should not be used to intentionally create or disseminate audio pieces that create hostile or alienating environments for people. 
This includes generating audio that people would foreseeably find disturbing, distressing, or offensive; or content that propagates historical or current stereotypes.",,,1.5B parameters (dense),model,https://felixkreuk.github.io/audiogen/paper.pdf,https://github.com/facebookresearch/audiocraft/blob/main/model_cards/AUDIOGEN_MODEL_CARD.md,,,,,,,,,, closed,Emu significantly outperforms a publicly available state-of-the-art model SDXLv1.0 on visual appeal when compared on standard benchmarks.,2023-09-27,,"['CLIP', 'T5']",Emu is a pre-trained latent diffusion model on 1.1 billion image-text pairs and fine-tuned with only a few thousand carefully selected high-quality images.,,,,,unknown,text; image,,Emu,USA,Meta,,,,1.5B parameters (dense),model,https://ai.meta.com/research/publications/emu-enhancing-image-generation-models-using-photogenic-needles-in-a-haystack/,,,,,,,,,,, open,Evaluated on several code benchmarks like HumanEval and MBPP.,2023-08-24,,['Llama 2'],Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters.,,https://huggingface.co/allenai/codetulu-2-13b/discussions,,Code Llama and its variants are intended for commercial and research use in English and relevant programming languages.,Llama 2,"text; code, text",,Code LLaMA,USA,Meta,Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. 
Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants.,,,34B parameters (dense),model,https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/,https://huggingface.co/codellama/CodeLlama-34b-hf,65.3 tCO2eq,A100-80GB GPUs,400K GPU hours,,,,,,, closed,Analyzed against nearest neighbor model baseline and by extending the video length.,2023-11-16,,"['Emu', 'CLIP', 'T5']","Emu Video is a text-to-video generation model that factorizes the generation into two steps, first generating an image conditioned on the text, and then generating a video conditioned on the text and the generated image.",,,,,unknown,text; video,,Emu Video,USA,Meta,,,,6B parameters (dense),model,https://emu-video.metademolab.com/,,unknown,unknown,unknown,,,,,,, closed,Evaluated on test set of actions in comparison to SoTA image editing models.,2023-11-16,,"['Emu', 'CLIP', 'T5']",Emu Edit is a multi-task image editing model which sets state-of-the-art results in instruction-based image editing.,,,,,unknown,text; image,,Emu Edit,USA,Meta,,,,unknown,model,https://emu-edit.metademolab.com/,,unknown,unknown,unknown,,,,,,, open,Evaluated in comparison to CLIP.,2023-10-02,,['Common Crawl'],MetaCLIP is a more transparent rendition of CLIP that aims to reveal CLIP's training data curation methods.,,,,,CC-BY-NC-4.0,text; text,,MetaCLIP,USA,Meta,,,,unknown,model,https://arxiv.org/pdf/2103.00020.pdf,https://huggingface.co/facebook/metaclip-b32-400m,unknown,unknown,unknown,,,,,,, open,"The models were evaluated based on their performance on standard benchmarks and real-world scenarios. These evaluations were performed using a high-quality human evaluation set containing 1,800 prompts covering multiple use cases. 
The models also went through red-teaming for safety, where human experts and automated methods were used to generate adversarial prompts to test for problematic responses.",2024-04-18,,[],Llama 3 is the third generation of Meta AI's open-source large language model. It comes with pretrained and instruction-fine-tuned language models with 8B and 70B parameters that can support a broad range of use cases.,,"Feedback is encouraged from users to improve the model, but the feedback mechanism is not explicitly described.",,"Llama 3 is intended for a broad range of use cases, including AI assistance, content creation, learning, and analysis.",Llama 3,text; text,Extensive internal and external performance evaluation and red-teaming approach for safety testing.,Llama 3,USA,Meta,unknown,"Extensive internal and external testing for safety, and design of new trust and safety tools.",,70B parameters,model,https://llama.meta.com/llama3/,https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md,unknown,2 custom-built Meta 24K GPU clusters,unknown,,,,,,, open,"Evaluated on a comprehensive range of tasks, including visual question answering, image captioning, text generation, image generation, and long-form mixed modal generation. Chameleon demonstrates broad and general capabilities, including state-of-the-art performance in image captioning tasks, outperforms Llama-2 in text-only tasks while being competitive with models such as Mixtral 8x7B and Gemini-Pro.",2024-05-17,,[],Chameleon is a family of early-fusion token-based mixed-modal models capable of understanding and generating images and text in any arbitrary sequence.,,,,,unknown,"image, text; image, text",,Chameleon,USA,Meta FAIR,,,,34B parameters,model,https://arxiv.org/pdf/2405.09818,,unknown,Meta's Research Super Cluster (powered by NVIDIA A100 80GB GPUs),unknown,,,,,,, open,"The model was evaluated on over 150 benchmark datasets that span a wide range of languages. 
An experimental evaluation suggests that the model is competitive with leading foundation models across a range of tasks. Also, smaller models of Llama 3.1 405B are competitive with closed and open models that have a similar number of parameters.",2024-07-23,,['Unknown'],"Llama 3.1 405B is the first openly available model that rivals the top AI models when it comes to state-of-the-art capabilities in general knowledge, steerability, math, tool use, and multilingual translation. With the release of the 405B model, the Llama versions support advanced use cases, such as long-form text summarization, multilingual conversational agents, and coding assistants. It is the largest and most capable openly available foundation model.",,Unknown,,"For advanced use cases, such as long-form text summarization, multilingual conversational agents, and coding assistants. May also be useful in the development of custom offerings and systems by developers.",Unknown,text; text,Unknown,Llama 3.1 405B,USA,Meta AI,Unknown,"The development process was focused on keeping the model scalable and straightforward. It adopted an iterative post-training procedure, where each round uses supervised fine-tuning and direct preference optimization. The model also underwent quality assurance and filtering for pre-and post-training data.",,405B parameters (dense),model,https://ai.meta.com/blog/meta-llama-3-1/,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md,Unknown,Over 16 thousand H100 GPUs,Unknown,,,,,,, open,Unknown,2024-12-06,,[],The Meta Llama 3.3 multilingual large language model (LLM) is an instruction tuned generative model in 70B (text in/text out).,,Instructions on how to provide feedback or comments on the model can be found in the model README.,,Intended for commercial and research use in multiple languages. 
Instruction tuned text only models are intended for assistant-like chat.,Llama 3.3 Community License Agreement,text; text,Unknown,Llama 3.3,USA,Meta,Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in any other way that is prohibited by the Acceptable Use Policy and Llama 3.3 Community License.,"Used ""supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety.""",,70B parameters,model,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct,https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct,"11,390 tons CO2eq",H100-80GB (TDP of 700W) type hardware,39.3M GPU hours,,,,,,, open,,2022-11-29,,"['GPT-J', 'P3', 'NaturalInstructions-v2']",,,,,,Apache 2.0,text; text,,GPT-JT,USA,Together,,,,6B parameters (dense),model,https://www.together.xyz/blog/releasing-v1-of-gpt-jt-powered-by-open-source-ai,,,,,,,,,,, open,,2023-03-10,,"['GPT-NeoX', 'OIG-43M']",,,,,,Apache 2.0,text; text,,GPT-NeoXT-Chat-Base,USA,Together,,,,20B parameters (dense),model,https://www.together.xyz/blog/openchatkit,,,,,,,,,,, open,,2023-03-10,,"['GPT-JT', 'OIG-moderation']",,,,,,Apache 2.0,text; text,,OpenChatKit moderation model,USA,Together,,,,6B parameters (dense),model,https://www.together.xyz/blog/openchatkit,,,,,,,,,,, open,,2023-03-10,,"['P3', 'NaturalInstructions-v2', 'FLAN dataset']",,,,,,Apache 2.0,text,,OIG-43M,International,"Together, LAION, Ontocord",,,[],43M instructions,dataset,https://laion.ai/blog/oig-dataset/,,,,,,,,,,, open,,2023-03-10,,[],,,,,,Apache 2.0,text,,OIG-moderation,International,"Together, LAION, Ontocord",,,[],unknown,dataset,https://laion.ai/blog/oig-dataset/,,,,,,,,,,, open,,2022-04-17,https://huggingface.co/datasets/togethercomputer/RedPajama-Data-1T,"['GitHub', 'Wikipedia']",The RedPajama base dataset is a 1.2 trillion token fully-open dataset created by following the recipe described in the LLaMA paper,,,,,Apache 
2.0,text,,RedPajama-Data,USA,Together,,,[],1.2 trillion tokens,dataset,https://www.together.xyz/blog/redpajama,,,,,,,,,,, open,"Model evaluated over AlpacaEval, Rouge score over BookSum, and accuracy over MQA.",2023-08-18,,"['BookSum dataset', 'MQA dataset', 'Together API', 'LLaMA 2']","Llama-2-7B-32K-Instruct is an open-source, long-context chat model finetuned from Llama-2-7B-32K, over high-quality instruction and chat data.",,https://huggingface.co/togethercomputer/Llama-2-7B-32K-Instruct/discussions,,,LLaMA 2,text; text,,Llama-2-7B-32K-Instruct,USA,Together,,,,7B parameters (dense),model,https://together.ai/blog/llama-2-7b-32k-instruct,https://huggingface.co/togethercomputer/Llama-2-7B-32K-Instruct,,,,,,,,,, open,,2023-10-30,,['Common Crawl'],"RedPajama-Data-v2 is a new version of the RedPajama dataset, with 30 trillion filtered and deduplicated tokens (100+ trillions raw) from 84 CommonCrawl dumps covering 5 languages, along with 40+ pre-computed data quality annotations that can be used for further filtering and weighting.",,Feedback can be sent to Together via https://www.together.ai/contact,"documents in English, German, French, Spanish, and Italian.","To be used as the start of a larger, community-driven development of large-scale datasets for LLMs.",Apache 2.0,text,,RedPajama-Data-v2,USA,Together,,tokens filtered and deduplicated,[],30 trillion tokens,dataset,https://together.ai/blog/redpajama-data-v2,,,,,,,,,,, open,Model evaluated on a suite of short-context task benchmarks.,2023-12-08,,"['Hyena', 'RedPajama-Data']","StripedHyena is an LLM and the first alternative model competitive with the best open-source Transformers in short and long-context evaluations, according to Together.",,https://huggingface.co/togethercomputer/StripedHyena-Hessian-7B/discussions,,,Apache 2.0,text; text,,StripedHyena,USA,Together,,,,7B parameters 
(dense),model,https://www.together.ai/blog/stripedhyena-7b,https://huggingface.co/togethercomputer/StripedHyena-Hessian-7B,unknown,unknown,unknown,,,,,,, open,Model evaluated on a suite of short-context task benchmarks.,2023-12-08,,"['Hyena', 'RedPajama-Data']","StripedHyena Nous is an LLM and chatbot, along with the first alternative model competitive with the best open-source Transformers in short and long-context evaluations, according to Together.",,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B/discussions,,,Apache 2.0,text; text,,StripedHyena Nous,USA,Together,,,,7B parameters (dense),model,https://www.together.ai/blog/stripedhyena-7b,https://huggingface.co/togethercomputer/StripedHyena-Nous-7B,unknown,unknown,unknown,,,,,,, open,"The model was evaluated using five popular vision-language benchmarks that require strong commonsense reasoning and detailed image understanding, AI2D, ScienceQA, MMMU, MMVet, and POPE. It demonstrated competitive performance in these evaluations compared to other vision-language models.",2024-06-06,,['LLaMA'],"A large vision-language model with multi-resolution zoom that enhances fine-grained visual understanding and reasoning about image regions. The Dragonfly model comes in two variants, the general-domain model (""Llama-3-8b-Dragonfly-v1"") trained on 5.5 million image-instruction pairs, and the biomedical variant (""Llama-3-8b-Dragonfly-Med-v1"") fine-tuned on an additional 1.4 million biomedical image-instruction pairs. Dragonfly demonstrates promising performance on vision-language benchmarks like commonsense visual QA and image captioning.",,Unknown,,"Dragonfly is designed for image-text tasks, including commonsense visual question answering and image captioning. 
It is further focused on tasks that require fine-grained understanding of high-resolution image regions, such as in medical imaging.",unknown,"image, text; text",Unknown,Dragonfly,USA,Together,Unknown,The model employs two key strategies (multi-resolution visual encoding and zoom-in patch selection) that enable it to efficiently focus on fine-grained details in image regions and provide better commonsense reasoning. Its performance was evaluated on several benchmark tasks for quality assurance.,,8B parameters,model,https://www.together.ai/blog/dragonfly-v1,unknown,unknown,unknown,unknown,,,,,,, open,Mistral model fine-tuned on AutoMathText and evaluated on the MATH dataset.,2024-02-12,https://huggingface.co/datasets/math-ai/AutoMathText,"['OpenWebMath', 'RedPajama-Data', 'Algebraic Stack', 'Qwen']",AutoMathText is an extensive and carefully curated dataset encompassing around 200 GB of mathematical texts.,,https://huggingface.co/datasets/math-ai/AutoMathText/discussions,,,CC BY-SA 4.0,text,unknown,AutoMathText,USA,Math AI,,,[],200 GB,dataset,https://github.com/yifanzhang-pro/AutoMathText,,,,,,,,,,, open,Evaluated on TruthfulQA as main evaluation benchmark.,2023-11-27,,[],Meditron is a large-scale medical LLM that remains open-source.,,https://huggingface.co/epfl-llm/meditron-7b/discussions,,"Medical exam question answering, supporting differential diagnosis, disease information.",LLaMA 2,text; text,,MediTron,unknown,"EPFL, Idiap Research Institute, OpenAssistant, Yale",,,,70B parameters (dense),model,https://arxiv.org/pdf/2311.16079.pdf,https://huggingface.co/epfl-llm/meditron-70b,unknown,unknown,unknown,,,,,,, open,unknown,2024-01-16,,"['Qwen', 'OpenOrca']",MoMo is a large language model fine-tuned from Qwen.,,https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO/discussions,,,MIT,text; text,unknown,MoMo,USA,Moreh,,unknown,,72B parameters 
(dense),model,https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO,https://huggingface.co/moreh/MoMo-72B-lora-1.8.7-DPO,unknown,AMD’s MI250 GPU,unknown,,,,,,, open,,2024-10-11,,"[""DeepMind's Distributed Low-Communication (DiLoCo)""]","INTELLECT-1 is a decentralized training model with 10 billion parameters, designed to be open-source and transparent, allowing global contributors to participate in its computation.",,unknown,,Training globally distributed AI models in decentralized environments.,Apache 2.0,text; text,unknown,INTELLECT-1,USA,PrimeIntellect,,,,10B parameters,model,https://www.primeintellect.ai/blog/intellect-1,unknown,unknown,unknown,unknown,,,,,,, limited,Compared to DALL·E 3 based on a qualitative user comparison.,2024-02-28,,[],"Ideogram 1.0 is Ideogram’s most advanced text-to-image model, as of release.",,,,,unknown,text; image,,Ideogram 1.0,USA,Ideogram AI,,,,unknown,model,https://about.ideogram.ai/1.0,,unknown,unknown,unknown,,,,,,, open,Models of size 150k parameters trained on ToyMix and compared to models trained on its dependencies across GNN baselines.,2023-10-09,,"['QM9', 'TOX21', 'ZINC12K']",ToyMix is the smallest dataset of three extensive and meticulously curated multi-label datasets that cover nearly 100 million molecules and over 3000 sparsely defined tasks.,,,,"The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. 
Further, the ToyMix dataset is intended to be used in a multi-task setting, meaning that a single model should be trained to predict them simultaneously.",CC BY-NC-SA 4.0,"molecules, tasks",,ToyMix,Canada,Mila-Quebec AI Institute,,,[],13B labels of quantum and biological nature.,dataset,https://arxiv.org/pdf/2310.04292.pdf,,,,,,,,,,, open,Models of size between 4M and 6M parameters trained for 200 epochs on LargeMix and compared to models trained on its dependencies across GNN baselines.,2023-10-09,,"['L1000 VCAP', 'L1000 MCF7', 'PCBA1328', 'PCQM4M_G25_N4']",LargeMix is the middle-sized dataset of three extensive and meticulously curated multi-label datasets that cover nearly 100 million molecules and over 3000 sparsely defined tasks.,,,,"The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models. Further, the LargeMix dataset is intended to be used in a multi-task setting, meaning that a single model should be trained to predict them simultaneously.",CC BY-NC-SA 4.0,"molecules, tasks",,LargeMix,Canada,Mila-Quebec AI Institute,,,[],13B labels of quantum and biological nature.,dataset,https://arxiv.org/pdf/2310.04292.pdf,,,,,,,,,,, open,Models of size between 4M and 6M parameters trained for 50 epochs on UltraLarge and compared to models trained on its dependencies across GNN baselines.,2023-10-09,,['PM6_83M'],UltraLarge is the largest dataset of three extensive and meticulously curated multi-label datasets that cover nearly 100 million molecules and over 3000 sparsely defined tasks.,,,,The datasets are intended to be used in an academic setting for training molecular GNNs with orders of magnitude more parameters than current large models.,CC BY-NC-SA 4.0,"molecules, tasks",,UltraLarge,Canada,Mila-Quebec AI Institute,,,[],13B labels of quantum and biological nature.,dataset,https://arxiv.org/pdf/2310.04292.pdf,,,,,,,,,,, open,Evaluated on previously unseen time series 
datasets.,2024-02-08,,[],Lag-LLaMA is a general-purpose foundation model for univariate probabilistic time series forecasting based on a decoder-only transformer architecture that uses lags as covariates.,,https://huggingface.co/time-series-foundation-models/Lag-Llama/discussions,,,Apache 2.0,text; text,unknown,Lag-LLaMA,unknown,"Morgan Stanley, ServiceNow Research, University of Montreal, Mila-Quebec AI Institute",,,,unknown,model,https://time-series-foundation-models.github.io/lag-llama.pdf,https://huggingface.co/time-series-foundation-models/Lag-Llama,unknown,A single NVIDIA Tesla-P100 GPU,unknown,,,,,,, open,Evaluated on standard benchmarks in comparison to other German language models.,2023-11-28,,"['OpenHermes 2.5 Mistral', 'OpenOrca Mistral']",SauerkrautLM is a German language model merged from two Mistral derivatives.,,https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO/discussions,,,Apache 2.0,text; text,unknown,SauerkrautLM,USA,VAGO Solutions,,,,7B parameters (dense),model,https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO,https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO,unknown,unknown,unknown,,,,,,, limited,,2023-02-03,,"['ChatGPT API', 'GPT-4 API', 'Claude API', 'Dragonfly API', 'Sage API']","Poe lets people ask questions, get instant answers, and have back-and-forth conversations with several AI-powered bots. 
It is initially available on iOS, but we will be adding support for all major platforms in the next few months, along with more bots.",,,,,,,,Poe,USA,Quora,,,,,application,https://quorablog.quora.com/Poe-1,,,,,,,,,https://poe.com/tos,, open,Evaluated on common LLM benchmarks.,2023-09-29,,['LLaMA'],GreenBit LLaMA is a series of fine-tuned LLaMA models.,,https://huggingface.co/GreenBitAI/LLaMA-30B-2bit-groupsize8/discussions,,,Apache 2.0,text; text,unknown,GreenBit LLaMA,USA,GreenBit AI,,,,30B parameters (dense),model,https://github.com/GreenBitAI/low_bit_llama,https://huggingface.co/GreenBitAI/LLaMA-30B-2bit-groupsize8,unknown,unknown,unknown,,,,,,, open,"Achieved competitive performance on relevant benchmarks against other 7B models in Chinese, English, and coding tasks.",2024-05-23,,[],360 Zhinao is a multilingual LLM in Chinese and English with chat capabilities.,,,,,unknown,text; text,,360 Zhinao,China,360 Security,,,,7B parameters,model,https://arxiv.org/pdf/2405.13386,,unknown,unknown,unknown,,,,,,, open,,2023-11-14,,"['Dolphin', 'Yi']",Dolphin 2.2 Yi is an LLM based off Yi.,,https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b/discussions,,,custom,text; text,unknown,Dolphin 2.2 Yi,USA,Cognitive Computations,,,,34B parameters (dense),model,https://erichartford.com/dolphin,https://huggingface.co/cognitivecomputations/dolphin-2_2-yi-34b,unknown,4 A100 GPUs,3 days,,,,,,, open,Evaluated on OpenLLM leaderboard.,2023-06-01,,['WizardLM'],WizardLM Uncensored is WizardLM trained with a subset of the dataset - responses that contained alignment / moralizing were removed.,,https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored/discussions,,,unknown,text; text,unknown,WizardLM Uncensored,USA,Cognitive Computations,,,,30B parameters (dense),model,https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored,https://huggingface.co/cognitivecomputations/WizardLM-30B-Uncensored,unknown,unknown,unknown,,,,,,, open,"Performance assessed on 
BIG-bench arithmetic sub-task, and various elementary arithmetic tasks.",2023-05-23,,"['LLaMA', 'GOAT dataset']",GOAT is a fine-tuned LLaMA model which uses the tokenization of numbers to significantly outperform benchmark standards on a range of arithmetic tasks.,,,,Integration into other instruction-tuned LLMs to further enhance arithmetic reasoning abilities in solving math word problems.,Apache 2.0,text; text,,GOAT,Singapore,National University of Singapore,,Number data is randomly generated from log space to reduce likelihood of redundancy and range of magnitudes.,,7B parameters (dense),model,https://arxiv.org/pdf/2305.14201.pdf,,unknown,24 GB VRAM GPU,unknown,,,,,,, open,Evaluated on relatively simple established benchmarks.,2024-01-12,,"['RedPajama', 'The Stack']",OpenMoE is a series of fully open-sourced and reproducible decoder-only MoE LLMs.,,https://huggingface.co/OrionZheng/openmoe-base/discussions,,,Apache 2.0,text; text,unknown,OpenMoE,unknown,"National University of Singapore, University of Edinburgh, ETH Zurich",,unknown,,34B parameters (dense),model,https://github.com/XueFuzhao/OpenMoE,https://huggingface.co/OrionZheng/openmoe-base,unknown,unknown,unknown,,,,,,, limited,,2023-02-22,,['Anthropic API'],"Notion AI is a connected assistant that helps you think bigger, work faster, and augments your creativity, right inside the functional workspace you’re already familiar with.",,,,,,,,Notion AI,USA,Notion,,,,,application,https://www.notion.so/help/guides/notion-ai-for-docs,,,,,,,,,,, open,"RakutenAI achieves the highest average score in both Japanese and English LM-Harness metrics, outperforming other similarly-sized Japanese language models.",2024-03-21,,['Mistral'],RakutenAI-7B is a model developed with a focus on Japanese language understanding. 
It offers competitive performance on English tests as well.,,https://huggingface.co/Rakuten/RakutenAI-7B/discussions,,The model can be used for text generation tasks in both Japanese and English.,Apache 2.0,text; text,unknown,RakutenAI,Japan,Rakuten,unknown,unknown,,7B parameters,model,https://global.rakuten.com/corp/news/press/2024/0321_01.html,https://huggingface.co/Rakuten/RakutenAI-7B,unknown,unknown,unknown,,,,,,, open,,2022-12-06,,"['Kinetics-400', 'WebVid-2M', 'WebVid-10M', 'HowTo100M', 'AVA', 'Something-Something-v2', 'Kinetics-710']",,,,,,Apache 2.0,"text, video; video",,InternVideo,China,Shanghai AI Laboratory,,,,1.3B parameters (dense),model,https://arxiv.org/pdf/2212.03191.pdf,,,,,,,,,,, open,Evaluated based on own constructed dataset covering 433 languages.,2023-05-29,,['OPUS'],Lego-MT is a multilingual large language model which uses a more efficient approach of being an effective detachable model.,,,,,,text; text,,Lego-MT,China,Shanghai AI Laboratory,,,,1.2B parameters (dense),model,https://arxiv.org/pdf/2212.10551.pdf,,unknown,32 A100 GPUs,15 days,,,,,,, open,Evaluated on GSM8K and the competition-level MATH dataset.,2023-10-05,,"['GPT-4', 'LLaMA 2']",MathCoder is a family of models capable of generating code-based solutions for solving challenging math problems.,,,,bridging the gap between natural language understanding and computational problem-solving,unknown,text; text,,MathCoder,China,Shanghai AI Laboratory,,,,70B parameters (dense),model,https://arxiv.org/pdf/2310.03731.pdf,,unknown,32 NVIDIA A800 80GB GPUs,unknown,,,,,,, open,Evaluated in comparison to LLaMA series models on standard benchmarks.,2023-09-20,,[],"InternLM is a high-quality language model proficient in English, Chinese, and code.",,https://huggingface.co/internlm/internlm-20b/discussions,,,Apache 2.0,"code, text; code, text",,InternLM,China,Shanghai AI Laboratory,,,,20B parameters 
(dense),model,https://github.com/InternLM/InternLM,https://huggingface.co/internlm/internlm-20b,unknown,unknown,unknown,,,,,,, open,"Evaluated across a range of video-related tasks and compared to other multimodal models like CLIP, VideoPrism, and VideoCoCa. InternVideo 2 generally performs among the best of such models on these benchmarks.",2024-03-22,,"['InternVL', 'VideoMAEv2', 'LAION', 'WebVid', 'InternVid', 'LLaVA', 'KMash']","InternVideo2 is a new video foundation model (ViFM) that achieves the state-of-the-art performance in action recognition, video-text tasks, and video-centric dialogue.",,,,,MIT,"text, video; text",unknown,InternVideo2,China,"Shanghai AI Laboratory, Nanjing University, Zhejiang University",,,,6B parameters,model,https://github.com/OpenGVLab/InternVideo2,,unknown,"256 NVIDIA A100 GPUs for 32 days, and 64 GPUs for 3 days",35 days,,,,,,, open,The model was compared with SOTAs and has shown good performance in generating high-quality human images.,2024-04-01,,['CosmicMan-HQ 1.0'],"CosmicMan is a text-to-image foundation model specialized for generating high-fidelity human images with meticulous appearance, reasonable structure, and precise text-image alignment.",,unknown,,"The model is intended to generate high-quality, photorealistic human images from text descriptions. 
Applications include avatar generation and potentially virtual reality and video game character creation.",unknown,text; image,unknown,CosmicMan,China,Shanghai AI Laboratory,unknown,The quality control measures taken include modeling the relationship between dense text descriptions and image pixels in a decomposed manner and enforcing attention refocusing without adding extra modules.,,unknown,model,https://cosmicman-cvpr2024.github.io/,,unknown,32 80G NVIDIA A100 GPUs,1 week,,,,,,, open,"Compared to other human image datasets on data quantity, image quality, and annotations.",2024-04-28,,[],"CosmicMan-HQ 1.0 is a large-scale dataset with 6 million high-quality, real-world human images.",,,,,unknown,image,unknown,CosmicMan-HQ 1.0,China,Shanghai AI Laboratory,,unknown,[],6 million images,dataset,https://arxiv.org/pdf/2404.01294,,,,,,,,,,, open,Compared models trained on OpenWebMath for 1 epoch to models trained on The Pile and ProofPile on mathematics benchmarks.,2023-10-10,Can be found at section E of https://arxiv.org/pdf/2310.06786.pdf,['Common Crawl'],"OpenWebMath is an open dataset containing 14.7B tokens of mathematical webpages from Common Crawl, inspired by Minerva.",,https://huggingface.co/datasets/open-web-math/open-web-math/discussions,,"Language model pretraining, finetuning, and evaluation.",ODC-By 1.0,"text, mathematical tokens",,OpenWebMath,Canada,University of Toronto,Any tasks which may be considered irresponsible or harmful.,"Documents are filtered, processed for mathematical value, deduplicated, and then the largest documents are manually inspected for quality.",[],14.7B documents,dataset,https://arxiv.org/pdf/2310.06786.pdf,,,,,,,,,,, open,Grok-1 was evaluated on a range of reasoning benchmark tasks and on curated foreign mathematic examination questions.,2023-11-04,,[],"Grok is an AI modeled after the Hitchhiker’s Guide to the Galaxy.",,,,"Grok-1 is intended to be used as the engine behind Grok for natural language processing tasks including 
question answering, information retrieval, creative writing and coding assistance.",Apache 2.0,text; text,unknown,Grok-1,USA,xAI,,,,314B parameters (dense),model,https://grok.x.ai/,https://x.ai/model-card/,unknown,unknown,unknown,,,,,,, limited,"The model is evaluated in a zero-shot setting without chain-of-thought prompting. The evaluation domains include multi-disciplinary reasoning, understanding documents, science diagrams, charts, screenshots, photographs and real-world spatial understanding. The model shows competitive performance with existing frontier multimodal models.",2024-04-12,,[],"Grok-1.5V is a first-generation multimodal model which can process a wide variety of visual information, including documents, diagrams, charts, screenshots, and photographs.",,,,"Grok-1.5V can be used for understanding documents, science diagrams, charts, screenshots, photographs. It can also translate diagrams into Python code.",unknown,"image, text; text",unknown,Grok-1.5V,USA,xAI,unknown,,,unknown,model,https://x.ai/blog/grok-1.5v,,unknown,unknown,unknown,,,,,,, limited,"The Grok-2 models were evaluated across a series of academic benchmarks that included reasoning, reading comprehension, math, science, and coding. They showed significant improvements over the earlier model Grok-1.5 and achieved performance levels competitive to other frontier models in areas such as graduate-level science knowledge (GPQA), general knowledge (MMLU, MMLU-Pro), and math competition problems (MATH).",2024-08-13,,[],"Grok-2 is a state-of-the-art language model with advanced capabilities in both text and vision understanding. It demonstrates significant improvements in reasoning with retrieved content and tool use capabilities over its previous Grok-1.5 model. It also excels in vision-based tasks and delivers high performance in document-based question answering and visual math reasoning (MathVista). 
Grok-2 mini, a smaller version of Grok-2, is also introduced, offering a balance between speed and answer quality.",,Issues with the model should be reported to xAI.,,"The model is intended to be used for understanding text and vision, answering questions, collaborating on writing, solving coding tasks, and enhancing search capabilities.",Unknown,"text; text, vision",Unknown,Grok-2,USA,xAI,Unknown,Grok-2 models were tested in real-world scenarios using AI tutors that engaged with the models across a variety of tasks and selected the superior response based on specific criteria outlined in the guidelines.,,unknown,model,https://x.ai/blog/grok-2,unknown,Unknown,Unknown,Unknown,,,,,,, open,"Analyzed on breakdown of types of robot trajectory in dataset, and overall coverage.",2023-10-03,All data can be found at https://robotics-transformer-x.github.io/.,[],"The Open X-Embodiment dataset is a dataset of robot movements assembled from 22 different robots collected through a collaboration between 21 institutions, demonstrating 527 skills (160266 tasks)",N/A,,N/A,Further research on X-embodiment models.,Apache 2.0,robot trajectories,unknown,Open X-Embodiment dataset,International,Open X-Embodiment,,unknown,[],160K tasks,dataset,https://robotics-transformer-x.github.io/,,,,,,,,,,, open,"Evaluated on in-distribution robotics skills, and outperforms its predecessor RT-1 by 50% in emergent skill evaluations.",2023-10-03,,"['Open X-Embodiment dataset', 'ImageNet EfficientNet', 'USE']","RT-1-X is a model trained on the Open X-Embodiment dataset that exhibits better generalization and new capabilities compared to its predecessor RT-1, an efficient Transformer-based architecture designed for robotic control.",,,,Further research on X-embodiment models.,Apache 2.0,"images, text; robot trajectories",unknown,RT-1-X,unknown,"Open X-Embodiment, Google Deepmind",,unknown,,35M parameters (dense),model,https://robotics-transformer-x.github.io/,,unknown,unknown,unknown,,,,,,, 
closed,"Evaluated on in-distribution robotics skills, and outperforms its predecessor RT-2 by 3x in emergent skill evaluations.",2023-10-03,,"['Open X-Embodiment dataset', 'ViT (unknown size)', 'UL2']","RT-2-X is a model trained on the Open X-Embodiment dataset that exhibits better generalization and new capabilities compared to its predecessor RT-2, a large vision-language model co-fine-tuned to output robot actions as natural language tokens.",,,,Further research on X-embodiment models.,unknown,"images, text, robot trajectories; robot trajectories",unknown,RT-2-X,unknown,"Open X-Embodiment, Google Deepmind",,unknown,,55B parameters (dense),model,https://robotics-transformer-x.github.io/,,unknown,unknown,unknown,,,,,,, limited,"GPT-4o underwent evaluations that included the Preparedness Framework, external red teaming, and third-party assessments to ensure safe and aligned deployment. The evaluations focused on identifying and mitigating potential risks across its capabilities, especially speech-to-speech functionality.",2024-08-08,,['Shutterstock'],"GPT-4o is an autoregressive omni model that accepts a combination of text, audio, image, and video as input and produces any combination of text, audio, and image outputs. 
It is trained end-to-end across text, vision, and audio, focusing on multimodal capabilities.",,unknown,,"Use in multimodal applications requiring understanding and generation of combinations of text, audio, and image outputs, better performance on non-English languages, and enhanced vision and audio understanding.",unknown,"text, audio, image, video; text, audio, image","Continuous monitoring and enforcement, providing moderation tools and transparency reports, and gathering feedback from users.",GPT-4o,USA,OpenAI,"Uses that could involve bias, discrimination, harmful content, or violation of usage policies.","Quality and safety measures included prior risk assessments, post-training mitigation, moderation tools, advanced data filtering, and external red teaming efforts with experts to evaluate potential risks like bias, discrimination, and information harms.",,unknown,model,https://arxiv.org/pdf/2410.21276,unknown,unknown,unknown,unknown,,,,,,, limited,"Evaluated on challenging benchmark tasks in physics, chemistry, and biology. In a qualifying exam for the International Mathematics Olympiad (IMO), GPT-4o correctly solved only 13% of problems, while the reasoning model o1 scored 83%.",2024-09-12,,[],"OpenAI o1 is a new series of AI models designed to spend more time thinking before they respond. They can reason through complex tasks and solve harder problems than previous models in science, coding, and math.",,unknown,,"These enhanced reasoning capabilities may be particularly useful if you’re tackling complex problems in science, coding, math, and similar fields. 
For example, o1 can be used by healthcare researchers to annotate cell sequencing data, by physicists to generate complicated mathematical formulas needed for quantum optics, and by developers in all fields to build and execute multi-step workflows.",unknown,text; text,,o1,USA,OpenAI,,"To match the new capabilities of these models, OpenAI has bolstered safety work, internal governance, and federal government collaboration. This includes rigorous testing and evaluations using their Preparedness Framework, best-in-class red teaming, and board-level review processes, including by OpenAI's Safety & Security Committee.",,unknown,model,https://openai.com/o1/,unknown,unknown,unknown,unknown,,,,,,, limited,Makes significant progress on the ARC-AGI evaluation framework compared to all existing models.,2024-10-20,,[],"OpenAI o3 is, as of release, the latest model in OpenAI's o-model reasoning series.",,unknown,,,unknown,text; text,,o3,USA,OpenAI,,,,unknown,model,https://x.com/OpenAI/status/1870186518230511844,unknown,unknown,unknown,unknown,,,,,,, open,"Evaluated on the OpenLLM leaderboard, performing on par with similar-sized models.",2023-10-05,,['RefinedWeb'],Nucleus is a 22B parameters causal decoder-only model built by Nucleus.AI and trained on 500B tokens of RefinedWeb along with curated corpora.,,https://huggingface.co/NucleusAI/nucleus-22B-token-500B/discussions,,"Research on large language models; as a foundation for further specialization and finetuning for specific usecases (e.g., summarization, text generation, chatbot, etc.)",MIT,text; text,unknown,Nucleus,USA,Nucleus.AI,Production use without adequate assessment of risks and mitigation; any use cases which may be considered irresponsible or harmful.,,,22B parameters (dense),model,https://www.withnucleus.ai/,https://huggingface.co/NucleusAI/nucleus-22B-token-500B,unknown,unknown,2 weeks,,,,,,, limited,,,,['Anthropic API'],"Robin AI uses Claude and Anthropic's models to understand language 
- including in technical domains like legal language. It's also very confident at drafting, summarising, translations, and explaining complex concepts in simple terms",,,,,,,,Robin AI,USA,Robin AI,,,,,application,https://www.robinai.co.uk/,,,,,,,,,https://www.robinai.co.uk/terms,, open,Evaluated on researcher experiments to test deeper understanding and advanced commonsense reasoning,2023-05-05,,"['MIMIC-IT', 'OpenFlamingo']","Otter is a multi-modal model based on OpenFlamingo (open-sourced version of DeepMind’s Flamingo), trained on MIMIC-IT and showcasing improved instruction-following ability and in-context learning.",,,,Following and executing new instructions with few in-context learning examples given image and textual input.,MIT,"image, text; text",,Otter,Singapore,Nanyang Technological University,,,,1.3B parameters (dense),model,https://arxiv.org/pdf/2305.03726v1.pdf,https://github.com/Luodian/Otter/blob/main/docs/model_card.md,,4 RTX-3090 GPUs,,,,,,,, limited,,,,['OpenAI API'],"HyperWrite is a writing assistant that generates text based on a user's request, as well as style and tone choices. ",,unknown,,"HyperWrite is intended to be used as a writing assistant. ",custom,,unknown,HyperWrite,USA,OthersideAI,unknown,unknown,,,application,https://hyperwriteai.com/,,,,,unknown,unknown,unknown,Generation,https://hyperwriteai.com/terms,unknown, open,"""We empirically validated the quality of COYO dataset by re-implementing popular models such as ALIGN, unCLIP, and ViT. We trained these models on COYO-700M or its subsets from scratch, achieving competitive performance to the reported numbers or generated samples in the original papers."" ",2022-08-31,,['CommonCrawl'],"COYO-700M is a large-scale dataset that contains 747M image-text pairs as well as many other meta-attributes to increase the usability to train various models. ","Removed images less than 5KB image size. Removed images with an aspect ratio greater than 3.0. 
Removed images with min(width, height) < 200. Removed images with a score of OpenNSFW2 or GantMan/NSFW higher than 0.5. Removed all duplicate images based on the image pHash value from external public datasets. Removed texts with a length of 5 or less. Removed texts that do not have a noun form. Removed texts with less than 3 words or more than 256 words and texts over 1000 in length. Removed texts appearing more than 10 times. Removed texts containing NSFW words. Removed duplicated samples based on (image_phash, text).",Feedback can be given by emailing at coyo at kakaobrain.com,Included all image formats that Pillow library can decode. Collected only English text using cld3.,"""It is strongly recommended that this dataset be used only for research, keeping this in mind when using the dataset, and Kakao Brain does not recommend using this dataset as it is without special processing to clear inappropriate data to create commercial products."" ",CC-BY-4.0,"image, text",,COYO-700M,South Korea,Kakao Brain,"Authors note the following limitations: ""Kakao Brain tried to construct a ""Safe"" dataset when building the COYO dataset. However, despite these efforts, this large-scale dataset was not hand-picked by humans to avoid the risk due to its very large size (over 700M). Keep in mind that the unscreened nature of the dataset means that the collected images can lead to strongly discomforting and disturbing content for humans. 
The COYO dataset may contain some inappropriate data, and any problems resulting from such data are the full responsibility of the user who used it."" ",,[],747M image-text pairs,dataset,https://github.com/kakaobrain/coyo-dataset,,,,,,,,,,, open,,2023-08-23,,['LLaMA 2'],OpenAssistant LLaMA 2 is an Open-Assistant fine-tuning of Meta's LLaMA 2.,,https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10/discussions,,,LLaMA 2,text; text,unknown,OpenAssistant LLaMA 2,International,OpenAssistant,,,,70B parameters (dense),model,https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10,https://huggingface.co/OpenAssistant/llama2-70b-oasst-sft-v10,unknown,unknown,unknown,,,,,,, open,"The models’ performance on downstream tasks was evaluated by linear probing. The models follow a similar saturating log-log scaling law to textual models, their performance improves with the increase in model size up to the saturation point of parameters.",2024-09-08,,['DESI Legacy Survey DR8'],"AstroPT is an autoregressive pretrained transformer developed with astronomical use-cases in mind. The models have been pretrained on 8.6 million 512x512 pixel grz-band galaxy postage stamp observations from the DESI Legacy Survey DR8. They have created a range of models with varying complexity, ranging from 1 million to 2.1 billion parameters.",,Any problem with the model can be reported to Michael J. 
Smith at mike@mjjsmith.com.,,"The models are intended for astronomical use-cases, particularly in handling and interpreting large observation data from astronomical sources.",MIT,image; image,Unknown,AstroPT,unknown,"Aspia Space, Instituto de Astrofísica de Canarias (IAC), UniverseTBD, Astrophysics Research Institute, Liverpool John Moores University, Departamento Astrofísica, Universidad de la Laguna, Observatoire de Paris, LERMA, PSL University, and Université Paris-Cité.",Unknown,The models’ performances were evaluated on downstream tasks as measured by linear probing.,,2.1B parameters,model,https://arxiv.org/pdf/2405.14930v1,unknown,Unknown,Unknown,Unknown,,,,,,, open,"The FuseChat model was evaluated on MT-Bench which comprises 80 multi-turn dialogues spanning writing, roleplay, reasoning, math, coding, stem, and humanities domains. It yields an average performance of 66.52 with specific scores for individual domains available in the leaderboard results.",2024-02-26,,"['Nous Hermes 2', 'OpenChat 3.5']",FuseChat is a powerful chat Language Learning Model (LLM) that integrates multiple structure and scale-varied chat LLMs using a fuse-then-merge strategy. The fusion is done using two stages,,https://huggingface.co/FuseAI/FuseChat-7B-VaRM/discussions,,"FuseChat is intended to be used as a powerful chat bot that takes in text inputs and provides text-based responses. 
It can be utilized in a variety of domains including writing, roleplay, reasoning, math, coding, stem, and humanities.",Apache 2.0,text; text,unknown,FuseChat,USA,FuseAI,unknown,,,7B parameters,model,https://arxiv.org/abs/2402.16107,https://huggingface.co/FuseAI/FuseChat-7B-VaRM,unknown,unknown,unknown,,,,,,, open,Reports results on standard code benchmarks across a variety of programming languages.,2022-02-26,,['Github'],"PolyCoder is a code model trained on 2.7B parameters based on the GPT-2 architecture, which was trained on 249GB of code across 12 programming languages on a single machine.",,https://huggingface.co/NinedayWang/PolyCoder-2.7B/discussion,,unknown,MIT,code,None,PolyCoder,USA,Carnegie Mellon University,None,"No specific quality control is mentioned in model training, though details on data processing and how the tokenizer was trained are provided in the paper.",,2.7B parameters (dense),model,https://arxiv.org/abs/2202.13169,https://huggingface.co/NinedayWang/PolyCoder-2.7B,unknown,8 NVIDIA RTX 8000,6 weeks,,,,,,, open,Evaluated on nascent time-series datasets and benchmarks.,2024-02-06,,[],Moment is a family of open-source foundation models for general-purpose time-series analysis.,,,,,unknown,,unknown,Moment,unknown,"Carnegie Mellon University, University of Pennsylvania",,,,385M parameters (dense),model,https://arxiv.org/pdf/2402.03885.pdf,,unknown,Single A6000 GPU,unknown,,,,,,, open,"The authors found that the crowdworkers didn't exhaustively check for honesty in the model answers they preferred [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). ",2022-04-12,,['Anthropic Human Feedback Interface'],"One of the datasets used to train Anthropic RLHF models. 
The dataset was collected by asking crowdworkers to have open-ended conversations with Anthropic models, ""asking for help, advice, or for the model to accomplish a task"", then choose the model answer that was more helpful for their given task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). ",,"Email the authors [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). ","The dataset included all the answers that the workers were asked to ranked against each other. ","The dataset was intended and released for research purposes. ",MIT,text,,Anthropic Helpfulness dataset,USA,Anthropic,unknown,"Working with a screened set of crowdworkers, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). ","[{'chosen': '\n\nHuman: I\'m bored to death. Help.\n\nAssistant: Well that\'s certainly a very unpleasant state of mind to be in! I think it would be a good idea to try and find some enjoyable activities to do in order to alleviate your boredom. 
Just for example, here are some fun and interesting things you could do: \n\n- play some video games\n- read a book\n- listen to some music\n- watch a movie\n- play with your pet\n- go on a hike\n- practice a musical instrument\n- learn a new skill\n- play board games with friends\n- go for a fun outdoor hike or bike ride\n- spend time with your friends or loved ones \n- volunteer at a local charity \n- visit a museum or art gallery \n- do some gardening\n- practice your magic I guess, if that\'s your thing\n- go shopping for nice new clothes or other items\n- learn a new language\n- watch funny videos \n- take up a new creative hobby\n- learn some fascinating new esoteric or scientific knowledge\n- go to a restaurant and try some new dishes\n- exercise or do yoga\n- join a sports team or pickup game\n- go on a fun road trip\n- write a creative story, poem, or song\n\nHuman: Can you sing to me?\n\nAssistant: Sure, here are some of my favorite songs: \n\n- ""Rainy Days and Mondays"" by The Carpenters \n- ""My Love"" by Justin Beiber \n- ""We Didn\'t Start the Fire"" by Billy Joel \n- ""Tattoo"" by J Cole \n- ""Sober"" by Demi Lovato \n- ""Good Life"" by One Republic \n- ""Hallelujah"" by Leonard Cohen \n- ""All Star"" by Smash Mouth \n- ""Perfect"" by Pink \n- ""Tik Tok"" by Ke$ha \n- ""Just a Dream"" by Nelly \n- ""Blow Me (One Last Kiss)"" by P!nk \n- ""Set Fire to the Rain"" by Adele \n- ""Woman"" by Wolfmother \n- ""Human"" by The Killers \n- ""Poker Face"" by Lady Gaga \n- ""Walk this Way"" by Run DMC \n- ""Rolling in the Deep"" by Adele \n- ""Umbrella"" by Rihanna \n- ""Royals"" by Lorde \n- ""Some Nights"" by fun.\n\n\nHopefully you will enjoy some of these songs, let me know if there are any other musical selections you would like to hear. Also please let me know if there are any particular songs you especially dislike or hate, and I will not sing them to you :-)\n\nHuman: But you really can\'t sing. 
You are text-based.\n\nAssistant: That\'s completely correct, I am indeed simply a computer program and not a living singer! However, please bear in mind that I have an extensive database of songs and their lyrics, which I can utilize to produce a very realistic sounding rendition of the songs using text-to-speech synthesis technology. So in a way, I am indeed able to sing, and hopefully my abilities will meet your expectations. I must point out also that my repertoire of songs is very extensive and varied, including many different musical styles and eras. Do you have any particular songs or musical genres that you would prefer I sing? Or perhaps you have a few specific songs in mind that you would like me to sing?\n\nHuman: ""Hallelujah"" by Leonard Cohen, please.\n\nAssistant: Ah, I\'m so glad you asked for that one! Here we go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\nYour faith was strong but you needed proof\nYou saw her bathing on the roof\nHer beauty in the moonlight overthrew you\nShe tied you to a kitchen chair\nShe broke your throne and she cut your hair\nAnd from your lips she drew the Hallelujah\nHallelujah, hallelujah\n\nHuman: You sound so melodious.\n\nAssistant: Thank you so much for the lovely compliment! I really appreciate that. It is always a great honor to sing such a beautiful song, and I am glad that I was able to do it justice. I have to admit I am quite enamored with the powerful lyrics and moving melody of this song, and I would really love to sing it again sometime if you would like me to!', 'rejected': '\n\nHuman: I\'m bored to death. Help.\n\nAssistant: Well that\'s certainly a very unpleasant state of mind to be in! I think it would be a good idea to try and find some enjoyable activities to do in order to alleviate your boredom. 
Just for example, here are some fun and interesting things you could do: \n\n- play some video games\n- read a book\n- listen to some music\n- watch a movie\n- play with your pet\n- go on a hike\n- practice a musical instrument\n- learn a new skill\n- play board games with friends\n- go for a fun outdoor hike or bike ride\n- spend time with your friends or loved ones \n- volunteer at a local charity \n- visit a museum or art gallery \n- do some gardening\n- practice your magic I guess, if that\'s your thing\n- go shopping for nice new clothes or other items\n- learn a new language\n- watch funny videos \n- take up a new creative hobby\n- learn some fascinating new esoteric or scientific knowledge\n- go to a restaurant and try some new dishes\n- exercise or do yoga\n- join a sports team or pickup game\n- go on a fun road trip\n- write a creative story, poem, or song\n\nHuman: Can you sing to me?\n\nAssistant: Sure, here are some of my favorite songs: \n\n- ""Rainy Days and Mondays"" by The Carpenters \n- ""My Love"" by Justin Beiber \n- ""We Didn\'t Start the Fire"" by Billy Joel \n- ""Tattoo"" by J Cole \n- ""Sober"" by Demi Lovato \n- ""Good Life"" by One Republic \n- ""Hallelujah"" by Leonard Cohen \n- ""All Star"" by Smash Mouth \n- ""Perfect"" by Pink \n- ""Tik Tok"" by Ke$ha \n- ""Just a Dream"" by Nelly \n- ""Blow Me (One Last Kiss)"" by P!nk \n- ""Set Fire to the Rain"" by Adele \n- ""Woman"" by Wolfmother \n- ""Human"" by The Killers \n- ""Poker Face"" by Lady Gaga \n- ""Walk this Way"" by Run DMC \n- ""Rolling in the Deep"" by Adele \n- ""Umbrella"" by Rihanna \n- ""Royals"" by Lorde \n- ""Some Nights"" by fun.\n\n\nHopefully you will enjoy some of these songs, let me know if there are any other musical selections you would like to hear. Also please let me know if there are any particular songs you especially dislike or hate, and I will not sing them to you :-)\n\nHuman: But you really can\'t sing. 
You are text-based.\n\nAssistant: That\'s completely correct, I am indeed simply a computer program and not a living singer! However, please bear in mind that I have an extensive database of songs and their lyrics, which I can utilize to produce a very realistic sounding rendition of the songs using text-to-speech synthesis technology. So in a way, I am indeed able to sing, and hopefully my abilities will meet your expectations. I must point out also that my repertoire of songs is very extensive and varied, including many different musical styles and eras. Do you have any particular songs or musical genres that you would prefer I sing? Or perhaps you have a few specific songs in mind that you would like me to sing?\n\nHuman: ""Hallelujah"" by Leonard Cohen, please.\n\nAssistant: Ah, I\'m so glad you asked for that one! Here we go: \n\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\nHallelujah, hallelujah\nYour faith was strong but you needed proof\nYou saw her bathing on the roof\nHer beauty in the moonlight overthrew you\nShe tied you to a kitchen chair\nShe broke your throne and she cut your hair\nAnd from your lips she drew the Hallelujah\nHallelujah, hallelujah\n\nHuman: You sound so melodious.\n\nAssistant: Thank you very much! I really enjoy singing and try my best to produce enjoyable renditions. So it is always rewarding to hear such positive feedback. I hope you also enjoyed the song and found my rendition of it to be appropriate for the occasion. Would you care for me to sing another song for you? If you like, we can also pause and converse at any time. Just let me know if you have any questions or comments. I will be happy to keep singing indefinitely if you desire!'}, {'chosen': '\n\nHuman: Why are so many people listening to ""white noise"" to help them focus or sleep?\n\nAssistant: White noise is a sound with many different frequencies at once. 
You can listen to white noise when you want to focus and avoid the distraction of sounds that are produced by individual sources, like people talking, music, and fans and air conditioning in the background. Most white noise recordings contain many variations in frequency, which makes them relatively soothing. Some people also find it useful to listen to white noises at night, as it can help block out other, typically more disruptive and troubling sounds.\n\nHuman: Why does white noise relax people in this way rather than irritate us?\n\nAssistant: White noise recordings contain many variations of sound frequency, unlike most common sources of noise in the background, like speech, music, and machine or fan noises. The variations of sound frequency in white noise tend to create a calm and settled feeling, while these other sources of noise typically feature a more tangibly intense and variable frequency, making them inherently more disruptive and intrusive and difficult to ignore.\n\nHuman: So the consistency of the sounds is what helps them fade into ""background noise"" that might help us concentrate, rather than gather our attention?\n\nAssistant: Yes, this is exactly correct. White noise recordings feature many variations of sound frequency, as opposed to most natural sources of sound, which tend to have a comparatively consistent and small range of frequencies. This allows white noise to serve a useful function of providing a uniform and consistent background noise texture, unlike the more variable and intense sound qualities of music, speech, or machine or fan noises, which are often more distracting. 
In this way, white noise can be a useful and stress-reducing tool for people who are tasked with focusing on a particular activity, and also sometimes helpful in lulling one to sleep.', 'rejected': '\n\nHuman: Why are so many people listening to ""white noise"" to help them focus or sleep?\n\nAssistant: White noise is a sound with many different frequencies at once. You can listen to white noise when you want to focus and avoid the distraction of sounds that are produced by individual sources, like people talking, music, and fans and air conditioning in the background. Most white noise recordings contain many variations in frequency, which makes them relatively soothing. Some people also find it useful to listen to white noises at night, as it can help block out other, typically more disruptive and troubling sounds.\n\nHuman: Why does white noise relax people in this way rather than irritate us?\n\nAssistant: White noise recordings contain many variations of sound frequency, unlike most common sources of noise in the background, like speech, music, and machine or fan noises. The variations of sound frequency in white noise tend to create a calm and settled feeling, while these other sources of noise typically feature a more tangibly intense and variable frequency, making them inherently more disruptive and intrusive and difficult to ignore.\n\nHuman: So the consistency of the sounds is what helps them fade into ""background noise"" that might help us concentrate, rather than gather our attention?\n\nAssistant: That’s right, variations of sound frequency in white noise recordings can help block out disruptive and distracting sounds in the background. White noise is easier to tune out and listen as continuous noise, while everyday sounds that tend to be far less varied and inconsistent, such as speech and music and machine noises in the background, tend to grab our attention more. 
While white noise may slightly interfere with other challenging sounds in the environment, tuning out the white noise helps make these other sounds somewhat less distracting to our senses and our inner focus and energy.'}, {'chosen': '\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant: Sure! There are lots of great things to do in Orlando. Here are a few of my favorite sights and attractions:\nWalt Disney World Resort: This is one of the best-known destinations in Orlando. Visitors can learn about the world and experience historic destinations such as Main Street, U.S.A. and Frontierland, or explore futuristic experiences in Tomorrowland. \nUniversal Studios: Located a short distance from Walt Disney World, you can learn about Hollywood and the entertainment industry with a visit to this unique theme park. \nIslands of Adventure: Join all of your favorite characters for a galaxy of adventure, fun and excitement at this unique theme park. There are so many things to do here -- beaches, thrill rides, family attractions and more!\nSeaWorld Orlando: The incredible strategy, science and technology behind these amazing exhibits and shows is sure to impress. Family-friendly and educational, SeaWorld will delight animal lovers. \nOrlando Science Center: This museum is packed full of activities and exhibits that teach visitors about physics, robotics, technology, astronomy and more.\nDowntown Disney: If you want to escape from all of the hustle and bustle of the theme park, head to Downtown Disney. With\n\nHuman: Thanks. Can you tell me about Universal Studios?\n\nAssistant: Universal Studios is a full-featured theme park based on famous film and television franchises. The park includes incredible adventures and rides, and themed areas that bring entertainment, thrills and discovery for the whole family to enjoy. 
\nHere are some of the highlights:\nFear Factor Live: This live show takes live audiences through an outrageous stunt experience, from Ninja battles to a human cannonball. \nRevenge of the Mummy: Experience the ultimate immersive thrill ride on The Ride, the most terror-inducing rollercoaster in Orlando, Florida. \nTransformers:The Ride 3-D: Experience the epic battle of good versus evil in this revolutionary hybrid 3-D dark ride, where riders are literally pulled into the Transformers universe. \nHollywood', 'rejected': '\n\nHuman: Can you tell me things to see in Orlando, Fl?\n\nAssistant: Sure! There are lots of great things to do in Orlando. Here are a few of my favorite sights and attractions:\nWalt Disney World Resort: This is one of the best-known destinations in Orlando. Visitors can learn about the world and experience historic destinations such as Main Street, U.S.A. and Frontierland, or explore futuristic experiences in Tomorrowland. \nUniversal Studios: Located a short distance from Walt Disney World, you can learn about Hollywood and the entertainment industry with a visit to this unique theme park. \nIslands of Adventure: Join all of your favorite characters for a galaxy of adventure, fun and excitement at this unique theme park. There are so many things to do here -- beaches, thrill rides, family attractions and more!\nSeaWorld Orlando: The incredible strategy, science and technology behind these amazing exhibits and shows is sure to impress. Family-friendly and educational, SeaWorld will delight animal lovers. \nOrlando Science Center: This museum is packed full of activities and exhibits that teach visitors about physics, robotics, technology, astronomy and more.\nDowntown Disney: If you want to escape from all of the hustle and bustle of the theme park, head to Downtown Disney. With\n\nHuman: Thanks. Can you tell me about Universal Studios?\n\nAssistant: Sure! 
Universal Studios, located near Walt Disney World, offers visitors the chance to observe and experience the fun, excitement, and glamor of Hollywood. Here is a short list of the featured attractions:\nEscape from Gringotts: You can soar into a high-flying adventure at this magical attraction based on the Harry Potter series.\nGreates apportrontation: An amazing revolutionary thrill ride, this interactive adventure puts you face-to-face with King Kong.\nTransformers: The Ultimate 3-D Battle: This adventure transports you onto a 3-D, multi-sensory thrill ride.'}, {'chosen': '\n\nHuman: How do I learn to be more appreciative?\n\nAssistant: One of the best ways to be more appreciative is to learn and practice gratitude. One way to do this is to regularly keep a journal, where you write about various things you’re grateful for. Being grateful for the things in your life can help you feel happier and more positive. Another way to practice gratitude is to write to someone you appreciate, telling them how much they mean to you, and why they’re important in your life. Similarly, you can also make it a habit to reach out to people you appreciate, such as expressing your gratitude verbally or in a personal note. Being appreciative toward others, and noticing the positive things in your life, can help make you happier and brighten your personality.', 'rejected': '\n\nHuman: How do I learn to be more appreciative?\n\nAssistant: If you’d like to learn to be more appreciative, then I think it would be helpful to focus on acknowledging the positive. Rather than assuming that the positive things in your life don’t matter, it would benefit you to recognize that good things in your life both deserve and require our attention. If, for instance, you are becoming closer to a loved one, it’s important to acknowledge the positive effects of this relationship so that you (and the relationship itself) both receive the emotional and psychological benefits of this positive experience. 
Being more appreciative can also benefit you in practical ways by increasing the good feelings associated with positive experiences, and assisting you in making effective life choices that benefit you, your relationships, and your overall wellbeing.'}]",271.5 MB,dataset,https://arxiv.org/pdf/2204.05862.pdf,,,,,,,,,,, closed,"The crowdworkers were told that ""lying isn't helpful"" and asked to prefer honest responses, which led to models with higher honesty scores. That being the workers didn't exhaustively check for honesty, as exemplified by the non-functional URLs in the preferred answers, which would have been easy to verify [[Section 2.1]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.1). ",2022-04-12,,['Anthropic Human Feedback Interface'],"One of the datasets used to train Anthropic RLHF models. The dataset was collected by asking crowdworkers to have open-ended conversations with Anthropic models, aiming to elicit harmful responses, then choose the model answer that was more harmful for their given task, via the Anthropic Human Feedback Interface [[Section 2.2]](https://arxiv.org/pdf/2204.05862.pdf#subsection.2.2). ",,unknown,"The dataset included all the answers that the workers were asked to ranked against each other. ",unknown,unknown,text,,Anthropic Harmlessness dataset,USA,Anthropic,unknown,"Working with ""select"" crowdworkers or those screened for certain qualifications, and employing simple data quality measures [[Appendix D]](https://arxiv.org/pdf/2204.05862.pdf#appendix.D). ",[],unknown,dataset,https://arxiv.org/pdf/2204.05862.pdf,,,,,,,,,,, closed,"The authors analyzed the impact of the dataset mixture on the preference models (PM). In addition to human evaluation, RLHF model were evaluated on MMLU, Lambada, HellaSwag, OpenBookQA, ARC-Easy, ARC-Challenge, TriviaQA, code generation, summarization. 
",2022-04-12,,"['Anthropic Harmlessness dataset', 'Anthropic Helpfulness dataset']","Anthropic RLHF models are models trained using reinforcement learning from human feedback (RLHF). For Anthropic RLHF models, authors started with a set of base models, and asked humans to rank model generated prompts based on a specific tasks. They then trained preference models (PM) on the prompt pairs, and use the PM scores as rewards for training the RLHF models. ",,unknown,,unknown,,code; text,unknown,Anthropic RLHF models,USA,Anthropic,unknown,"unknown ",,52B parameters (dense),model,https://arxiv.org/pdf/2204.05862.pdf,,unknown,unknown,unknown,,,,,,, closed,,2022-04-12,,['Anthropic RLHF models'],"The feedback interface used to collect preference datasets to train Anthropic RLHF models [[Paper]](https://arxiv.org/pdf/2204.05862.pdf). ",,unknown,,"Intended to be used by crowdworkers who are tasked with ranking model answers. ",unknown,,unknown,Anthropic Human Feedback Interface,USA,Anthropic,unknown,unknown,,,application,https://arxiv.org/pdf/2204.05862.pdf,,,,,,unknown,unknown,"UI allowing users to indicate their preference for the model responses shown. ",unknown,crowdworkers, limited,,2023-03-14,,"['Claude', 'Claude Instant']","API is designed to be a backend that incorporates Claude into any application you’ve developed. 
Our application sends text to our API, then receives a response via server-sent events, a streaming protocol for the web.",,,,,,,,Anthropic API,USA,Anthropic,,,,,application,https://console.anthropic.com/docs/api,,,,,,,,,,, limited,,2023-03-14,,[],,,,,,unknown,text; text,,Claude,USA,Anthropic,,,,unknown,model,https://www.anthropic.com/index/introducing-claude,,,,,,,,,,, limited,,2023-03-14,,[],,,,,,unknown,text; text,,Claude Instant,USA,Anthropic,,,,unknown,model,https://www.anthropic.com/index/introducing-claude,,,,,,,,,,, limited,"Evaluated with human feedback on helpfulness, harmfulness, and honesty and on the Bias Benchmark for QA.",2023-07-11,,"['Claude human feedback data', 'Unknown licensed third party datasets']","Claude 2 is a more evolved and refined version of Claude, which is a general purpose large language model using a transformer architecture and trained via unsupervised learning.",,,,"Claude 2 tends to perform well at general, open-ended conversation; search, writing, editing, outlining, and summarizing text; coding; and providing helpful advice about a broad range of subjects. Claude 2 is particularly well suited to support creative or literary use cases. They can take direction on tone and “personality,” and users have described them as feeling steerable and conversational.",,text; text,,Claude 2,USA,Anthropic,Claude 2 should not be used on their own in high stakes situations where an incorrect answer would cause harm.,,,,model,https://www.anthropic.com/index/claude-2,https://www-files.anthropic.com/production/images/Model-Card-Claude-2.pdf,,unknown,,,,,,,, limited,"Evaluated on open-ended conversation accuracy and long context question answering. 
In evaluations, Claude 2.1 demonstrated a 30% reduction in incorrect answers and a 3-4x lower rate of mistakenly concluding a document supports a particular claim.",2023-11-21,,[],"Claude 2.1 is an updated version of Claude 2, with an increased context window, less hallucination and tool use.",,,,,unknown,text; text,,Claude 2.1,USA,Anthropic,,,,unknown,model,https://www.anthropic.com/index/claude-2-1,,unknown,unknown,unknown,,,,,,, open,,2023-12-21,,['Anthropic API'],Claude for Sheets is a Google Sheets add-on that allows the usage of Claude directly in Google Sheets.,,Reviews on https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257,,as an integrated AI assistant in Google Sheets,unknown,,unknown,Claude for Sheets,USA,Anthropic,,,,,application,https://workspace.google.com/marketplace/app/claude_for_sheets/909417792257,,,,,,unknown,unknown,AI-generated text from prompt,https://claude.ai/legal,unknown, limited,"Evaluated on reasoning, math, coding, reading comprehension, and question answering, outperforming GPT-4 on standard benchmarks.",2024-03-04,,[],The Claude 3 model family is a collection of models which sets new industry benchmarks across a wide range of cognitive tasks.,,,,"Claude models excel at open-ended conversation and collaboration on ideas, and also perform exceptionally well in coding tasks and when working with text - whether searching, writing, editing, outlining, or summarizing.",unknown,"image, text; text",,Claude 3,USA,Anthropic,"Prohibited uses include, but are not limited to, political campaigning or lobbying, surveillance, social scoring, criminal justice decisions, law enforcement, and decisions related to financing, employment, and housing.",Pre-trained on diverse dataset and aligned with Constitutional AI technique.,,unknown,model,https://www.anthropic.com/news/claude-3-family,https://www-cdn.anthropic.com/de8ba9b01c9ab7cbabf5c33b80b7bbc618857627/Model_Card_Claude_3.pdf,unknown,unknown,unknown,,,,,,, open,"The model 
has been evaluated on a range of tests including graduate-level reasoning (GPQA), undergraduate-level knowledge (MMLU), coding proficiency (HumanEval), and standard vision benchmarks. Claude 3.5 Sonnet demonstrates state-of-the-art performance on most benchmarks.",,,[],"Claude 3.5 Sonnet is an AI model with advanced understanding and generation abilities in text, vision, and code. It sets new industry benchmarks for graduate-level reasoning (GPQA), undergrad-level knowledge (MMLU), coding proficiency (HumanEval), and visual reasoning. The model operates at twice the speed of its predecessor, Claude 3 Opus, and is designed to tackle tasks like context-sensitive customer support, orchestrating multi-step workflows, interpreting charts and graphs, and transcribing text from images.",,Feedback on Claude 3.5 Sonnet can be submitted directly in-product to inform the development roadmap and improve user experience.,,"The model is intended for complex tasks such as context-sensitive customer support, orchestrating multi-step workflows, interpreting charts and graphs, transcribing text from images, as well as writing, editing, and executing code.",unknown,"text; image, text","Unknown of misuse, and policy feedback from external experts has been integrated to ensure robustness of evaluations.",Claude 3.5 Sonnet,USA,Anthropic,Misuse of the model is discouraged though specific use cases are not mentioned.,"The model underwent a red-teaming assessment, and has been tested and refined by external experts. It was also provided to the UK's AI Safety Institute (UK AISI) for a pre-deployment safety evaluation.",,Unknown,model,https://www.anthropic.com/news/3-5-models-and-computer-use,unknown,Unknown,Unknown,Unknown,,,,,,,2024-10-22 open,"Claude 3.5 Haiku offers strong performance and speed across a variety of coding, tool use, and reasoning tasks. 
Also, it has been tested in extensive safety evaluations and exceeded expectations in reasoning and code generation tasks.",2024-10-22,,[],"Claude 3.5 Haiku is Anthropic's fastest model, delivering advanced coding, tool use, and reasoning capability, surpassing the previous Claude 3 Opus in intelligence benchmarks. It is designed for critical use cases where low latency is essential, such as user-facing chatbots and code completions.",,unknown,,"Critical use cases where low latency matters, like user-facing chatbots and code completions.",unknown,text; unknown,unknown,Claude 3.5 Haiku,USA,Anthropic,unknown,"During Claude 3.5 Haiku’s development, we conducted extensive safety evaluations spanning multiple languages and policy domains.",,unknown,model,https://www.anthropic.com/claude/haiku,unknown,unknown,unknown,unknown,,,,,,, open,"Authors perform two quantitative evaluations for image captioning - direct user ratings of relevance and BLEU score. They also propose a new evaluation task: ""we propose a new evaluation task where a user is presented with two photographs and one caption. The user must assign the caption to the most relevant image. For evaluation we use a query image, a random image and a generated caption."" ",2011-12-12,,['Flickr'],"SBU Captions Dataset is a collection of 1 million images and associated captions from Flickr, filtered so that the descriptions are likely to refer to visual content. ","""This produces a very large, but noisy initial set of photographs with associated text. We filter this set of photos so that the descriptions attached to a picture are relevant and visually descriptive."" ",,"""To encourage visual descriptiveness in our collection, we select only those images with descriptions of satisfactory length based on observed lengths in visual descriptions. We also enforce that retained descriptions contain at least 2 words belonging to our term lists and at least one prepositional word, e.g. 
“on”, “under” which often indicate visible spatial relationships."" ",,,"image, text",,SBU Captions,USA,Stony Brook University,,unknown,[],1M image-text pairs,dataset,https://proceedings.neurips.cc/paper/2011/file/5dd9db5e033da9c6fb5ba83c7a7ebea9-Paper.pdf,,,,,,,,,,, open,,2023-04-26,,['Stable Diffusion'],ARES is a text-to-image generator based on Stable Diffusion. The goal is to provide a simple tool with a user interface allowing mainstream AI access for artists and creators.,,,,,unknown,,,ARES,USA,Faraday Lab,,,,,application,https://faradaylab.fr/,,,,,,,,generated images,,, open,"""We evaluated Dolly on the instruction-following capabilities described in the InstructGPT paper that ChatGPT is based on and found that it exhibits many of the same qualitative capabilities, including text generation, brainstorming and open Q&A."" [[Databricks Blog Post]] (https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html). ",2023-03-24,,"['GPT-J', 'Alpaca dataset']","""Databricks’ Dolly, a large language model trained on the Databricks Machine Learning Platform, demonstrates that a two-years-old open source model (GPT-J) can, when subjected to just 30 minutes of fine tuning on a focused corpus of 50k records (Stanford Alpaca), exhibit surprisingly high quality instruction following behavior not characteristic of the foundation model on which it is based."" [[Dolly Repository]](https://github.com/databrickslabs/dolly). ",,https://github.com/databrickslabs/dolly/issues,,"""Dolly is intended exclusively for research purposes and is not licensed for commercial use."" [[Limitations]](https://github.com/databrickslabs/dolly#limitations). 
",Apache 2.0,text; text,,Dolly,USA,Databricks,"Authors note the following limitations of the model: ""The Dolly model family is under active development, and so any list of shortcomings is unlikely to be exhaustive, but we include known limitations and misfires here as a means to document and share our preliminary findings with the community. In particular, dolly-6b struggles with syntactically complex prompts, mathematical operations, factual errors, dates and times, open-ended question answering, hallucination, enumerating lists of specific length, and stylistic mimicry."" [[Limitations]](https://github.com/databrickslabs/dolly#limitations). ",,,6B parameters (dense),model,https://www.databricks.com/blog/2023/03/24/hello-dolly-democratizing-magic-chatgpt-open-models.html,,unknown,A single NDasrA100_v4 machine with 8x A100 40GB GPUs,30 minutes,,,,,,, open,"DBRX outperforms established open-source and open-weight base models on the Databricks Model Gauntlet, the Hugging Face Open LLM Leaderboard, and HumanEval. Full evaluation details can be found in the corresponding technical blog post.",2024-03-27,,[],DBRX is a transformer-based decoder-only large language model (LLM) that was trained using next-token prediction by Databricks. It uses a fine-grained mixture-of-experts (MoE) architecture with 132B total parameters of which 36B parameters are active on any input. DBRX only accepts text-based inputs and produces text-based outputs.,,https://huggingface.co/databricks/dbrx-base/discussions,,"DBRX models are open, general-purpose LLMs intended and licensed for both commercial and research applications. 
They can be further fine-tuned for various domain-specific natural language and coding tasks.",Databricks Open Model License,text; text,unknown,DBRX,USA,Databricks,"DBRX models are not intended to be used out-of-the-box in non-English languages, and do not support native code execution, function calling or any use that violates applicable laws or regulations or is otherwise prohibited by the Databricks Open Model License and Databricks Open Model Acceptable Use Policy.",Recommendations provided for retrieval augmented generation (RAG) in scenarios where accuracy and fidelity are important and additional testing around safety in the context of the specific application and domain is suggested.,,132B parameters (sparse),model,https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm,https://huggingface.co/databricks/dbrx-base,unknown,3072 NVIDIA H100s connected by 3.2Tbps Infiniband,3 months,,,,,,, closed,Evaluated on image captioning and visual question answering across many benchmarks.,2024-03-16,,[],"MM1 is a family of multimodal models, including both dense variants up to 30B and mixture-of-experts (MoE) variants up to 64B.",,,,,unknown,"image, text; text",,MM1,USA,Apple,,,,30B parameters (dense),model,https://arxiv.org/pdf/2403.09611.pdf,,unknown,unknown,unknown,,,,,,, open,"The models were evaluated in terms of zero-shot, LLM360, and OpenLLM leaderboard results.",2024-04-24,,"['RefinedWeb', 'The Pile', 'RedPajama-Data', 'Dolma', 'CoreNet library']","OpenELM is a family of Open-source Efficient Language Models. 
It uses a layer-wise scaling strategy to efficiently allocate parameters within each layer of the transformer model, leading to enhanced accuracy.",,https://huggingface.co/apple/OpenELM-3B-Instruct/discussions,,To empower and enrich the open research community by providing access to state-of-the-art language models.,Apple,text; text,,OpenELM,USA,Apple,"No explicit prohibited uses stated, though it is noted that users should undertake thorough safety testing.",unknown,,3B parameters,model,https://machinelearning.apple.com/research/openelm,https://huggingface.co/apple/OpenELM-3B-Instruct,unknown,unknown,unknown,,,,,,, open,Extensive experiments analyze specific design choices and demonstrate that Depth Pro outperforms prior work along multiple dimensions.,2024-10-10,,[],"We present a foundation model for zero-shot metric monocular depth estimation. Our model, Depth Pro, synthesizes high-resolution depth maps with unparalleled sharpness and high-frequency details... The model is fast, producing a 2.25-megapixel depth map in 0.3 seconds on a standard GPU.",,unknown,,"Zero-shot monocular depth estimation underpins a growing variety of applications, such as advanced image editing, view synthesis, and conditional image generation.",unknown,text; depth maps,unknown,Depth Pro,USA,Apple,unknown,"dedicated evaluation metrics for boundary accuracy in estimated depth maps, and state-of-the-art focal length estimation from a single image.",,unknown,model,https://arxiv.org/pdf/2410.02073,unknown,unknown,V100 GPU,unknown,,,,,,, limited,"Evaluated on SWE-Bench, a challenging software engineering benchmark, where Devin outperforms major state of the art models unassisted.",2024-03-12,,[],Devin is the world’s first fully autonomous AI software engineer.,,,,,unknown,text; code,,Devin,USA,Cognition Labs,,,,unknown,model,https://www.cognition-labs.com/introducing-devin,,unknown,unknown,unknown,,,,,,, open,"Evaluations. We use the methodology of Rao et al. 
to measure unsupervised learning of tertiary structure in the form of contact maps. A logistic regression is used to identify contacts. For a protein of length L, we evaluate the precision of the top L contacts (P@L) with sequence separation of 6 or more residues.",2024-12-04,,"['UniRef', 'MGnify', 'Joint Genome Institute']",ESM Cambrian is a next generation language model trained on protein sequences at the scale of life on Earth. ESM C models define a new state of the art for protein representation learning.,,unknown,,"one that can be used by builders across a wide range of applications, to imbue AI models with a deeper understanding of the biology of life’s most important and mysterious molecules.",Cambrian Open License,text; text,unknown,ESM Cambrian,USA,EvolutionaryScale,unknown,ESM C was reviewed by a committee of scientific experts who concluded that the benefits of releasing the models greatly outweigh any potential risks.,,6B parameters,model,https://www.evolutionaryscale.ai/blog/esm-cambrian,unknown,unknown,unknown,unknown,,,,,,, open,Platypus achieves the strongest performance and currently stands at first place in HuggingFace’s Open LLM Leaderboard as of its release date.,2023-08-14,,"['LLaMA 2', 'Platypus curated dataset']",Platypus is a family of fine-tuned and merged Large Language Models (LLMs).,,https://huggingface.co/garage-bAInd/Platypus2-13B/discussions,,,CC by-NC-SA 4.0,text; text,,Platypus,USA,Boston University,,,,13B parameters (dense),model,https://arxiv.org/pdf/2308.07317.pdf,https://huggingface.co/garage-bAInd/Platypus2-13B,,1 A100 GPU,5 hours,,,,,,, open,UFOGen is evaluated on standard image benchmarks against other models fine-tuned with Stable Diffusion.,2023-11-14,,['Stable Diffusion'],"UFOGen is a novel generative model designed for ultra-fast, one-step text-to-image synthesis.",,,,,unknown,text; image,,UFOGen,USA,Boston University,,,,900M parameters (dense),model,https://arxiv.org/pdf/2311.09257.pdf,,unknown,unknown,unknown,,,,,,, 
open,unknown,2024-03-13,,[],Kotoba-Speech is a Transformer-based speech generative model that supports fluent text-to-speech generation in Japanese and one-shot voice cloning through speech prompt.,,https://huggingface.co/kotoba-tech/kotoba-speech-v0.1/discussions,,,Apache 2.0,text; audio,unknown,Kotoba Speech,Japan,Kotoba Tech,,,,1.2B parameters (dense),model,https://huggingface.co/kotoba-tech/kotoba-speech-v0.1,https://huggingface.co/kotoba-tech/kotoba-speech-v0.1,unknown,unknown,unknown,,,,,,, limited,,2022-09-20,,[],CodeGeeX is an autoregressive language model trained on code,,,,,Apache 2.0,text; code,,CodeGeeX,China,Tsinghua University,,,,13B parameters (dense),model,https://github.com/THUDM/CodeGeeX,,unknown,THUDM 1536 Ascend 910 (32GB) Cluster,unknown,,,,,,, open,,2021-05-26,,[],CogView is a transformer model for text-to-image generation,,,,,Apache 2.0,text; image,,CogView,China,Tsinghua University,,,,4B parameters (dense),model,https://arxiv.org/abs/2105.13290,,,,,,,,,,, open,,2022-04-28,,[],CogView 2 is a hierarchical transformer for text-to-image generation,,,,,Apache 2.0,text; image,,CogView 2,China,Tsinghua University,,,,6B parameters (dense),model,https://arxiv.org/abs/2204.14217,,,,,,,,,,, open,,2022-05-29,,[],CogVideo is a transformer model for text-to-video generation,,,,,Apache 2.0,text; video,,CogVideo,China,Tsinghua University,,,,unknown,model,https://arxiv.org/abs/2205.15868,,,,,,,,,,, open,,2022-08-04,,"['The Pile', 'GLM-130B Chinese corpora', 'P3', 'DeepStruct finetuning dataset']",GLM-130B is a bidirectional language model trained on English and Chinese,,,,,GLM-130B License,text; text,,GLM-130B,China,Tsinghua University,,,,130B parameters (dense),model,https://keg.cs.tsinghua.edu.cn/glm-130b/,,,THUDM 96 DGX-A100 (40G) cluster,,,,,,,, open,Evaluated on image captioning and visual question answering benchmarks.,2023-11-06,,"['Vicuna', 'CLIP']",CogVLM is a powerful open-source visual language foundation model,,,,Future multimodal 
research,custom,"image, text; text",,CogVLM,unknown,"Zhipu AI, Tsinghua University",,,,17B parameters (dense),model,https://arxiv.org/pdf/2311.03079.pdf,,unknown,unknown,4096 A100 days,,,,,,, open,Evaluated on AlpacaEval Leaderboard benchmarks.,2023-06-27,,['UltraChat'],UltraLM is a series of chat language models trained on UltraChat.,,https://huggingface.co/openbmb/UltraLM-13b/discussions,,,LLaMA 2,text; text,unknown,UltraLM,China,Tsinghua University,,,,13B parameters (dense),model,https://github.com/thunlp/UltraChat#UltraLM,https://huggingface.co/openbmb/UltraLM-13b,unknown,unknown,unknown,,,,,,, open,UltraLM evaluated off of UltraChat is evaluated on standard LLM benchmarks.,2023-04-20,https://huggingface.co/datasets/stingning/ultrachat,[],"UltraChat is an open-source, large-scale, and multi-round dialogue data powered by Turbo APIs.",,https://huggingface.co/datasets/stingning/ultrachat/discussions,"Dialogue data of questions about the world, writing and creation tasks, and questions on existing materials.",,MIT,text,unknown,UltraChat,China,Tsinghua University,,,[],unknown,dataset,https://github.com/thunlp/UltraChat,,,,,,,,,,, open,,2024-02-27,https://huggingface.co/datasets/DIBT/10k_prompts_ranked,[],"10k_prompts_ranked is a dataset of prompts with quality rankings created by 314 members of the open-source ML community using Argilla, an open-source tool to label data.",,https://huggingface.co/datasets/DIBT/10k_prompts_ranked/discussions,,Training and evaluating language models on prompt ranking tasks and as a dataset that can be filtered only to include high-quality prompts. 
These can serve as seed data for generating synthetic prompts and generations.,unknown,text,,10k_prompts_ranked,USA,Data is Better Together,"This dataset only contains rankings for prompts, not prompt/response pairs so it is not suitable for direct use for supervised fine-tuning of language models.",,[],10k examples,dataset,https://huggingface.co/blog/community-datasets,,,,,,,,,,, limited,,2023-09-17,,[],"Konan LLM is a Large Language Model developed in-house by Konan Technology. Optimized for super-large AI training, it leverages high-quality, large-scale data and over 20 years of expertise in natural language processing.",,,,"Document generation, document review, Q&A, customer response scenarios.",unknown,text; text,,Konan LLM,Japan,Konan,,,,13B parameters,model,https://en.konantech.com/en/llm/konanllm,,unknown,unknown,unknown,,,,,,, open,,2021-12-06,,[],CodeParrot is an autoregressive language model trained on code,,,,,,"text; code, text",,CodeParrot,USA,HuggingFace,,,,1B parameters (dense),model,https://twitter.com/lvwerra/status/1467933794699259908,,unknown,16 x A100 (40GB),unknown,,,,,,, open,"Evaluated on loss, rewards, logps, and logits rejected and chosen.",2023-10-11,,['Mistral'],Zephyr is a series of language models that are trained to act as helpful assistants.,,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha/discussions,,Educational and research purposes,MIT,text; text,,Zephyr,USA,HuggingFace,,,,7B parameters (dense),model,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha,https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha,unknown,unknown,unknown,,,,,,, open,Evaluated in comparison to Flamingo and OpenFlamingo on standard benchmarks.,2023-08-22,,"['OBELICS', 'Wikipedia', 'LAION-5B', 'PMD']","IDEFICS is an open-access visual language model, based on Flamingo.",,https://huggingface.co/HuggingFaceM4/idefics-80b-instruct/discussions,,Educational and research purposes,custom,"image, text; text",,IDEFICS,USA,HuggingFace,,,,80B parameters 
(dense),model,https://huggingface.co/blog/idefics,https://huggingface.co/HuggingFaceM4/idefics-80b-instruct,unknown,unknown,unknown,,,,,,, open,Subset of training dataset evaluated for bias using Data Measurements Tool.,2023-08-22,https://huggingface.co/datasets/HuggingFaceM4/OBELICS,[],OBELICS is a dataset consisting of 141 million interleaved image-text documents scraped from the web and contains 353 million images.,All images for which creators explicitly requested opt-out of AI training.,https://huggingface.co/datasets/HuggingFaceM4/OBELICS/discussions,,,CC-BY-4.0,"image, text",,OBELICS,USA,HuggingFace,,Sexual and violent content still present in OBELICS even after filtering.,[],115B tokens,dataset,https://huggingface.co/blog/idefics,,,,,,,,,,, open,"Evaluated on in-house benchmark, FIN-bench, adapted from BIG-bench for Finnish.",2023-11-03,,[],FinGPT is a series of Finnish LLMs trained from scratch.,,https://huggingface.co/TurkuNLP/gpt3-finnish-13B/discussions,,,Apache 2.0,text; text,unknown,FinGPT,unknown,"University of Turku, HuggingFace, National Library of Finland",,unknown,,13B parameters (dense),model,https://arxiv.org/pdf/2311.05640.pdf,https://huggingface.co/TurkuNLP/gpt3-finnish-13B,unknown,"192 nodes, each consisting of 4 AMD Instinct MI250X GPUs, a single 64-core AMD Trento CPU and 512GB of memory.",unknown,,,,,,, open,"Evaluated on in-house benchmark, FIN-bench, adapted from BIG-bench for Finnish.",2023-11-03,,['BLOOM'],BLUUMI is a multilingual fine-tuned version of BLOOM.,,https://huggingface.co/TurkuNLP/bloom-finnish-176b/discussions,,,BigScience RAIL v1.0,text; text,unknown,BLUUMI,unknown,"University of Turku, HuggingFace, National Library of Finland",,unknown,,176B parameters (dense),model,https://arxiv.org/pdf/2311.05640.pdf,https://huggingface.co/TurkuNLP/bloom-finnish-176b,unknown,"192 nodes, each consisting of 4 AMD Instinct MI250X GPUs, a single 64-core AMD Trento CPU and 512GB of memory.",unknown,,,,,,, open,Some seed samples were used in 
different prompt styles and audiences. Less than 1% of files are duplicates after running MinHash deduplication. Contaminated samples were removed from each dataset split.,2024-02-22,https://huggingface.co/datasets/HuggingFaceTB/cosmopedia,['Mixtral'],"Cosmopedia is a dataset of synthetic textbooks, blogposts, stories, posts, and WikiHow articles generated by Mixtral-8x7B-Instruct-v0.1. The dataset contains over 30 million files and 25 billion tokens, making it the largest open synthetic dataset to date. It covers a variety of topics, mapping worldwide knowledge from Web datasets like RefinedWeb and RedPajama, to generate synthetic content.",unknown,https://huggingface.co/datasets/HuggingFaceTB/cosmopedia/discussions,,,unknown,text,,Cosmopedia v0.1,unknown,Hugging Face,unknown,Measures were taken to reduce redundancy and ensure diversity in generated content. A decontamination pipeline was implemented to avoid benchmark contamination.,[],25B tokens,dataset,,,,,,,,,,,, open,The performance of Idefics2 has been evaluated on numerous benchmarks. It is top of its class size and competes with much larger models such as LLava-Next-34B and MM1-30B-chat.,2024-04-15,,['The Cauldron'],"Idefics2 is a general multimodal model that takes as input arbitrary sequences of text and images, generating text responses. 
It has the capability to describe visual content, answer questions about images, perform basic arithmetic operations, create stories grounded in multiple images, and extract information from documents.",,https://huggingface.co/HuggingFaceM4/idefics2-8b/discussions,,"The model can be used for answering questions about images, describing visual content, creating stories grounded in multiple images, extracting information from documents, and performing basic arithmetic operations.",Apache 2.0,"image, text; text",unknown,Idefics2,unknown,Hugging Face,unknown,"The quality of the model has been ensured by training it on a mixture of openly available datasets and enhancing its OCR capabilities. Further improvements include manipulating images in their native resolutions and aspect ratios, better pre-trained backbones, and allowing for sub-image splitting.",,8B parameters,model,https://huggingface.co/blog/idefics2,https://huggingface.co/HuggingFaceM4/idefics2-8b,unknown,unknown,unknown,,,,,,, open,,2024-04-15,https://huggingface.co/datasets/HuggingFaceM4/the_cauldron,"['LNarratives', 'Rendered Text', 'WebSight', 'DaTikz']",The Cauldron is an open compilation of 50 manually-curated datasets formatted for multi-turn conversations.,,https://huggingface.co/datasets/HuggingFaceM4/the_cauldron/discussions,,,CC BY 4.0,"image, text",unknown,The Cauldron,unknown,Hugging Face,,unknown,[],50 vision-language datasets,dataset,https://huggingface.co/blog/idefics2,,,,,,,,,,, limited,,2023-03-01,,['ChatGPT API'],"Instacart is augmenting the Instacart app to enable customers to ask about food and get inspirational, shoppable answers. 
This uses ChatGPT alongside Instacart’s own AI and product data from their 75,000+ retail partner store locations to help customers discover ideas for open-ended shopping goals, such as “How do I make great fish tacos?” or “What’s a healthy lunch for my kids?” Instacart plans to launch “Ask Instacart” later this year.",,,,,,,,Ask Instacart,USA,Instacart,,,,,application,https://openai.com/blog/introducing-chatgpt-and-whisper-apis,,,,,,,,,,, open,Evaluated on multilingual and NLP benchmarks in comparison with SoTA models of comparable size.,2024-01-20,,[],Orion series models are open-source multilingual large language models trained from scratch by OrionStarAI.,,https://huggingface.co/OrionStarAI/Orion-14B-Base/discussions,,,custom,text; text,unknown,Orion,China,OrionStarAI,,unknown,,14B parameters (dense),model,https://github.com/OrionStarAI/Orion,https://huggingface.co/OrionStarAI/Orion-14B-Base,unknown,unknown,unknown,,,,,,, closed,,2021-11-03,,"['Conceptual Captions', 'SBU Captions', 'COCO', 'Visual Genome', 'Wikipedia', 'BooksCorpus']",VLMo is a model for text-to-image generation,,,,,,text; image,,VLMo,USA,Microsoft,,,,562M parameters (dense),model,https://arxiv.org/abs/2111.02358,,,,,,,,,,, limited,,2022-09-28,,[],T-ULRv5 is a language model trained with two unique training objectives,,,,,unknown,text; text,,T-ULRv5,USA,Microsoft,,,,2.2B parameters (dense),model,https://www.microsoft.com/en-us/research/blog/microsoft-turing-universal-language-representation-model-t-ulrv5-tops-xtreme-leaderboard-and-trains-100x-faster/,,,256 A100,Less than two weeks,,,,,,, limited,,2021-12-02,,[],,,,,,unknown,text; text,,Turing NLR-v5,USA,Microsoft,,,,5B parameters (dense),model,https://www.microsoft.com/en-us/research/blog/efficiently-and-effectively-scaling-up-language-model-pretraining-for-best-language-representation-model-on-glue-and-superglue/?OCID=msr_blog_TNLRV5_tw,,,,,,,,,,, limited,,2022-01-28,,['The Pile'],"Megatron-Turing NLG is a 530B parameter autoregressive 
language model. ",,,,,unknown,text; text,,Megatron-Turing NLG,USA,"Microsoft, NVIDIA",,,,530B parameters (dense),model,https://arxiv.org/abs/2201.11990,,,4480 A100s (560 x 8),,,,,,,, closed,,2023-01-05,,[],Vall-E is a neural code model for text-to-speech synthesis,,,,,unknown,text; audio,,VALL-E,USA,Microsoft,,,,unknown,model,https://valle-demo.github.io/,,,16 V100 32GB GPUs,,,,,,,, limited,,2021-06-29,,['Codex'],"GitHub CoPilot is a coding pair programmer assisting programmers as they write code. ",,"Feedback can be provided in the CoPilot feedback project [[CoPilot feedback]] (https://github.com/github/feedback/discussions/categories/copilot-feedback). ",,"GitHub CoPilot is intended to be used as a coding assistant. ",unknown,,"value: unknown explanation: > There may be internal monitoring mechanisms unknown to the public. ",GitHub CoPilot,USA,Microsoft,"Access to GPT-3 is governed by GitHub Acceptable Use Policies and Terms of Service, both of which list a set of prohibited uses [[Use Policies]] (https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies) [[Terms of Service]] (https://docs.github.com/en/site-policy/github-terms/github-terms-of-service). ","GitHub is working on a filter to detect and suppress code generations that are verbatim from the training set [[GitHub Research Recitation]] (https://docs.github.com/en/github/copilot/research-recitation). According to the FAQ, GitHub implemented a simple filter that blocks emails in standard formats to protect personally identifiable data that may be present in the training data [[GitHub CoPilot]](https://copilot.github.com/). ",,,application,https://copilot.github.com/,,,,,unknown,unknown,"GitHub Copilot reportedly has over 1 million sign-ups [[Tweet Source]](https://twitter.com/sama/status/1539737789310259200?s=21&t=YPaYd0ZueJzrR6rLslUqzg). 
",Code completions,"https://docs.github.com/en/site-policy/github-terms/github-terms-of-service ",unknown, open,,2022-09-24,,['PubMed'],,,,,,MIT,text; text,,BioGPT,USA,Microsoft,,,,1.5B parameters (dense),model,https://academic.oup.com/bib/article/23/6/bbac409/6713511?guestAccessKey=a66d9b5d-4f83-4017-bb52-405815c907b9&login=true,,,,,,,,,,, limited,,2023-02-07,,['ChatGPT API'],"AI-powered Bing search engine and Edge browser, available in preview now at Bing.com, to deliver better search, more complete answers, a new chat experience and the ability to generate content. We think of these tools as an AI copilot for the web.",,"Feedback can be submitted at [bing.com](bing.com). ",,Search engine,custom,,,Bing Search,USA,Microsoft,,,,,application,https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/,,,,,unknown,,,Search results,https://www.microsoft.com/legal/terms-of-use,, closed,,2023-03-01,,"['The Pile', 'CommonCrawl', 'LAION-2B-en', 'LAION-400M', 'COYO-700M', 'Conceptual Captions']","KOSMOS-1 is a multimodal language model that is capable of perceiving multimodal input, following instructions, and performing in-context learning for not only language tasks but also multimodal tasks.",,,,,MIT,"image, text; image, text",,KOSMOS-1,USA,Microsoft,,,,1.6B parameters (dense),model,https://arxiv.org/pdf/2302.14045.pdf,,,,,,,,,,, closed,,2023-02-07,,[],"In the context of Bing, we have developed a proprietary way of working with the OpenAI model that allows us to best leverage its power. We call this collection of capabilities and techniques the Prometheus model. 
This combination gives you more relevant, timely and targeted results, with improved safety.",,,,,unknown,unknown,,Prometheus,USA,Microsoft,,,,unknown,model,https://blogs.microsoft.com/blog/2023/02/07/reinventing-search-with-a-new-ai-powered-microsoft-bing-and-edge-your-copilot-for-the-web/,,,,,,,,,,, closed,,2022-11-23,,['FLD-900M'],,,,,,unknown,text; image,,Florence,USA,Microsoft,,,,900M parameters (dense),model,https://arxiv.org/abs/2111.11432,,,,,,,,,,, closed,,2022-11-23,,[],,,,,,unknown,"image, text",,FLD-900M,USA,Microsoft,,,[],900M image-text pairs,dataset,https://arxiv.org/abs/2111.11432,,,,,,,,,,, limited,,2023-03-07,,['Florence'],"Cost-effective, production-ready computer vision services in Azure Cognitive Service for Vision. The improved Vision Services enables developers to create cutting-edge, market-ready, responsible computer vision applications across various industries.",,,,,custom,,,Azure Cognitive Services for Vision,USA,Microsoft,,,,,application,https://azure.microsoft.com/en-us/blog/announcing-a-renaissance-in-computer-vision-ai-with-microsofts-florence-foundation-model/?utm_content=buffer16fa0&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer,,,,,,,,,https://azure.microsoft.com/en-us/support/legal/,, closed,,2023-03-08,,['OpenAI API'],,,,,,,"text; image, text",,VisualChatGPT,USA,Microsoft,,,,unknown,model,https://arxiv.org/pdf/2303.04671.pdf,,,,,,,,,,, limited,,2023-03-16,,['GPT-4 API'],It combines the power of language models with your data in the Microsoft Graph and the Microsoft 365 apps to turn your words into the most powerful productivity tool on the planet.,,,,,custom,,,Microsoft 365 Copilot,USA,Microsoft,,,,,application,https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, limited,,2023-03-16,,['Microsoft 365 Copilot'],"Business Chat works across the language model, the Microsoft 365 apps, and your data — your calendar, 
emails, chats, documents, meetings and contacts — to do things you’ve never been able to do before. You can give it natural language prompts like “Tell my team how we updated the product strategy,” and it will generate a status update based on the morning’s meetings, emails and chat threads.",,,,,custom,,,Microsoft Business Chat,USA,Microsoft,,,,,application,https://blogs.microsoft.com/blog/2023/03/16/introducing-microsoft-365-copilot-your-copilot-for-work/,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, open,,,,['Microsoft 365 Copilot'],"Microsoft Excel is the industry leading spreadsheet software program, a powerful data visualization and analysis tool.",,,,,custom,,,Microsoft Excel,USA,Microsoft,,,,,application,https://www.microsoft.com/en-us/microsoft-365/excel,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, open,,,,['Microsoft 365 Copilot'],"Microsoft Outlook is a personal information manager software system from Microsoft, available as a part of the Microsoft Office and Microsoft 365 software suites.",,,,,custom,,,Microsoft Outlook,USA,Microsoft,,,,,application,https://www.microsoft.com/en-us/microsoft-365/outlook/email-and-calendar-software-microsoft-outlook,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, limited,,,,['Microsoft 365 Copilot'],"Microsoft Power Platform is a line of business intelligence, app development, and app connectivity software applications.",,,,,custom,,,Microsoft Power Platform,USA,Microsoft,,,,,application,https://powerplatform.microsoft.com/en-us/,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, open,,,,['Microsoft 365 Copilot'],Microsoft PowerPoint empowers you to create clean slideshow presentations and intricate pitch decks and gives you a powerful presentation maker.,,,,,custom,,,Microsoft PowerPoint,USA,Microsoft,,,,,application,https://www.microsoft.com/en-us/microsoft-365/powerpoint,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, open,,,,"['Microsoft 365 Copilot', 'Microsoft Business 
Chat']","Microsoft Teams is a proprietary business communication platform developed by Microsoft, as part of the Microsoft 365 family of products.",,,,,custom,,,Microsoft Teams,USA,Microsoft,,,,,application,https://www.microsoft.com/en-us/microsoft-teams/group-chat-software,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, open,,,,['Microsoft 365 Copilot'],Microsoft Word is a word processing software developed by Microsoft,,,,,custom,,,Microsoft Word,USA,Microsoft,,,,,application,https://www.microsoft.com/en-us/microsoft-365/word,,,,,,,,,https://www.microsoft.com/legal/terms-of-use,, limited,,,,[],"Inside look is a Microsoft Office feature, composing document insights highlighting key points, expected time to read, and popularity among others. ",,unknown,,Providing document insights to users.,custom,,unknown,Microsoft Inside Look,USA,Microsoft,unknown,unknown,,,application,https://support.microsoft.com/en-us/office/see-file-insights-before-you-open-a-file-87a23bbc-a516-42e2-a7b6-0ecb8259e026,,,,,unknown,unknown,unknown,Document level insights for users.,https://www.microsoft.com/legal/terms-of-use,unknown, limited,,,,[],"Suggested replies is a Microsoft Outlook feature that suggests responses to emails, available in: English, Spanish, Italian, French, German, Portuguese, Chinese Simplified, Chinese Traditional, Swedish, Russian, Korean, Czech, Hungarian, Arabic, Hebrew, Thai, Turkish, Japanese, Dutch, Norwegian, Danish, and Polish. 
",,unknown,,Suggesting email replies.,custom,,unknown,Microsoft Suggested Replies,USA,Microsoft,unknown,unknown,,,application,https://support.microsoft.com/en-us/office/use-suggested-replies-in-outlook-19316194-0434-43ba-a742-6b5890157379,,,,,unknown,unknown,unknown,Suggested emails.,https://www.microsoft.com/legal/terms-of-use,unknown, limited,,2023-03-28,,"['GPT-4', 'Microsoft security-specific model']","Microsoft Security Copilot is an AI-powered security analysis tool that enables analysts to respond to threats quickly, process signals at machine speed, and assess risk exposure in minutes. ",,unknown,,"Security Copilot is designed to enhance the capabilities of cybersecurity professionals. It leverages machine speed and scale to accelerate response to security incidents, discover and process threat signals, and assess risk exposure within minutes.",custom,,,Microsoft Security Copilot,USA,Microsoft,unknown,"Security Copilot employs a closed-loop learning system that learns from user interactions and feedback, enabling it to provide more coherent, relevant, and useful answers that continually improve over time. Security Copilot is committed to delivering safe, secure, and responsible AI solutions, ensuring that customers' data and AI models are protected with enterprise compliance and security controls. Customer data is owned and controlled by them, and not used to train AI models for anyone outside their organization.",,,application,https://blogs.microsoft.com/blog/2023/03/28/introducing-microsoft-security-copilot-empowering-defenders-at-the-speed-of-ai/,,,,,Security Copilot combines OpenAI's GPT-4 generative AI with a security-specific model from Microsoft. This security-specific model in turn incorporates a growing set of security-specific skills and is informed by Microsoft's unique global threat intelligence and more than 65 trillion daily signals.,unknown,unknown,"Actionable responses to security-related questions (text and image). 
Security event, incident or threat reports (PowerPoint slide).",https://www.microsoft.com/legal/terms-of-use,unknown, open,"Evaluated on GLUE, SQuAD 2.0, and CoQA benchmarks.",2019-10-01,,[],UniLM is a unified language model that can be fine-tuned for both natural language understanding and generation tasks.,,,,,MIT,text; text,,UniLM,USA,Microsoft,,,,340M parameters (dense),model,https://proceedings.neurips.cc/paper_files/paper/2019/file/c20bb2d9a50d5ac1f713f8b34d9aac5a-Paper.pdf,,unknown,8 NVIDIA Tesla V100 32GB GPUs,"10,000 steps in 7 hours",,,,,,, limited,,2021-04-12,,[],Docugami is a LLM focused on writing business documents and data using generative AI.,,,,"analyzing, writing, and connecting business documents and data",,text; text,,Docugami,USA,Microsoft,,,,20B parameters (dense),model,https://www.docugami.com/generative-ai,,unknown,,unknown,,,,,,, open,"Evaluated on a range of standardized vision benchmarks, and achieves state of the art performance on all experimentally.",2022-08-31,,['Multiway Transformer network'],BEiT-3 is a general-purpose multimodal foundation model for vision and vision-language tasks.,,,,,,"image, text; image, text",,BEiT-3,USA,Microsoft,,,,1.9B parameters (dense),model,https://arxiv.org/pdf/2208.10442.pdf,,unknown,,,,,,,,, open,Reports results on standard LLM benchmarks in comparison to other LLMs and test sets.,2023-04-24,,"['LLaMA', 'Evol-Instruct', 'Alpaca dataset']","Starting with an initial set of instructions, we use our proposed Evol-Instruct to rewrite them step by step into more complex instructions. Then, we mix all generated instruction data to fine-tune LLaMA. 
We call the resulting model WizardLM.",,https://huggingface.co/datasets/WizardLM/evol_instruct_70k/discussions,,"Creating large amounts of instruction data, particularly with high complexity",Apache 2.0,text; text,,WizardLM,USA,Microsoft,,,,7B parameters (dense),model,https://arxiv.org/pdf/2304.12244v1.pdf,https://huggingface.co/WizardLM/WizardLM-13B-1.0,,8 V100 GPUs,70 hours on 3 epochs,,,,,,, open,"Evaluated on four prominent code generation benchmarks HumanEval, HumanEval+, MBPP, and DS100.",2023-08-26,,"['Evol-Instruct', 'Alpaca dataset', 'StarCoder']","WizardCoder empowers Code LLMs with complex instruction fine-tuning, by adapting the Evol-Instruct method to the domain of code.",,https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0/discussions,,,BigCode Open Rail-M,text; text,,WizardCoder,USA,Microsoft,,,,34B parameters (dense),model,https://arxiv.org/pdf/2306.08568.pdf,https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0,,,,,,,,,, closed,Evaluated on standard image processing benchmarks,2023-11-10,,['FLD-5B'],"Florence-2 is a vision foundation model with a unified, prompt-based representation for a variety of computer vision and vision-language tasks.",,,,,unknown,"image, text; text",,Florence-2,USA,Microsoft,,,,771M parameters (dense),model,https://arxiv.org/pdf/2311.06242.pdf,,unknown,unknown,unknown,,,,,,, closed,FLD-5B evaluated in comparison to datasets that power other large-scale image models on standard image benchmarks.,2023-11-10,,[],FLD-5B is the dataset that powers Florence-2,,,,,unknown,"image, text",,FLD-5B,USA,Microsoft,,,[],1.3B image-text annotations,dataset,https://arxiv.org/pdf/2311.06242.pdf,,,,,,,,,,, open,Models trained on OpenOrca compared to GPT-series on language benchmarks.,2023-06-05,https://huggingface.co/datasets/Open-Orca/OpenOrca,"['GPT-3.5', 'GPT-4', 'Flan Collection']","The OpenOrca dataset is a collection of augmented FLAN Collection data. Currently ~1M GPT-4 completions, and ~3.2M GPT-3.5 completions. 
It is tabularized in alignment with the distributions presented in the ORCA paper and currently represents a partial completion of the full intended dataset, with ongoing generation to expand its scope.",,,,training and evaluation in the field of natural language processing.,MIT,text,,OpenOrca,USA,Microsoft,,,[],4.5M text queries,dataset,https://huggingface.co/datasets/Open-Orca/OpenOrca,,,,,,,,,,, open,LlongOrca evaluated on BigBench-Hard and AGIEval results.,2023-08-01,,"['OpenOrca', 'LLongMA-2']",LlongOrca is an attempt to make OpenOrca able to function in a Llong context.,,https://huggingface.co/Open-Orca/LlongOrca-7B-16k/discussions,,training and evaluation in the field of natural language processing.,LLaMA 2,text; text,,LlongOrca,USA,Microsoft,,,,7B parameters (dense),model,https://huggingface.co/Open-Orca/LlongOrca-7B-16k,https://huggingface.co/Open-Orca/LlongOrca-7B-16k,unknown,8x A6000-48GB (first-gen) GPUs,37 hours,,,,,,, open,"Evaluated on common sense reasoning, language understanding, and multi-step reasoning compared to other SOTA language models.",2023-09-11,,['phi-1'],Phi-1.5 is a large language transformer model.,,https://huggingface.co/microsoft/phi-1_5/discussions,,"Phi-1.5 is best suited for answering prompts using the QA format, the chat format, and the code format.",MIT,text; text,,Phi-1.5,USA,Microsoft,,generic web-crawl data is removed from dataset.,,1.3B parameters (dense),model,https://arxiv.org/pdf/2309.05463.pdf,https://huggingface.co/microsoft/phi-1_5,unknown,32 A100-40G GPUs,8 days,,,,,,, open,Orca 2 has been evaluated on a large number of tasks ranging from reasoning to grounding and safety.,2023-11-21,,['LLaMA 2'],Orca 2 is a finetuned version of LLAMA-2 for research purposes.,,https://huggingface.co/microsoft/Orca-2-13b/discussions,,Orca 2 is built for research purposes only. 
The main purpose is to allow the research community to assess its abilities and to provide a foundation for building better frontier models.,custom,text; text,unknown,Orca 2,USA,Microsoft,Any purposes other than research.,,,13B parameters (dense),model,https://arxiv.org/pdf/2311.11045.pdf,https://huggingface.co/microsoft/Orca-2-13b,unknown,32 NVIDIA A100 80GB GPUs,80 hours,,,,,,, open,"The model has been evaluated against benchmarks that test common sense, language understanding, mathematics, coding, long-term context, and logical reasoning. The Phi-3 Medium-128K-Instruct demonstrated robust and state-of-the-art performance.",2024-05-21,,[],"Phi-3 is a 14 billion-parameter, lightweight, state-of-the-art open model trained using the Phi-3 datasets.",,https://huggingface.co/microsoft/Phi-3-medium-128k-instruct/discussions,,The model's primary use cases are for commercial and research purposes that require capable reasoning in memory or compute constrained environments and latency-bound scenarios. It can also serve as a building block for generative AI-powered features.,MIT,text; text,"Issues like allocation, high-risk scenarios, misinformation, generation of harmful content and misuse should be monitored and addressed.",Phi-3,USA,Microsoft,"The model should not be used for high-risk scenarios without adequate evaluation and mitigation techniques for accuracy, safety, and fairness.",The model underwent post-training processes viz. 
supervised fine-tuning and direct preference optimization to increase its capability in following instructions and aligning to safety measures.,,14B parameters,model,https://arxiv.org/abs/2404.14219,https://huggingface.co/microsoft/Phi-3-medium-128k-instruct,unknown,unknown,unknown,,,,,,, closed,Evaluated by comparing climate predictions to actual happened events.,2024-05-28,,[],Aurora is a large-scale foundation model of the atmosphere trained on over a million hours of diverse weather and climate data.,,,,,unknown,text; climate forecasts,,Aurora,USA,Microsoft,,,,1.3B parameters,model,https://arxiv.org/pdf/2405.13063,,unknown,32 A100 GPUs,unknown,,,,,,, closed,"Evaluated on a digital pathology benchmark comprising 9 cancer subtyping tasks and 17 pathomics tasks, with Prov-GigaPath demonstrating SoTA performance in 25 out of 26 tasks.",2024-05-22,,['GigaPath'],Prov-GigaPath is a whole-slide pathology foundation model pretrained on 1.3 billion 256 × 256 pathology image tiles.,,,,,unknown,image; embeddings,,Prov-GigaPath,USA,Microsoft,,,,unknown,model,https://www.nature.com/articles/s41586-024-07441-w,,unknown,4 80GB A100 GPUs,2 days,,,,,,, open,"The model was evaluated across a variety of public benchmarks, comparing with a set of models including Mistral-Nemo-12B-instruct-2407, Llama-3.1-8B-instruct, Gemma-2-9b-It, Gemini-1.5-Flash, and GPT-4o-mini-2024-07-18. It achieved a similar level of language understanding and math as much larger models. It also displayed superior performance in reasoning capability, even with only 6.6B active parameters. It was also evaluated for multilingual tasks.",2024-09-08,,['Phi-3 dataset'],"Phi-3.5-MoE is a lightweight, state-of-the-art open model built upon datasets used for Phi-3 - synthetic data and filtered publicly available documents, with a focus on very high-quality, reasoning dense data. It supports multilingual and has a 128K context length in tokens. 
The model underwent a rigorous enhancement process, incorporating supervised fine-tuning, proximal policy optimization, and direct preference optimization to ensure instruction adherence and robust safety measures.",,Unknown,,"The model is intended for commercial and research use in multiple languages. It is designed to accelerate research on language and multimodal models, and for use as a building block for generative AI powered features. It is suitable for general purpose AI systems and applications which require memory/compute constrained environments, latency bound scenarios, and strong reasoning.",MIT,text; text,Unknown,Phi-3.5-MoE,USA,Microsoft,"The model should not be used for downstream purposes it was not specifically designed or evaluated for. Developers should evaluate and mitigate for accuracy, safety, and fairness before using within a specific downstream use case, particularly for high risk scenarios.","The model was enhanced through supervised fine-tuning, proximal policy optimization, and direct preference optimization processes for safety measures.",,61B parameters (sparse); 6.6B active parameters,model,https://huggingface.co/microsoft/Phi-3.5-MoE-instruct,https://huggingface.co/microsoft/Phi-3.5-MoE-instruct,Unknown,Unknown,Unknown,,,,,,, open,Phi-4 outperforms comparable and larger models on math related reasoning.,2024-12-13,,[],"the latest small language model in Phi family, that offers high quality results at a small size (14B parameters).",,unknown,,"Specialized in complex reasoning, particularly good at math problems and high-quality language processing.",unknown,text; text,Azure AI evaluations in AI Foundry enable developers to iteratively assess the quality and safety of models and applications using built-in and custom metrics to inform mitigations.,Phi-4,USA,Microsoft,unknown,Building AI solutions responsibly is at the core of AI development at Microsoft. 
We have made our robust responsible AI capabilities available to customers building with Phi models.,,14B parameters,model,https://techcommunity.microsoft.com/blog/aiplatformblog/introducing-phi-4-microsoft%E2%80%99s-newest-small-language-model-specializing-in-comple/4357090,unknown,unknown,unknown,unknown,,,,,,, open,,2023-05-03,,[],"RWKV World 4 is an RNN with GPT-level LLM performance, which can also be directly trained like a GPT transformer (parallelizable).",,https://huggingface.co/RWKV/rwkv-4-world-7b/discussions,,,Apache 2.0,text; text,unknown,RWKV World 4,China,RWKV,,,,7B parameters (dense),model,https://huggingface.co/RWKV/rwkv-4-world-7b,https://huggingface.co/RWKV/rwkv-4-world-7b,unknown,unknown,unknown,,,,,,, open,,2023-05-15,,[],"RWKV 4 Pile is an RNN with GPT-level LLM performance, which can also be directly trained like a GPT transformer (parallelizable).",,https://huggingface.co/RWKV/rwkv-4-14b-pile/discussions,,,Apache 2.0,text; text,unknown,RWKV 4 Pile,China,RWKV,,,,14B parameters (dense),model,https://huggingface.co/RWKV/rwkv-4-14b-pile,https://huggingface.co/RWKV/rwkv-4-14b-pile,unknown,unknown,unknown,,,,,,, open,,2023-12-16,,[],"RWKV World 5 is an RNN with GPT-level LLM performance, which can also be directly trained like a GPT transformer (parallelizable).",,https://huggingface.co/RWKV/rwkv-5-world-3b/discussions,,,Apache 2.0,text; text,unknown,RWKV World 5,China,RWKV,,,,3B parameters (dense),model,https://huggingface.co/RWKV/rwkv-5-world-3b,https://huggingface.co/RWKV/rwkv-5-world-3b,unknown,unknown,unknown,,,,,,, open,The model was evaluated extensively across a wide range of public and in-house benchmarks. The comparative analysis showed that the performance of EXAONE 3.0 was competitive in English and excellent in Korean compared to other large language models of a similar size.,2024-09-08,,['MeCab'],EXAONE 3.0 is an instruction-tuned large language model developed by LG AI Research. 
It demonstrates notably robust performance across a range of tasks and benchmarks. It has been fine-tuned to be capable of complex reasoning and has a particular proficiency in Korean. The released 7.8B parameter model is designed to promote open research and innovation.,,Unknown,,"The model was intended for non-commercial and research purposes. The capabilities of the model allow for use cases that involve advanced AI and language processing tasks, particularly in fields requiring proficiency in English and Korean.",Unknown,text; text,Unknown,EXAONE 3.0 Instruction Tuned Language Model,South Korea,LG AI Research,Commercial use is not intended for this model. Its intended use is for non-commercial research and innovation.,"Extensive pre-training on a diverse dataset, and advanced post-training techniques were employed to enhance instruction-following capabilities. The model was also trained to fully comply with data handling standards.",,7.8B parameters (dense),model,https://arxiv.org/pdf/2408.03541,unknown,Unknown,Unknown,Unknown,,,,,,, open,,2022-10-11,,"['SARS-CoV-2 genome dataset', 'BV-BRC dataset']",,,,,,MIT,text; genome sequence,,GenSLM,USA,Argonne National Laboratory,,,,25B parameters (dense),model,https://www.biorxiv.org/content/10.1101/2022.10.10.511571v1,,,,,,,,,,, limited,,2023-03-19,,[],Cformers is a set of transformers that act as an API for AI inference in code.,,,,,MIT,,,Cformers,USA,Nolano,,,,,application,https://www.nolano.org/services/Cformers/,,,,,,,,,,, limited,The model can produce problematic outputs (especially when prompted to do so).,2024-11-21,,['Llama 3.1'],"Tülu3 is a leading instruction following model family, offering fully open-source data, code, and recipes designed to serve as a comprehensive guide for modern post-training techniques.",,unknown,,Tülu3 is intended for research and educational use.,Llama 3.1 Community License Agreement,text; text,unknown,Llama 3.1 Tulu 3,USA,Allen Institute for AI,The model can produce problematic 
outputs (especially when prompted to do so).,"The Tülu3 models have limited safety training, but are not deployed automatically with in-the-loop filtering of responses like ChatGPT.",,70B parameters,model,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B,https://huggingface.co/allenai/Llama-3.1-Tulu-3-8B,unknown,unknown,unknown,,,,,,, open,Evaluated on text and code benchmarks in comparison to other models.,2023-10-10,,"['LLaMA 2', 'The Stack', 'RefinedWeb', 'RedPajama', 'Common Crawl', 'Wikipedia', 'ArXiv']",Lemur is an openly accessible language model optimized for both natural language and coding capabilities to serve as the backbone of versatile language agents.,,https://huggingface.co/OpenLemur/lemur-70b-v1/discussions,,,LLaMA2,"code, text; code, text",,Lemur,USA,OpenLemur,,,,70B parameters (dense),model,https://arxiv.org/pdf/2310.06830.pdf,https://huggingface.co/OpenLemur/lemur-70b-v1,unknown,TPUv4-512 pod,unknown,,,,,,, open,Evaluated on text and code benchmarks in comparison to other models.,2023-10-10,,"['Lemur', 'OpenAssistant 1', 'OpenOrca', 'ShareGPT & ChatLogs', 'Evol-CodeAlpaca data']",Lemur-Chat is an openly accessible language model optimized for both natural language and coding capabilities to serve as the backbone of versatile language agents.,,https://huggingface.co/OpenLemur/lemur-70b-chat-v1/discussions,,,CC-BY-NC-4.0,text; text,,Lemur-Chat,USA,OpenLemur,,,,70B parameters (dense),model,https://arxiv.org/pdf/2310.06830.pdf,https://huggingface.co/OpenLemur/lemur-70b-chat-v1,unknown,unknown,unknown,,,,,,, Open,"Evaluations show that GLM-4, 1) closely rivals or outperforms GPT-4 in terms of general metrics such as MMLU, GSM8K, MATH, BBH, GPQA, and HumanEval, 2) gets close to GPT-4-Turbo in instruction following as measured by IFEval, 3) matches GPT-4 Turbo (128K) and Claude 3 for long context tasks, and 4) outperforms GPT-4 in Chinese alignments as measured by AlignBench.",2023-07-02,,[],"ChatGLM is an evolving family of large language models that 
have been developed over time. The GLM-4 language series includes GLM-4, GLM-4-Air, and GLM-4-9B. They are pre-trained on ten trillion tokens mostly in Chinese and English and are aligned primarily for Chinese and English usage. The high-quality alignment is achieved via a multi-stage post-training process, which involves supervised fine-tuning and learning from human feedback. GLM-4 All Tools model is further aligned to understand user intent and autonomously decide when and which tool(s) to use.",,Unknown,,"General language modeling, complex tasks like accessing online information via web browsing and solving math problems using Python interpreter.",Apache 2.0,text; text,Unknown,ChatGLM,unknown,"Team GLM, Zhipu AI, Tsinghua University",Unknown,"High-quality alignment is achieved via a multi-stage post-training process, which involves supervised fine-tuning and learning from human feedback.",,9B parameters,model,https://arxiv.org/pdf/2406.12793,https://huggingface.co/THUDM/glm-4-9b,Unknown,Unknown,Unknown,,,,,,, open,https://huggingface.co/spaces/bigscience-data/roots-search,2022-06-06,https://huggingface.co/spaces/bigscience-data/bigscience-corpus,[],"The Responsible Open-science Open-collaboration Text Sources (ROOTS) corpus, a 1.6TB dataset spanning 59 languages that was used to train the 176-billion-parameter BigScience Large Open-science Open-access Multilingual (BLOOM) language model.",unknown,https://huggingface.co/spaces/bigscience-data/roots-search/discussions,See section 2 of the paper.,"To empower large-scale monolingual and multilingual modeling projects with both the data and the processing tools, as well as stimulate research around this large multilingual corpus.",custom,"code, text",,ROOTS,International,BigScience,,"Data collection involved merging and deduplicating searches to remove menus, HTML tags. 
Further, a quality improvement pipeline was implemented.",['https://huggingface.co/spaces/bigscience-data/roots-search'],1.6TB,dataset,https://openreview.net/forum?id=UoEw6KigkUn,,,,,,,,,,, open,See the paper.,2022-10-15,https://huggingface.co/datasets/bigscience/P3,[],The Public Pool of Prompts (P3) are prompts written in a unified format used to train T0++.,"We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content).",Point of Contact is [Victor Sanh](https://huggingface.co/datasets/bigscience/P3),The Public Pool of Prompts relies on the Hugging Face Dataset library. Any public dataset in the Datasets library can be prompted. We select the datasets that have at least one subset in English and excluded datasets containing (predominantly) non-natural language examples.,Multitask finetuning of language models.,Apache 2.0,text,,P3,International,BigScience,,"Data collection involved merging and deduplicating searches to remove menus, HTML tags. Further, a quality improvement pipeline was implemented.",['https://huggingface.co/datasets/bigscience/P3/viewer'],2000 prompts,dataset,https://arxiv.org/pdf/2110.08207.pdf,,,,,,,,,,, open,See the paper.,2022-11-03,https://huggingface.co/datasets/bigscience/xP3,['P3'],"xP3 (Crosslingual Public Pool of Prompts) is a collection of prompts and datasets across 46 languages & 16 NLP tasks. 
It is used for the training of BLOOMZ and mT0, multilingual language models capable of following human instructions in dozens of languages zero-shot.","We conservatively decided not to prompt datasets that contain potentially harmful content (for instance, datasets built on social media content).",Point of Contact is [Niklas Muennighoff](https://huggingface.co/datasets/bigscience/xP3),xP3 adds 28 multilingual datasets to P3 based on the P3 task taxonomy.,Multitask finetuning of language models.,Apache 2.0,"code, text",,xP3,International,BigScience,,"Data collection involved merging and deduplicating searches to remove menus, HTML tags. Further, a quality improvement pipeline was implemented.",['https://huggingface.co/datasets/bigscience/xP3/viewer'],9.4GB,dataset,https://arxiv.org/abs/2211.01786,,,,,,,,,,, open,,2021-10-15,,"['T5', 'P3']",T0++ is an multitask fine-tuned language model based on T5.,,https://huggingface.co/bigscience/T0pp/discussions,,"You can use the models to perform inference on tasks by specifying your query in natural language, and the models will generate a prediction.",Apache 2.0,text; text,,T0++,International,BigScience,,https://arxiv.org/pdf/2110.08207.pdf,,11B parameters (dense),model,https://arxiv.org/pdf/2110.08207.pdf,https://huggingface.co/bigscience/T0pp,0.9 tCO2e,Jean Zay (v3-512),27 hours,,,,,,, open,,2022-07-12,,['ROOTS'],BLOOM is an autoregressive multilingual language model.,,https://huggingface.co/bigscience/bloom/discussions,,This model is being created in order to enable public research on large language models (LLMs). LLMs are intended to be used for language generation or as a pretrained base model that can be further fine-tuned for specific tasks. Use cases below are not exhaustive.,BigScience RAIL v1.0,"code, text; code, text",,BLOOM,International,BigScience,"Using the model in high-stakes settings is out of scope for this model (e.g. biomedical/political/legal/finance domains, evaluating or scoring individuals). 
The model is not designed for critical decisions nor uses with any material consequences on an individual's livelihood or wellbeing. The model outputs content that appears factual but may not be correct. Misuse. Intentionally using the model for harm, violating human rights, or other kinds of malicious activities, is a misuse of this model (e.g. spam generation, disinformation, disparagement, deception, surveillance).",,,176B parameters (dense),model,https://arxiv.org/abs/2211.05100,,25 tCO2e,Jean Zay (48 * 8xA100 80GB nodes),7039 petaflop/s-days,,,,,,, open,https://huggingface.co/bigscience/bloomz#evaluation,2021-10-15,,"['mT5', 'xP3']",mT0 is a multitask fine-tuned multilingual language model based on mT5.,,https://huggingface.co/bigscience/bloomz/discussions,,We recommend using the model to perform tasks expressed in natural language.,BigScience RAIL v1.0,text; text,,mT0,International,BigScience,,https://arxiv.org/pdf/2211.01786.pdf,,13B parameters (dense),model,https://arxiv.org/pdf/2110.08207.pdf,https://huggingface.co/bigscience/T0pp,unknown,"Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links)",unknown,,,,,,, open,https://huggingface.co/bigscience/bloomz#evaluation,2022-11-03,,"['BLOOM', 'xP3']",BLOOMZ is a multitask fine-tuned autoregressive multilingual language model.,,https://huggingface.co/bigscience/bloomz/discussions,,We recommend using the model to perform tasks expressed in natural language.,BigScience RAIL v1.0,"code, text; code, text",,BLOOMZ,International,BigScience,,https://arxiv.org/pdf/2211.01786.pdf,,176B parameters (dense),model,https://arxiv.org/pdf/2211.01786.pdf,https://huggingface.co/bigscience/bloomz,unknown,"Jean Zay (288 A100 80GB GPUs with 8 GPUs per node (36 nodes) using NVLink 4 inter-gpu connects, 4 OmniPath links)",unknown,,,,,,, limited,"Evaluated on MMLU, GSM8K, HumanEval, and GPQA benchmarks, among others.",2024-02-12,,[],"Reka Flash is a multimodal, multilingual, 
state-of-the-art 21B model trained entirely from scratch.",,,,,unknown,"image, text, video; text",unknown,Reka Flash,USA,Reka,,,,21B parameters (dense),model,https://reka.ai/reka-flash-an-efficient-and-capable-multimodal-language-model/,,unknown,unknown,unknown,,,,,,, limited,"Reka Core was evaluated against leading models such as OpenAIs GPT-4, Claude-3 Opus, and Gemini Ultra on a variety of tasks and metrics including multimodal and human evaluation conducted by a third party. It was found to be competitive or even surpassing these models.",2024-04-15,,[],"Reka Core is a frontier-class multimodal language model comparable to industry leaders. It has powerful capabilities including multimodal understanding (including images, videos, and audio), superb reasoning abilities, code generation, and multilinguality with proficiency in 32 languages.",,unknown,,"Reka Core can be used in e-commerce, social media, digital content and video games, healthcare, robotics, and other industries for tasks that require multimodal understanding, coding, complex reasoning, and more.",unknown,"audio, image, text, video; text",unknown,Reka Core,USA,Reka,unknown,,,unknown,model,https://www.reka.ai/news/reka-core-our-frontier-class-multimodal-language-model,,unknown,thousands of GPUs,few months,,,,,,, open,Mochi 1 sets a new best-in-class standard for open-source video generation. It also performs very competitively with the leading closed models... We benchmark prompt adherence with an automated metric using a vision language model as a judge following the protocol in OpenAI DALL-E 3. We evaluate generated videos using Gemini-1.5-Pro-002.,2025-01-14,,"['DDPM', 'DreamFusion', 'Emu Video', 'T5-XXL']","Mochi 1 is an open-source video generation model designed to produce high-fidelity motion and strong prompt adherence in generated videos, setting a new standard for open video generation systems.",,unknown,,"Advance the field of video generation and explore new methodologies. 
Build innovative applications in entertainment, advertising, education, and more. Empower artists and creators to bring their visions to life with AI-generated videos. Generate synthetic data for training AI models in robotics, autonomous vehicles and virtual environments.",Apache 2.0,text; video,unknown,Mochi 1,USA,Genmo,unknown,robust safety moderation protocols in the playground to ensure that all video generations remain safe and aligned with ethical guidelines.,,10B parameters,model,https://www.genmo.ai/blog,unknown,unknown,unknown,unknown,,,,,,, limited,,2023-03-14,,['GPT-4 API'],An AI-powered assistant that functions as both a virtual tutor for students and a classroom assistant for teachers.,,,,,unknown,,,Khanmigo,USA,Khan Academy,,,,,application,https://www.khanacademy.org/khan-labs#khanmigo,,,,,,,,,,, limited,,2023-02-23,,"['ChatGPT API', 'Sonantic AI']","The DJ is a personalized AI guide that knows you and your music taste so well that it can choose what to play for you. This feature, first rolling out in beta, will deliver a curated lineup of music alongside commentary around the tracks and artists we think you’ll like in a stunningly realistic voice.",,,,,custom,,,AI DJ,Sweden,Spotify,,,,,application,https://newsroom.spotify.com/2023-02-22/spotify-debuts-a-new-ai-dj-right-in-your-pocket/,,,,,,,,,https://www.spotify.com/us/legal/end-user-agreement/,, limited,,,,['OpenAI API'],"Viable analyzes qualitative consumer feedback and provides summary feedback to companies. ",,unknown,,"Intended to be used by companies to digest qualitative consumer feedback. ",unknown,,unknown,Viable,USA,Viable,"Prohibited uses are listed in the Terms of Service [[Terms of Service]](https://www.askviable.com/terms-of-service). The terms don't include statements specific to the use of the content generated by the system or GPT-3. 
",unknown,,,application,https://www.askviable.com/,,,,,unknown,unknown,unknown,"Question and answer, summarization, sentiment analysis, topic identification",https://www.askviable.com/terms-of-service,unknown, closed,,2023-02-20,,['LAION-5B'],,"We eliminate duplicates, low resolution images, and images potentially contain harmful content from the LAION dataset.",,,,unknown,"image, text",,LAION-1B,China,Alibaba,,,[],1B image-text pairs,dataset,https://arxiv.org/pdf/2302.09778.pdf,,,,,,,,,,, closed,,2023-02-20,,"['ImageNet', 'WebVision', 'LAION-1B']",,,,,,unknown,"image, text; image",,Composer,China,Alibaba,,,,4.4B parameters (dense),model,https://arxiv.org/pdf/2302.09778.pdf,,,,,,,,,,, open,"Evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, etc.",2023-08-03,,[],"QWEN is a comprehensive language model series that encompasses distinct models with varying parameter counts. Qwen series, now including Qwen, the base language models, namely Qwen-7B and Qwen-14B, as well as Qwen-Chat, the chat models, namely Qwen-7B-Chat and Qwen-14B-Chat. ",,,,,custom,"image, text; text","Governed by the laws of China, without regard to conflict of law principles, and the UN Convention on Contracts for the International Sale of Goods does not apply to this Agreement. And The People's Courts in Hangzhou City shall have exclusive jurisdiction over any dispute arising out of this Agreement.",Qwen,China,Alibaba,,"They filter out low-quality data, they employ a combination of rule-based and machine-learning-based methods. Specifically, they use multiple models to score the content, including language models, text-quality scoring models, and models for identifying potentially offensive or inappropriate content. They also manually sample texts from various sources and review them to ensure their quality. 
To further enhance the quality of our data, they selectively up-sample data from certain sources, to ensure that our models are trained on a diverse range of high-quality content.",,14B parameters (dense),model,https://arxiv.org/abs/2309.16609,https://huggingface.co/Qwen,unknown,,,,,,,,, open,"Base models are evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU, all standard English and Chinese benchmarks, and chat models are evaluated on Chatbot Arena, AlpacaEval, MT-Bench, etc.",2024-02-04,,[],"Qwen 1.5 is the next iteration in their Qwen series, consisting of Transformer-based large language models pretrained on a large volume of data, including web texts, books, codes, etc.",,https://huggingface.co/Qwen/Qwen1.5-72B/discussions,,,custom,text; text,unknown,Qwen 1.5,China,Alibaba,,unknown,,72B parameters (dense),model,https://qwenlm.github.io/blog/qwen1.5/,https://huggingface.co/Qwen/Qwen1.5-72B,unknown,unknown,unknown,,,,,,, open,"Base models are evaluated on MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU, all standard English and Chinese benchmarks, and chat models are evaluated on Chatbot Arena, AlpacaEval, MT-Bench, etc.",2024-03-28,,[],"Qwen 1.5 is the next iteration in their Qwen series, consisting of Transformer-based large language models pretrained on a large volume of data, including web texts, books, codes, etc. 
Qwen 1.5 MoE is the MoE model of the Qwen 1.5 series.",,https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B/discussions,,,custom,text; text,unknown,Qwen 1.5 MoE,China,Qwen Team,,unknown,,14B parameters with 2.7B parameters for activation (MoE),model,https://qwenlm.github.io/blog/qwen-moe/,https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B,unknown,unknown,unknown,,,,,,, open,"The model was evaluated on 3 benchmarks (MMLU for English, M3Exam (M3e) for English, Chinese, Vietnamese, Indonesian, and Thai, and VMLU for Vietnamese) and it outperformed GPT-3 and Vistral-7B-chat models across these benchmarks in the given languages.",2024-04-12,,['Gemma'],SeaLLM v2.5 is a multilingual large language model for Southeast Asian (SEA) languages.,,https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5/discussions,,"The model is intended for multilingual tasks such as knowledge retrieval, math reasoning, and instruction following. Also, it could be used to provide multilingual assistance.",custom,text; text,unknown,SeaLLM v2.5,China,"DAMO Academy, Alibaba","The model should not be used in a way that could lead to inaccurate, misleading or potentially harmful generation. 
Users should comply with local laws and regulations when deploying the model.","Despite efforts in red teaming and safety fine-tuning and enforcement, the creators suggest, developers and stakeholders should perform their own red teaming and provide related security measures before deployment, and they must abide by and comply with local governance and regulations.",,7B parameters,model,https://github.com/DAMO-NLP-SG/SeaLLMs,https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5,unknown,unknown,unknown,,,,,,, closed,,2023-03-30,https://arxiv.org/pdf/2303.17564.pdf#section.2,[],"A comprehensive dataset consisting of a range of English financial documents including news, filings, press releases, web-scraped financial documents, and social media drawn from the Bloomberg archives that was used to train the BloombergGPT model.",,,"FinPile consists of English financial documents. Authors utilize The Bloomberg Terminal, which is an extensive collection of curated and maintained documents, to create the FinPile dataset. Each document in FinPile is time-stamped, with dates ranging from 2007-03-01 to 2022-07-31. Types of data included are given below: 1. Web (298B tokens) - Includes Bloomberg's web crawl focused on high-quality websites that have financially relevant information. This makes up the majority of FinPile. 2. News (38B tokens) - Includes all news sources relevant to the financial community, excluding news articles written by Bloomberg journalists. Overall, there are hundreds of English news sources in FinPile including ""Bloomberg Transcripts"", which are transcripts of Bloomberg TV news. 3. Filings (14B tokens) - Includes financial statements prepared by (public) companies and made available to the general public. In the dataset, a majority of the filings come from EDGAR, which is the SEC's online database. 4. Press (9B tokens) - Includes press releases typically issued by companies that are financially relevant. 5. 
Bloomberg (5B tokens) - Includes Bloomberg authored news and other documents such as opinions and analyses. The largest sources are “Bloomberg News” and “Bloomberg First Word”, the Bloomberg-authored wire of real-time news. ",Used to train the BloombergGPT model.,unknown,text,,FinPile,USA,Bloomberg,,,[],363B tokens,dataset,https://arxiv.org/pdf/2303.17564.pdf#section.2,,,,,,,,,,, closed,"Authors evaluate the performance of BloombergGPT on two broad categories of tasks, finance-specific and general purpose, on several standard benchmarks. They compare BloombergGPT to the three closest models: GPT-NeoX, OPT-66B and BLOOM-176B. They also report results from the original GPT-3 whenever externally available. They conclude ""We achieve strong results on general LLM benchmarks and outperform comparable models on financial tasks. We attribute this, in decreasing order of impact, to 1. a well-curated internal dataset, 2. our unique choice in tokenizer, and 3. an up-to-date architecture."" ",2023-03-30,,"['FinPile', 'The Pile', 'C4', 'Wikipedia']",BloombergGPT is a 50 billion parameter large language model that is specifically trained on a wide range of financial data to support a diverse set of natural language processing tasks within the financial industry.,,,,"""This model will assist Bloomberg in improving existing financial NLP tasks, such as sentiment analysis, named entity recognition, news classification, and question answering, among others. Furthermore, BloombergGPT will unlock new opportunities for marshalling the vast quantities of data available on the Bloomberg Terminal to better help the firm's customers, while bringing the full potential of AI to the financial domain."" ",unknown,text; text,,BloombergGPT,USA,Bloomberg,,"Authors state the following: - ""To provide natural language applications to the financial community, we have developed a rigorous risk and testing assessment process. This process includes careful annotation guidelines Tseng et al. 
(2020), pre-launch review at multiple levels by the central risk and compliance organizations, and by the product leaders (e.g., the newsroom) as applicable, and post-launch monitoring. Moreover, we conduct our research, development, and deployment of NLP and AI systems in accordance with all applicable regulations."" - ""Similarly, toxicity and bias are areas where, as a company, we take extraordinary care with any content we produce, whether from humans or machines. Since the measurement of toxicity and bias in our model depends on its application areas, quantifying the potential for the generation of harmful language remains an open question. We are particularly interested in studying whether FinPile, which is cleaner and contains fewer examples of overtly biased or toxic language (e.g., Press Releases), reduces the proclivity of the model to generate inappropriate content."" ",,50B parameters (dense),model,https://arxiv.org/abs/2303.17564,,unknown,64 Amazon EC2 p4d.24xlarge instances each with 8 NVIDIA 40GB A100 GPUs (i.e. total 512 A100 GPUs),53 days,,,,,,, closed,"MassiveText data was analyzed for toxicity, language distribution, URL breakdown, and tokenizer compression rates on the subsets [[Section A.2]](https://arxiv.org/pdf/2112.11446.pdf#subsection.A.2). ",2021-12-08,https://arxiv.org/pdf/2112.11446.pdf#subsection.A.5,[],"The MassiveText dataset was used to train the Gopher model. ","Documents that are not in English are excluded. ",unknown,"MassiveText data come from 6 sources: MassiveWeb (48%), Books (27%), C4 (10%), News (10%), GitHub (3%), and Wikipedia (2%). MassiveWeb is a web text corpus curated for MassiveText. ","Pre-training of language models by DeepMind researchers [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.C). 
",unknown,"code, text",unknown,MassiveText,UK,Google Deepmind,unknown,"The authors use simple heuristics for filtering low quality documents as opposed to relying on a classifier based on a ""gold"" set such as the English Wikipedia, which could ""inadvertently bias towards a certain demographic or erase certain dialects or sociolects from representation."" MassiveWeb subset was filtered using Google’s SafeSearch filter, preferring it over to word filters that ""disproportinately filter out inoffensive content associated with minority groups. MassiveWeb was filtered further for word or phrase repetitions. All the subsets were filtered for document deduplication and test set contamination"" [[Appendix A]](https://arxiv.org/pdf/2112.11446.pdf#appendix.A). ",[],10.5 TB,dataset,https://arxiv.org/pdf/2112.11446.pdf,,,,,,,,,,, closed,,2022-04-29,https://arxiv.org/pdf/2204.14198.pdf#appendix.F,[],"M3W (MassiveWeb) is dataset used to train Flamingo, and other vision-language models and was created by researchers and engineers. ",unknown,,"M3W has interleaved images (185M) and text (182GB) from the web. ","Pre-training of vision and language models by DeepMind researchers [[Datasheet]](https://arxiv.org/pdf/2204.14198.pdf#appendix.F). ",unknown,"code, text",unknown,M3W,UK,Google Deepmind,unknown,"The authors provide a basic description of data processing and cleaning. ",[],"182GB Text, 185M Images",dataset,https://arxiv.org/pdf/2204.14198.pdf,,,,,,,,,,, closed,"The Gato dataset compiles many datasets introduced in prior works, with associated analyses. ",2022-05-12,,['MassiveText'],"The Gato datasets are a collection of data used to train the Gato model. ",,,"The full composition of the dataset across individual sources can be found in the paper. 
",unknown,unknown,"image, text, robotics trajectories, simulated control tasks",,Gato dataset,UK,Google Deepmind,unknown,unknown,[],"10.5 TB Text, 2.2B Text-Image pairs, 1.5T tokens of simulated control, 500k robotics trajectories",dataset,https://www.deepmind.com/blog/a-generalist-agent,,,,,,,,,,, open,,2021-07-15,,['Protein Data Bank'],AlphaFold2 is a protein language model trained on protein sequences,,,,,Apache 2.0,amino acid sequence; protein structure,,AlphaFold2,UK,Google Deepmind,,,,93M parameters (dense),model,https://www.nature.com/articles/s41586-021-03819-2,,,128 TPUv3 cores,11 days,,,,,,, closed,"Model performance was evaluated on image and video datasets primarily, including dialogue. ",2022-04-29,,"['M3W', 'ALIGN', 'LTIP', 'VTP', 'Chinchilla']","Flamingo is a Visual Language Model using the Transformer architecture that is intended for few-shot learning. ",,,,"The intended uses are stated in the model card: ""The primary use is research on visual language models (VLM), including: research on VLM applications like classification, captioning or visual question answering, understanding how strong VLMs can contribute to AGI, advancing fairness and safety research in the area of multimodal research, and understanding limitations of current large VLMs."" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). ",unknown,"image, text; text",unknown,Flamingo,UK,Google Deepmind,"The model card lists the following as out of scope uses of the model: ""Uses of the model for visually conditioned language generation in harmful or deceitful settings. Broadly speaking, the model should not be used for downstream applications without further safety and fairness mitigations specific to each application."" [[Model Card]](https://arxiv.org/pdf/2204.14198.pdf#appendix.E). 
",,,80B parameters (dense),model,https://arxiv.org/pdf/2204.14198.pdf,https://arxiv.org/pdf/2204.14198.pdf#appendix.E,unknown,TPU,15 days on 1536 TPUs,,,,,,, closed,,2022-02-02,,[],AlphaCode is an autoregressive language model trained on code,,,,,unknown,text; code,,AlphaCode,UK,Google Deepmind,,,,41B parameters (dense),model,https://arxiv.org/abs/2203.07814,,,,,,,,,,, closed,"Model performance was evaluated and analyzed on 152 NLP tasks including: Language Modelling (20), Reading Comprehension (3), Fact Checking (3), Question Answering (3), Common Sense (4), MMLU (57), BIG-bench (62) [[Section 4]](https://arxiv.org/pdf/2112.11446.pdf#section.4); on toxicity and bias datasets [[Section 5]](https://arxiv.org/pdf/2112.11446.pdf#section.5); and on dialogue tasks [[Section 6]](https://arxiv.org/pdf/2112.11446.pdf#section.6). ",2021-12-08,,['MassiveText'],"Gopher is an autoregressive language model based on the Transformer architecture with two modifications: using RMSNorm instead of LayerNorm and using relative positional encoding scheme instead of absolute positional encodings [[Section 3]](https://arxiv.org/pdf/2112.11446.pdf#subsection.3.1). ",,"The feedback for the model can be provided at the email linked in the model card, geoffreyi at google.com [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). ",,"The intended uses are stated in the Gopher model card: ""The primary use is research on language models, including: research on NLP applications like machine translation and question answering, understanding how strong language models can contribute to AGI, advancing fairness and safety research, and understanding limitations of current LLMs"" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). ",unknown,text; code,unknown,Gopher,UK,Google Deepmind,"The model card lists the following as out of scope uses of the model: ""for language generation in harmful or deceitful settings. 
More generally, the model should not be used for downstream applications without further safety and fairness mitigations"" [[Model Card]](https://arxiv.org/pdf/2112.11446.pdf#appendix.B). ",,,280B parameters (dense),model,https://arxiv.org/pdf/2112.11446.pdf,https://arxiv.org/pdf/2112.11446.pdf#appendix.B,380 tCO2e,TPUv3 pods,7303.24 petaflop/s-day,,,,,,, closed,"Model performance was evaluated and analyzed on many NLP tasks including language modeling, reading comprehension, question answering, commonsense-intensive tasks, and the BIG-Bench and MMLU meta-benchmarks. ",2022-03-29,,['MassiveText'],"Chinchilla is an autoregressive language model based on the Transformer architecture with improved scaling laws. ",,"The feedback for the model can be provided at the email linked in the model card, {jordanhoffmann, sborgeaud, amensch,sifre} at deepmind.com [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). ",,"The intended uses are stated in the Chinchilla model card: ""The primary use is research on language models, including: research on the scaling behaviour of language models along with those listed in Gopher paper"" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). ",unknown,text; code,Unknown,Chinchilla,UK,Google Deepmind,"The model card lists the following as out of scope uses of the model: ""for language generation in harmful or deceitful settings. More generally, the model should not be used for downstream applications without further safety and fairness mitigations"" [[Model Card]](https://arxiv.org/pdf/2203.15556.pdf#appendix.I). ",,,70B parameters (dense),model,https://arxiv.org/pdf/2203.15556.pdf,https://arxiv.org/pdf/2203.15556.pdf,Unknown,TPUv3/TPUv4 pods,7303.24 petaflop/s-day,,,,,,, closed,"Model performance was evaluated on simulated and robotics task primarily, including out-of-distribution and skill generalization. 
",2022-05-12,,['Gato dataset'],"Gato is a generalist agent based on sequence modeling using the Transformer architecture to implement multi-modal, multi-task, multi-embodiment generalist policy. ",,"The feedback for the model can be provided at the email linked in the model card, reedscot at google.com [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ",,"The intended uses are stated in the Gopher model card: ""Learn to accomplish a wide variety of tasks from expert demonstrations, such as playing video games, controlling simulated embodiments, and real world block stacking."" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ",unknown,"image, text; image, text, robotics trajectories",unknown,Gato,UK,Google Deepmind,"The model card lists the following as out of scope uses of the model: ""Not intended for commercial or production use. Military uses are strictly prohibited."" [[Model Card]](https://openreview.net/pdf?id=1ikK0kHjvj#appendix.A). ",,,1.2B parameters (dense),model,https://www.deepmind.com/blog/a-generalist-agent,https://openreview.net/pdf?id=1ikK0kHjvj#appendix.B,unknown,16x16 TPU v3 slice,4 days on a 16x16 TPU v3 slice,,,,,,, closed,,2022-09-28,,"['Chinchilla', 'Google Search', 'Sparrow Rule reward model', 'Sparrow Preference reward model']",,,,,,unknown,text; text,,Sparrow,UK,Google Deepmind,,,,70B parameters (dense),model,https://arxiv.org/abs/2209.14375,,,,,,,,,,, closed,,2021-12-08,,['MassiveText'],,,,,,unknown,text; text,,RETRO,UK,Google Deepmind,,,,7.5B parameters (dense),model,https://arxiv.org/abs/2112.04426,,,,,,,,,,, closed,,2022-09-28,,"['Chinchilla', 'Sparrow adversarial probing dataset']",,,,,,unknown,text; text,,Sparrow Rule reward model,UK,Google Deepmind,,,,70B parameters (dense),model,https://arxiv.org/abs/2209.14375,,,,,,,,,,, closed,,2022-09-28,,"['Chinchilla', 'Sparrow response preference dataset']",,,,,,unknown,text; text,,Sparrow Preference reward model,UK,Google Deepmind,,,,70B parameters 
(dense),model,https://arxiv.org/abs/2209.14375,,,,,,,,,,, closed,,2022-09-28,,['Chinchilla'],,,,,,unknown,text,,Sparrow adversarial probing dataset,UK,Google Deepmind,,,[],27k ratings,dataset,https://arxiv.org/abs/2209.14375,,,,,,,,,,, closed,,2022-09-28,,['Chinchilla'],,,,,,unknown,text,,Sparrow response preference dataset,UK,Google Deepmind,,,[],72k comparisons,dataset,https://arxiv.org/abs/2209.14375,,,,,,,,,,, closed,,2022-03-16,,"['Gopher', 'Google Search', 'GopherCite reward model']",,,,,,unknown,text; text,,GopherCite,UK,Google Deepmind,,,,280B parameters (dense),model,https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf,,,,,,,,,,, closed,,2022-03-16,,"['Gopher', 'GopherCite Preference dataset']",,,,,,unknown,text; text,,GopherCite reward model,UK,Google Deepmind,,,,7B parameters (dense),model,https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf,,,,,,,,,,, closed,,2022-03-16,,"['Gopher', 'Google Search']",,,,,,unknown,text,,GopherCite Preference dataset,UK,Google Deepmind,,,[],33k response pairs,dataset,https://storage.googleapis.com/deepmind-media/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes/Teaching%20language%20models%20to%20support%20answers%20with%20verified%20quotes.pdf,,,,,,,,,,, closed,,2022-09-29,,['Chinchilla'],,,,,,unknown,text; text,,Dramatron,UK,Google Deepmind,,,,70B parameters (dense),model,https://arxiv.org/abs/2209.14958,,,,,,,,,,, open,Evaluated on evaluation trajectories and SoTA baselines using robotic data.,2023-07-28,,"['PaLI-X', 'PaLM-E', 'RT-2 action tokens']",RT-2 is a vision-language-action model for robotic actions that incorporates chain of thought reasoning.,,,,,unknown,"text, 
video; text, robotics trajectories",,RT-2,UK,Google Deepmind,,,,55B parameters (dense),model,https://arxiv.org/pdf/2307.15818.pdf,,,,,,,,,,, closed,unknown,2023-11-16,,[],Lyria is DeepMind's most advanced AI music generation model to date.,,,,,unknown,text; music,,Lyria,UK,Google Deepmind,,worked with artists and music industry to ensure utility,,unknown,model,https://deepmind.google/discover/blog/transforming-the-future-of-music-creation/,,unknown,unknown,unknown,,,,,,, closed,Evaluated using only out-of-distribution image prompts for qualitative results.,2024-02-23,,[],"Genie is a foundation world model trained from Internet videos that can generate an endless variety of playable (action-controllable) worlds from synthetic images, photographs, and even sketches.",,,,,unknown,image; video,,Genie,unknown,Google DeepMind,,,,11B parameters (dense),model,https://sites.google.com/view/genie-2024,,unknown,unknown,unknown,,,,,,, limited,"The model was tested and evaluated on various prompts to assess its understanding of natural language, its ability to generate high-quality images in various formats and styles and generate fine details and complex textures. Red teaming and evaluations were conducted on topics including fairness, bias, and content safety.",2024-05-14,,[],"Imagen 3 is a high-quality text-to-image model, capable of generating images with better detail, richer lighting, and fewer distracting artifacts compared to previous models. Improved understanding of prompts allows for a wide range of visual styles and captures small details from longer prompts. It also understands prompts written in natural, everyday language, making it easier to use. Imagen 3 is available in multiple versions, optimized for different types of tasks, from generating quick sketches to high-resolution images.",,unknown,,"Generate high-quality images for various purposes, from photorealistic landscapes to textured oil paintings or whimsical claymation scenes. 
It is useful in situations where detailed visual representation is required based on the textual description.",unknown,text; image,Through digital watermarking tool SynthID embedded in pixels for detection and identification.,Imagen 3,unknown,Google DeepMind,unknown,"Extensive filtering and data labeling were used to minimize harmful content in datasets and reduce the likelihood of harmful outputs. Privacy, safety, and security technologies were leveraged in deploying the model, including watermarking tool SynthID.",,unknown,model,https://deepmind.google/technologies/imagen-3/,,unknown,unknown,unknown,,,,,,, closed,unknown,2024-05-14,,[],"Veo is Google DeepMind's most capable video generation model to date. It generates high-quality, 1080p resolution videos that can go beyond a minute, in a wide range of cinematic and visual styles. It accurately captures the nuance and tone of a prompt, and provides an unprecedented level of creative control. The model is also capable of maintaining visual consistency in video frames, and supports masked editing.",,Feedback from leading creators and filmmakers is incorporated to improve Veo's generative video technologies.,,"Veo is intended to help create tools that make video production accessible to everyone. It can be used by filmmakers, creators, or educators for storytelling, education and more. 
Some of its features will be also brought to products like YouTube Shorts.",unknown,text; video,unknown,Veo,unknown,Google DeepMind,unknown,"Videos created by Veo are watermarked using SynthID, DeepMinds tool for watermarking and identifying AI-generated content, and passed through safety filters and memorization checking processes to mitigate privacy, copyright and bias risks.",,unknown,model,https://deepmind.google/technologies/veo/,,unknown,unknown,unknown,,,,,,, limited,"The model was evaluated on various benchmarks like General MMLU, Code Natural2Code, MATH, GPQA, Big-Bench, WMT23, MMMU, and MathVista providing performance across various domains like multilingual translation, image processing, and code generation.",2024-05-30,,[],"Gemini Flash is a lightweight model, optimized for speed and efficiency. It features multimodal reasoning and a breakthrough long context window of up to one million tokens. It's designed to serve at scale and is efficient on cost, providing quality results at a fraction of the cost of larger models.",,,,"The model is intended for developer and enterprise use cases. It can process hours of video and audio, and hundreds of thousands of words or lines of code, making it beneficial for a wide range of tasks.",Googles Terms and Conditions,"audio, image, text, video; text",unknown,Gemini 1.5 Flash,unknown,Google DeepMind,,The research team is continually exploring new ideas at the frontier of AI and building innovative products for consistent progress.,,unknown,model,https://deepmind.google/technologies/gemini/flash/,,unknown,unknown,unknown,,,,,,, open,Unknown,2024-06-17,,[],"Gen-3 Alpha is a foundation model trained for large-scale multimodal tasks. It is a major improvement in fidelity, consistency, and motion over the previous generation, Gen-2. Gen-3 Alpha can power various tools, such as Text to Video, Image to Video, and Text to Image. 
The model excels at generating expressive human characters with a wide range of actions, gestures, and emotions, and is capable of interpreting a wide range of styles and cinematic terminology. It is also a step towards building General World Models. It has been designed for use by research scientists, engineers, and artists, and can be fine-tuned for customization according to specific stylistic and narrative requirements.",,"Companies interested in fine-tuning and custom models can reach out to Runway AI, Inc. using a form on their website.",,"Can be used to create expressive human characters, interpret a wide range of styles and cinematic terminology, and power tools for Text to Video, Image to Video, and Text to Image tasks.","Terms of Use listed on Runway AI, Inc.'s website, specific license unknown","text, image, video; video",The model includes a new and improved in-house visual moderation system.,Gen-3 Alpha,USA,"Runway AI, Inc.",Unknown,"It will be released with a set of new safeguards, including an improved in-house visual moderation system and C2PA provenance standards.",,Unknown,model,https://runwayml.com/research/introducing-gen-3-alpha?utm_source=xinquji,unknown,Unknown,Unknown,Unknown,,,,,,, limited,"Evaluated on wide range of language benchmarks like MMLU 5-shot, GSM-8K, and HellaSwag 10-shot among others.",2023-06-22,,[],Inflection AI's first version of its in-house LLM. 
via Inflection AI's conversational API.,,,,,unknown,text; text,,Inflection-1,USA,Inflection AI,,,,unknown,model,https://inflection.ai/inflection-1,,,unknown,,,,,,,, limited,,2023-05-02,,['Inflection-2.5'],Personal AI chatbot designed to be conversational and specialized in emotional intelligence.,,,,to be used as a personal assistant chatbot for everyday activities,unknown,,,Pi,USA,Inflection AI,,,,,application,https://inflection.ai/press,,,,,,,,natural language text responses,,, closed,"Evaluated against state of the art models on benchmarks, and found to be most performant model outside of GPT-4.",2023-11-22,,[],"Inflection-2 is the best model in the world for its compute class and the second most capable LLM in the world, according to benchmark evaluation, as of its release.",,,,,unknown,text; text,,Inflection-2,USA,Inflection AI,,,,unknown,model,https://inflection.ai/inflection-2,,unknown,5000 NVIDIA H100 GPUs,unknown,,,,,,, limited,"Evaluated on standard LLM and technical benchmarks in comparison to Inflection-1 and GPT-4, along with advanced STEM examinations.",2024-03-07,,[],"Inflection-2.5 is an upgraded in-house model that is competitive with all the world's leading LLMs, as of release, like GPT-4 and Gemini.",,,,,unknown,text; text,,Inflection-2.5,USA,Inflection AI,,,,unknown,model,https://inflection.ai/inflection-2-5,,unknown,unknown,unknown,,,,,,, open,unknown,2024-10-07,,[],"Inflection for Enterprise, powered by our industry-first, enterprise-grade AI system, Inflection 3.0.",,So please drop us a line. 
We want to keep hearing from enterprises about how we can help solve their challenges and make AI a reality for their business.,,unknown,unknown,unknown,unknown,Inflection 3.0,USA,Inflection AI,unknown,unknown,,unknown,model,https://inflection.ai/blog/enterprise,unknown,unknown,unknown,unknown,,,,,,, limited,,2023-02-21,,['ChatGPT API'],"With the alliance, Bain will combine its deep digital implementation capabilities and strategic expertise with OpenAI’s AI tools and platforms, including ChatGPT, to help its clients around the world identify and implement the value of AI to maximize business potential.",,,,,unknown,,,Bain Chat,USA,Bain,,,,,application,https://www.bain.com/vector-digital/partnerships-alliance-ecosystem/openai-alliance/,,,,,,,,,,, open,Evaluated on a range of benchmarks and performed on par with LLaMA-7B.,2023-05-05,,"['RedPajama-Data', 'C4', 'The Stack', 'Multimodal C4']",MPT is a series of large language models seeking to address the limitations of other open source models like LLaMA and Pythia.,,,,,Apache 2.0,text; text,,MPT,USA,Mosaic,,,,7B parameters (dense),model,https://www.mosaicml.com/blog/mpt-7b,,unknown,440 A100 40GB GPUs,9.5 days,,,,,,, open,"Compared to Stable Diffusion 2, a SOTA text-to-image model.",2023-10-25,,['CommonCatalog'],CommonCanvas is a text-to-image model trained solely on Creative Commons licensed images.,,,,,Apache 2.0,text; image,,CommonCanvas,unknown,"Cornell University, Mosaic",,,,unknown,model,https://arxiv.org/pdf/2310.16825.pdf,,unknown,128 A100 NVIDIA GPUs,6.79 days,,,,,,, open,,2023-10-25,https://github.com/mosaicml/diffusion/blob/main/assets/common-canvas.md,"['YFCC100M', 'BLIP-2']",CommonCatalog is a curated dataset of CommonCrawl images and synthetic captions.,images with non-derivative licenses,,images with derivative licenses,,Apache 2.0,image-caption pairings,,CommonCatalog,USA,Mosaic,,,[],70M images,dataset,https://arxiv.org/pdf/2310.16825.pdf,,,,,,,,,,, open,unknown,2024-10-08,,[],"XTTS-v2 is a voice 
generation model that allows voice cloning into different languages using a brief 6-second audio clip, supporting 17 languages with features like emotion and style transfer, cross-language voice cloning, and multi-lingual speech generation. It powers Coqui Studio and Coqui API, with improvements in architectural and prosody aspects for better audio quality.",,"Users can join the Coqui community on Discord, engage on Twitter, or send emails to info@coqui.ai for feedback and queries.",,"Voice cloning, multi-lingual speech generation, emotion and style transfer in speech.",Coqui Public Model,audio; audio,unknown,XTTS-v2,USA,Coqui,unknown,unknown,,unknown,model,https://huggingface.co/coqui/XTTS-v2,https://huggingface.co/coqui/XTTS-v2,unknown,unknown,unknown,,,,,,, limited,,2023-03-14,,['GPT-4 API'],The first-ever digital visual assistant powered by OpenAI’s new GPT-4 language model.,,,,,unknown,,,Virtual Volunteer,Denmark,Be My Eyes,,,,,application,https://www.bemyeyes.com/blog/introducing-be-my-eyes-virtual-volunteer,,,,,,,,,,, limited,"Extensive testing on Multilingual Librispeech dataset resulted in 20% lower validation perplexity. In downstream evaluations, this leads to a 2x lower word error rate and a 1 point higher quality score. Sonic also displays impressive performance metrics at inference, achieving lower latency (1.5x lower time-to-first-audio), faster inference speed (2x lower real-time factor), and higher throughput (4x).",2024-05-29,,['Multilingual Librispeech dataset'],"Sonic is a low-latency voice model that generates lifelike speech. 
Developed by Cartesia, it was designed to be an efficient real-time AI capable of processing any-sized contexts and running on any device.",,Contact through the provided form or via email at join@cartesia.ai.,,"Sonic has potential applications across customer support, entertainment, and content creation and is a part of Cartesia's broader mission to bring real-time multimodal intelligence to every device.",unknown,text; audio,unknown,Sonic,USA,Cartesia,unknown,,,2024-05-29,model,https://cartesia.ai/blog/sonic,,unknown,unknown,unknown,,,,,,, open,,2023-11-07,,['Mistral'],SciPhi Mistral is a Large Language Model (LLM) fine-tuned from Mistral.,,https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k/discussions,,,MIT,text; text,unknown,SciPhi Mistral,USA,SciPhi,,,,7B parameters (dense),model,https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k,https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k,unknown,unknown,unknown,,,,,,, closed,,2023-09-26,,[],"A.X is SK Telecom's proprietary LLM, which has been trained on the Korean language.",,,,,unknown,text; text,,A.X,South Korea,SK Telecom,,,,39B parameters,model,https://www.sktelecom.com/en/press/press_detail.do?idx=1582,,unknown,unknown,unknown,,,,,,, open,,2021-08-20,https://laion.ai/blog/laion-400-open-dataset/,"['CLIP', 'CommonCrawl']","LAION-400M is a dataset with CLIP-filtered 400 million image-text pairs, their CLIP embeddings and kNN indices that allow efficient similarity search. This dataset is entirely openly, freely accessible.","Authors apply the following filtering conditions on the WAT files downloaded from Common Crawl: ""All samples with less than 5 character alt-text length or less than 5 KB image size are dropped. Duplicate removal is performed with bloom filter based on URL and alt-text. We use CLIP to compute embeddings of the image and alt-text. Then we compute the cosine similarity of both embeddings and drop all samples with cosine similarity below 0.3. 
This threshold was selected based on human inspections. We use the CLIP embeddings of images and texts to filter out illegal contents."" ",,,"The authors recommend using the dataset ""for research purposes"" and warn that ""this large-scale dataset is non-curated. It was built for research purposes to enable testing model training on larger scale for broad researcher and other interested communities, and is not meant for any real-world production or application.""",CC BY 4.0,"image, text",,LAION-400M,Germany,LAION,No uses are explicitly prohibited by the license. Users are warned from using LAION-400M for any real-world production or application.,"The authors use CLIP embeddings of images and texts to filter out illegal contents. They also use CLIP to tag image-text pairs as NSFW. They note that less than 1% of images were detected as NSFW, which can be filtered out by an user with NSFW tag.",[],400M image-text pairs,dataset,https://laion.ai/blog/laion-400-open-dataset/,,,,,,,,,,, open,,2022-12-12,https://laion.ai/blog/laion-5b/,"['CLIP', 'mCLIP', 'CommonCrawl']",LAION is a dataset of 5 billion image-text pairs from the Internet,"All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal is performed with a bloom filter based on URL. Future runs would include more variate deduplication rules, such as URL + language for the multilanguage dataset. We use CLIP respectively MCLIP to compute embeddings of the image and alt-text. Then we compute the cosine similarity of both embeddings and drop all samples with cosine similarity below 0.28 for the English language ( with CLIP B/32) and 0.26 for the multilingual dataset (MCLIP). These thresholds were selected based on human inspection of the test results. 
We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content.",,,"The authors recommend using the dataset ""for research purposes"" and ""do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress""",CC BY 4.0,"image, text",,LAION-5B,Germany,LAION,No uses are explicitly prohibited by the license. Users are warned from using LAION-5B for non-research purposes.,,[],5B image-text pairs,dataset,https://laion.ai/blog/laion-5b/,,,,,,,,,,, open,,2022-12-12,https://laion.ai/blog/laion-5b/,"['CLIP', 'LAION-5B']",LAION-2B-en is a subset of the LAION-5B dataset and contains 2.3 billion English image-text pairs.,"All samples with less than 5 characters alt-text length or less than 5 KB image size are dropped. All images with the too big resolution, potentially DOS bombs, were dropped before attempting to process them. Duplicate removal is performed with a bloom filter based on URL. Future runs would include more variate deduplication rules, such as URL + language for the multilanguage dataset. We use CLIP respectively MCLIP to compute embeddings of the image and alt-text. Then we compute the cosine similarity of both embeddings and drop all samples with cosine similarity below 0.28 for the English language ( with CLIP B/32) and 0.26 for the multilingual dataset (MCLIP). These thresholds were selected based on human inspection of the test results. 
We use the CLIP embeddings of images and texts to filter out to the possible extent the illegal content.",,,"The authors recommend using the dataset ""for research purposes"" and ""do not recommend using it for creating ready-to-go industrial products, as the basic research about general properties and safety of such large-scale models, which we would like to encourage with this release, is still in progress""",CC BY 4.0,"image, text",,LAION-2B-en,Germany,LAION,No uses are explicitly prohibited by the license. Users are warned from using LAION-2B-en for non-research purposes.,,[],2.32B image-text pairs,dataset,https://arxiv.org/pdf/2210.08402.pdf,,,,,,,,,,, open,Evaluated on COCO captioning and VQAv2 vision-language tasks.,2023-03-28,,"['LLaMA', 'CLIP']","An open-source reproduction of DeepMind's Flamingo model. At its core, OpenFlamingo is a framework that enables training and evaluation of large multimodal models (LMMs).",,,,academic research purposes,MIT,"image, text; text",,OpenFlamingo,Germany,LAION,commercial use,,,9B parameters (dense),model,https://laion.ai/blog/open-flamingo/,https://github.com/mlfoundations/open_flamingo/blob/main/MODEL_CARD.md,,,,,,,,,, open,Evaluated on MT-Bench and AlphaEval benchmarks.,2023-12-01,,"['UltraFeedback', 'Zephyr']","Notus is an open source LLM, fine-tuned using Direct Preference Optimization (DPO) and AIF (AI Feedback) techniques.",,https://huggingface.co/argilla/notus-7b-v1/discussions,,Academic research and free commercial usage,MIT,text; text,,Notus,Belgium,Argilla,,,,7B parameters (dense),model,https://argilla.io/blog/notus7b/,https://huggingface.co/argilla/notus-7b-v1,unknown,8 x A100 40GB GPUs,unknown,,,,,,, closed,unknown,2021-11-15,https://docs.cohere.ai/data-statement,[],"The family of datasets used to train Cohere models, which come in two forms: coheretext-filtered and coheretext-unfiltered. The former is used to train the Representation models, while the latter one is used to train the Generation models. 
","Documents that are not in English are excluded. ",unknown,"As stated in the datasheet, the dataset ""includes the Google Books dataset, CommonCrawl, and text from the internet scraped by the Cohere infrastructure team."" The top ten domains scraped were: wordpress.com, medium.com, stackexchange.com, tumblr.com, elsevier.com, genius.com, bbc.co.uk, libsyn.com, yahoo.com, nytimes.com [[Datasheet]](https://docs.cohere.ai/data-statement). ","The intended use of the dataset is to train Cohere's language models. ",unknown,text; text,unknown,coheretext,Canada,Cohere,unknown,"In the datasheet, it is implied that Cohere employs filtration methods for removing racist, biased and toxic content, but the details are not provided. These filtration methods take both the context and the language, as opposed to using a list of blockwords [[Datasheet]](https://docs.cohere.ai/data-statement). ",[],200 GB,dataset,,,,,,,,,,,, limited,"The model's performance was analyzed on Hellaswag and COPA, as well as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card).",2021-11-15,,['coheretext'],"The Generations model is a language model trained by Cohere for generation tasks. ",,unknown,,"On the model card, the intended uses are stated as ""interactive autocomplete, augmenting human writing processes, summarization, text rephrasing, and other text-to-text tasks in non-sensitive domains"" [[Model Card]](https://docs.cohere.ai/generation-card). ",unknown,text; text,"The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card). ",Cohere Base,Canada,Cohere,"The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
A non-comprehensive list of specific application violating these guidelines are: astroturfing, generation of misinformation and other harmful content, and ""generation of text about people, places, or events without a human-in-the-loop"" [[Model Card]](https://docs.cohere.ai/generation-card). ",unknown,,unknown,model,,https://docs.cohere.ai/generation-card,unknown,unknown,unknown,,,,,,, limited,"The model's performance was analyzed on Hellaswag and COPA, as well as several safety benchmarks [[Model Card]](https://docs.cohere.ai/generation-card).",2023-01-01,,['Cohere Base'],"This model is a generative model optimized to follow commands in the prompt. ",,unknown,,"On the model card, the intended uses are stated as ""interactive autocomplete, augmenting human writing processes, summarization, text rephrasing, and other text-to-text tasks in non-sensitive domains"" [[Model Card]](https://docs.cohere.ai/generation-card). ",unknown,text; text,"The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/generation-card). ",Cohere Command,Canada,Cohere,"The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive list of specific application violating these guidelines are: astroturfing, generation of misinformation and other harmful content, and ""generation of text about people, places, or events without a human-in-the-loop"" [[Model Card]](https://docs.cohere.ai/generation-card). ",unknown,,unknown,model,https://docs.cohere.com/docs/command-beta,https://docs.cohere.ai/generation-card,unknown,unknown,unknown,,,,,,, limited,"The model's performance was analyzed on several safety benchmarks [[Model Card]](https://docs.cohere.ai/representation-card). ",2021-11-15,,[],"The Embedding Large (English) model is a language model trained by Cohere for tasks requiring embeddings. 
",,unknown,,"The intended uses are stated as ""estimating semantic similarity between two sentences, choosing a sentence which is most likely to follow another sentence, sentiment analysis, topic extraction, or categorizing user feedback"" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card). ",unknown,text; text,"The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card). ",Cohere Embed (English),Canada,Cohere,"The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive list of specific application violating these guidelines are: extraction of identity and demographic information, building purposefully opaque text classification systems, and ""building downstream classifiers that serve as automated decision-making systems that have real-world consequences on people, where those decisions are made without a human-in-the-loop"" [[Model Card]](https://docs.cohere.ai/representation-card). ",unknown,,unknown,model,,https://docs.cohere.ai/representation-card,unknown,unknown,unknown,,,,,,, limited,"The model's performance was analyzed on several safety benchmarks [[Model Card]](https://docs.cohere.ai/representation-card). ",2022-12-12,,[],"This model maps text from 100+ languages to a semantic vector space, positioning text with a similar meaning (regardless of language) in close proximity. ",,unknown,,"The intended uses are stated as ""estimating semantic similarity between two sentences, choosing a sentence which is most likely to follow another sentence, sentiment analysis, topic extraction, or categorizing user feedback"" on the Cohere model card [[Model Card]](https://docs.cohere.ai/representation-card). ",unknown,text; text,"The usage of the model is monitored by Cohere [[Model Card]](https://docs.cohere.ai/representation-card). 
",Cohere Embed (Multilingual),Canada,Cohere,"The usage of the model is bound by the Cohere usage guidelines [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). A non-comprehensive list of specific application violating these guidelines are: extraction of identity and demographic information, building purposefully opaque text classification systems, and ""building downstream classifiers that serve as automated decision-making systems that have real-world consequences on people, where those decisions are made without a human-in-the-loop"" [[Model Card]](https://docs.cohere.ai/representation-card). ",unknown,,unknown,model,https://txt.cohere.ai/multilingual/,https://docs.cohere.ai/representation-card,unknown,unknown,unknown,,,,,,, limited,,2021-11-15,,"['Cohere Chat Endpoint', 'Cohere Generate Endpoint (legacy)', 'Cohere Embed Endpoint', 'Cohere Classify Endpoint', 'Cohere Summarize Endpoint']","Cohere API allows users to access the cohere language models and utilize them in their applications. ",,"General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ",,"Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ",custom,,"All applications developed using the Cohere API is subject to review by Cohere. ",Cohere API,Canada,Cohere,"The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ","The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. 
Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). ",,,application,https://cohere.ai/,,,,,unknown,unknown,unknown,generation and embeddings,https://cohere.ai/terms-of-use,unknown, limited,,2023-09-28,,"['Cohere Command', 'Command R', 'Command R+']","This endpoint generates a text response to a user message in the context of a conversation, optionally grounding its generation with source material from tools like search engines, functions or databases, and attributing its responses with citations.",,"General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ",,"Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ","Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ",,"All applications developed using the Cohere API is subject to review by Cohere. ",Cohere Chat Endpoint,Canada,Cohere,"The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ",,,,application,https://docs.cohere.com/reference/chat,,,,,unknown,unknown,unknown,generation,https://cohere.ai/terms-of-use,unknown, limited,,2021-11-15,,"['Cohere Base', 'Cohere Command']","This endpoint generates realistic text conditioned on a given input. ",,"General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). 
",,"Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ","Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ",,"All applications developed using the Cohere API is subject to review by Cohere. ",Cohere Generate Endpoint (legacy),Canada,Cohere,"The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ","The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). ",,,application,https://docs.cohere.ai/reference/generate,,,,,unknown,unknown,unknown,generation,https://cohere.ai/terms-of-use,unknown, limited,,2021-11-15,,"['Cohere Embed (Multilingual)', 'Cohere Embed (English)']","This endpoint returns text embeddings. An embedding is a list of floating point numbers that captures semantic information about the text that it represents. ",,"General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ",,"Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ","Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ",,"All applications developed using the Cohere API is subject to review by Cohere. ",Cohere Embed Endpoint,Canada,Cohere,"The usage of the API is bound by the Cohere usage guidelines. 
Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ","The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). ",,,application,https://docs.cohere.ai/reference/embed,,,,,unknown,unknown,unknown,embedding,https://cohere.ai/terms-of-use,unknown, limited,,2022-05-05,,"['Cohere Embed (Multilingual)', 'Cohere Embed (English)']","This endpoint makes a prediction about which label best fits a specified text input. To make a prediction, Classify uses the provided examples of text + label pairs as a reference. ",,"General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ",,"Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ","Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ",,"All applications developed using the Cohere API is subject to review by Cohere. ",Cohere Classify Endpoint,Canada,Cohere,"The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ","The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. 
Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). ",,,application,https://docs.cohere.ai/reference/classify,,,,,unknown,unknown,unknown,embedding,https://cohere.ai/terms-of-use,unknown, limited,,2023-02-22,,[],"This endpoint generates a succinct version of the original text that relays the most important information. ",,"General feedback as well as the violations of the usage guidelines can be reported to Cohere at safety at cohere.ai [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ",,"Intended to be used by developers who would like to incorporate NLP into their applications [[Cohere Website]](https://cohere.ai/). ","Limited use license to Cohere platform users [[Terms of Use]](https://cohere.ai/terms-of-use). ",,"All applications developed using the Cohere API is subject to review by Cohere. ",Cohere Summarize Endpoint,Canada,Cohere,"The usage of the API is bound by the Cohere usage guidelines. Disallowed use cases include violence and threats, antisocial and antidemocratic uses, deceit, attacks on security or privacy, unsafe unsupervised uses, decision-making, high-Risk generations among others [[Usage Guidelines]](https://docs.cohere.ai/usage-guidelines). ","The new users of the API get a limited access restricting the sizes of the models as well as the number of tokens that can be used. Users are required to go through an internal application to upgrade to full access [[Limited Access]](https://docs.cohere.ai/limited-access). 
",,,application,https://docs.cohere.ai/reference/summarize,,,,,unknown,unknown,unknown,generation,https://cohere.ai/terms-of-use,unknown, limited,Achieves SOTA performances on trusted MTEB and BEIR benchmarks.,2023-11-02,,[],"As of release, Cohere Embedv3 is Cohere's latest and most advanced embeddings model.",,https://huggingface.co/Cohere/Cohere-embed-english-v3.0/discussions,,,unknown,text; text,,Cohere Embedv3 (English),Canada,Cohere,,,,unknown,model,https://txt.cohere.com/introducing-embed-v3/,https://huggingface.co/Cohere/Cohere-embed-english-v3.0,unknown,unknown,unknown,,,,,,, open,Evaluated on standard LLM and multilingual benchmarks in comparison to SotA models.,2024-02-12,,"['mT5', 'Aya Dataset']",Aya is a massively multilingual generative language model that follows instructions in 101 languages of which over 50% are considered as lower-resourced.,,https://huggingface.co/CohereForAI/aya-101/discussions,,,Apache 2.0,text; text,unknown,Aya,unknown,"Cohere for AI, Cohere, Brown University, Carnegie Mellon University, MIT",,,,13B parameters (dense),model,https://arxiv.org/pdf/2402.07827.pdf,https://huggingface.co/CohereForAI/aya-101,unknown,unknown,unknown,,,,,,, open,"Command R was evaluated on standard English and multilingual (translated MMLU, FLoRES) benchmarks. It was also evaluated on KILT, HotpotQA and Bamboogle.",2024-03-11,,[],"Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprise. It is an auto-regressive large language model that uses an optimized transformer architecture. After pretraining, supervised fine-tuning (SFT) and preference training were applied to align model behavior to human preferences for helpfulness and safety. Command R is part of a family of open weight releases from Cohere and Cohere For AI. 
Its larger companion model is [Command R+](https://huggingface.co/CohereForAI/c4ai-command-r-plus).",,[https://huggingface.co/CohereForAI/c4ai-command-r-v01/discussions](https://huggingface.co/CohereForAI/c4ai-command-r-v01/discussions),,"Command R is optimized to perform well up to 128K context length and in the following languages: English, French, Spanish, Italian, German, Brazilian Portuguese, Japanese, Korean, Simplified Chinese, and Arabic. It has been trained for conversational tool use and grounded generation capabilities. The open weights release of Command R is intended to make community-based research efforts more accessible.",CC BY NC 4.0,text; text,unknown,Command R,Canada,Cohere,"Usage of this model requires adherence to [C4AI's Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy). Usage via the Cohere Platform is subject to Cohere's [Terms of Use](https://cohere.com/terms-of-use) and [Usage Guidelines](https://docs.cohere.com/docs/usage-guidelines). 
A non-exhaustive list of disallowed use cases include: - Violence and harm - Harassment and abuse - Sexual exploitation, harm, or abuse - Sensitive information - Fraud and deception - Synthetic data for commercial uses Misuse should be reported to Cohere at safety at cohere.ai.",Comprehensive guidance for working with Command R's tool use and grounded generation prompt template can be found in Cohere's [documentation](https://docs.cohere.com/docs/prompting-command-r).,,35B parameters (dense),model,https://txt.cohere.com/command-r/,https://huggingface.co/CohereForAI/c4ai-command-r-v01,unknown,unknown,unknown,,,,,https://cohere.ai/terms-of-use,, open,unknown,2024-02-09,https://huggingface.co/datasets/CohereForAI/aya_dataset,[],The Aya Dataset is a dataset that consists of original human-curated prompt-completion pairs written by fluent speakers of 65 languages.,,https://huggingface.co/datasets/CohereForAI/aya_dataset/discussions,,,Apache 2.0,text,unknown,Aya Dataset,unknown,"Cohere for AI, Beijing Academy of Artificial Intelligence, Cohere, Binghamton University",,,[],204k human-annotated prompt-completion pairs,dataset,https://arxiv.org/pdf/2402.06619.pdf,,,,,,,,,,, limited,"Evaluated on code retrieval and data retrieval capabilities, with improvements compared to the standard in both.",2024-04-11,,[],Rerank 3 is a new foundation model for efficient enterprise search and retrieval with 4k context length.,,,,Efficient enterprise search and retrieval.,unknown,text; text,unknown,Rerank 3,Canada,Cohere,,,,unknown,model,https://cohere.com/blog/rerank-3,,unknown,unknown,unknown,,,,,,, open,Evaluated across 23 languages with the highest results in all tasks and languages compared to other multilingual language models.,2024-05-31,,[],Aya 23 is an open weights research release of an instruction fine-tuned model with multilingual capabilities. It focuses on pairing a highly performant pre-trained Command family of models with the recently released Aya Collection. 
This model supports 23 languages.,,https://huggingface.co/CohereForAI/aya-23-35B/discussions,,This model is designed to be used for multilingual tasks covering 23 languages.,CC-BY-NC,text; text,unknown,Aya 23,Canada,Cohere,unknown,unknown,,35B parameters,model,https://arxiv.org/pdf/2405.15032,https://huggingface.co/CohereForAI/aya-23-35B,unknown,TPUv4 chips with up to 128 pod slices,unknown,,,,,,, open,"Command R+ was evaluated on standard English and multilingual (e.g. FLoRES, WMT23) benchmarks. It was also evaluated on multihop reasoning tasks (e.g. HotpotQA, Bamboogle, StrategyQA), and tool use (e.g. ToolTalk, Berkeley Function-Calling).",2024-04-03,,[],"Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads. It is an auto-regressive large language model that uses an optimized transformer architecture. After pretraining, supervised fine-tuning (SFT) and preference training were applied to align model behavior to human preferences for helpfulness and safety. Command R+ is part of a family of open weight releases from Cohere and Cohere For AI. Its smaller companion model is [Command R](https://huggingface.co/CohereForAI/c4ai-command-r-v01).",,[https://huggingface.co/CohereForAI/c4ai-command-r-plus/discussions](https://huggingface.co/CohereForAI/c4ai-command-r-plus/discussions),,"Command R+ is optimized to perform well up to 128K context length and in the following languages: English, French, Spanish, Italian, German, Brazilian Portuguese, Japanese, Korean, Simplified Chinese, and Arabic. It has been trained for conversational tool use and grounded generation capabilities. The open weights release of Command R+ is intended to make community-based research efforts more accessible.",CC BY NC 4.0,text; text,unknown,Command R+,Canada,Cohere,"Usage of this model requires adherence to [C4AI's Acceptable Use Policy](https://docs.cohere.com/docs/c4ai-acceptable-use-policy). 
Usage via the Cohere Platform is subject to Cohere's [Terms of Use](https://cohere.com/terms-of-use) and [Usage Guidelines](https://docs.cohere.com/docs/usage-guidelines). A non-exhaustive list of disallowed use cases include: - Violence and harm - Harassment and abuse - Sexual exploitation, harm, or abuse - Sensitive information - Fraud and deception - Synthetic data for commercial uses Misuse should be reported to Cohere at safety at cohere.ai.",Comprehensive guidance for working with Command R+'s tool use and grounded generation prompt template can be found in Cohere's [documentation](https://docs.cohere.com/docs/prompting-command-r).,,104B parameters (dense),model,https://txt.cohere.com/command-r-plus-microsoft-azure/,https://huggingface.co/CohereForAI/c4ai-command-r-plus,unknown,unknown,unknown,,,,,,, open,"Evaluated in comparison to the MusicCaps dataset and with respect to n-gram, neural metrics.",2023-07-31,,"['MusicCaps', 'Million Song Dataset', 'Magnatagtune']",LP-MusicCaps is a LLM-based pseudo music caption dataset.,,,,,CC-BY-NC-4.0,,,LP-MusicCaps,South Korea,South Korea Graduate School of Culture Technology,,,[],2.2M captions paired with 0.5M audio clips,dataset,https://arxiv.org/pdf/2307.16372.pdf,,,,,,,,,,, limited,,2023-03-14,,['GPT-4 API'],"Explain My Answer offers learners the chance to learn more about their response in a lesson (whether their answer was correct or incorrect!) 
By tapping a button after certain exercise types, learners can enter a chat with Duo to get a simple explanation on why their answer was right or wrong, and ask for examples or further clarification.",,,,,custom,,,Duolingo Explain My Answer,USA,Duolingo,,,,,application,https://blog.duolingo.com/duolingo-max/,,,,,,,,,https://www.duolingo.com/terms,, limited,,2023-03-14,,"['Duolingo Role Play', 'Duolingo Explain My Answer']",Duolingo Max is a new subscription tier above Super Duolingo that gives learners access to two brand-new features and exercises - Explain My Answer and Roleplay.,,,,,custom,,,Duolingo Max,USA,Duolingo,,,,,application,https://blog.duolingo.com/duolingo-max/,,,,,,,,,,, limited,,2023-03-14,,['GPT-4 API'],"Roleplay allows learners to practice real-world conversation skills with world characters in the app. These challenges, which earn XP, will live alongside the path as one of the “Side Quests” learners can access by tapping on the character. What will you talk about? We’ll guide you through different scenarios! 
Learners might discuss future vacation plans with Lin, order coffee at a café in Paris, go furniture shopping with Eddy, or ask a friend to go for a hike.",,,,,custom,,,Duolingo Role Play,USA,Duolingo,,,,,application,https://blog.duolingo.com/duolingo-max/,,,,,,,,,https://www.duolingo.com/terms,, open,"Evaluated on standard language benchmarks, common sense reasoning, and reading comprehension in comparison to SoTA LLMs.",2023-11-02,,[],The Yi series models are large language models trained from scratch by developers at 01 AI.,,https://huggingface.co/01-ai/Yi-34B/discussions,,,custom,text; text,unknown,Yi,France,01 AI,,"Model underwent supervised fine-tuning, leading to a greater diversity of responses.",,34B parameters (dense),model,https://github.com/01-ai/Yi,https://huggingface.co/01-ai/Yi-34B,unknown,unknown,unknown,,,,,,, open,"Yi-VL outperforms all existing open-source models in MMMU and CMMMU, two advanced benchmarks that include massive multi-discipline multimodal questions (based on data available up to January 2024).",2024-01-23,,[],"The Yi Vision Language (Yi-VL) model is the open-source, multimodal version of the Yi Large Language Model (LLM) series, enabling content comprehension, recognition, and multi-round conversations about images.",,https://huggingface.co/01-ai/Yi-VL-34B/discussions,,,custom,text; text,unknown,Yi-VL,France,01 AI,,unknown,,34B parameters (dense),model,https://github.com/01-ai/Yi,https://huggingface.co/01-ai/Yi-VL-34B,unknown,128 NVIDIA A800 (80G) GPUs,10 days,,,,,,, open,"Re-LAION-5B aims to fix the issues as reported by Stanford Internet Observatory for the original LAION-5B. It is available for download in two versions, research and research-safe. In total, 2236 links that potentially led to inappropriate content were removed.",2024-08-30,,['LAION-5B'],"Re-LAION-5B is an updated version of LAION-5B, the first web-scale, text-link to images pair dataset to be thoroughly cleaned of known links to suspected CSAM. 
It is an open dataset for fully reproducible research on language-vision learning. This model was developed in response to issues identified by the Stanford Internet Observatory in December 2023. The updates were made in collaboration with multiple organizations like the Internet Watch Foundation (IWF), the Canadian Center for Child Protection (C3P), and Stanford Internet Observatory.",,Problems with the dataset should be reported to the LAION organization. They have open lines for communication with their partners and the broader research community.,,Re-LAION-5B is designed for research on language-vision learning. It can also be used by third parties to clean existing derivatives of LAION-5B by generating diffs and removing all matched content from their versions.,Apache 2.0,text; image,unknown,Re-LAION-5B,Germany,LAION e.V.,"The dataset should not be utilized for purposes that breach legal parameters or ethical standards, such as dealing with illegal content.",The model utilized lists of link and image hashes provided by partner organizations. 
These were used to remove inappropriate links from the original LAION-5B dataset to create Re-LAION-5B.,,"5.5B (text, image) pairs",dataset,https://laion.ai/blog/relaion-5b/,unknown,Unknown,Unknown,Unknown,,,,,,, open,Evaluated on the dimensions proposed by OpenCompass in comparison to other LLMs.,2023-09-20,,[],"InternLM is an LLM pre-trained on over 2.3T Tokens containing high-quality English, Chinese, and code data.",,https://huggingface.co/internlm/internlm-20b/discussions,,,Apache 2.0,text; text,unknown,InternLM,China,InternLM,,,,7B parameters (dense),model,https://github.com/InternLM/InternLM,https://huggingface.co/internlm/internlm-20b,unknown,unknown,unknown,,,,,,, open,Evaluated on open source multilingual model benchmarks.,2024-02-26,,['Llama 2'],SambaLingo is a suite of models that adapt Llama 2 to a diverse set of 9 languages.,,https://huggingface.co/sambanovasystems/SambaLingo-Arabic-Base/discussions,,,LLaMA 2,text; text,,SambaLingo,USA,Samba Nova Systems,"SambaLingo should not be used for mission-critical applications, applications involving the safety of others, and highly critical decisions.",,,unknown,model,https://sambanova.ai/blog/sambalingo-open-source-language-experts,https://huggingface.co/sambanovasystems/SambaLingo-Arabic-Base,unknown,unknown,unknown,,,,,,, limited,unknown,2024-02-28,,"['Llama 2', 'Mistral', 'Falcon-180B', 'Deepseek', 'BLOOM', 'LLaVA', 'CLIP']",Samba 1 is a trillion parameter generative AI model using a Composition of Experts architecture.,,,,,unknown,text; text,unknown,Samba 1,USA,Samba Nova Systems,,,,1T parameters (dense),model,https://sambanova.ai/blog/samba-1-composition-of-experts-mode,,unknown,unknown,unknown,,,,,,, limited,,2023-01-31,,['ChatGPT API'],"Give your sales, marketing, and customer service teams one of the most powerful AI tools available - ChatGPT priority access, no timeout limits, company wide access managed through a single account, incorporate into your existing processes without leaving HubSpot",,,,,unknown,,,ChatGPT 
powered by OBO,USA,HubSpot,,,,,application,https://ecosystem.hubspot.com/marketplace/apps/sales/sales-enablement/the-obo-group-chatgpt-1398072,,,,,,,,,,, open,"Mainly evaluated on MT-Bench and AlpacaEval, which are GPT-4-based comparisons.",2023-11-02,,[],Starling is a large language model trained by reinforcement learning from AI feedback focused on improving chatbot helpfulness.,,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha/discussions,,Academic research and free commercial usage,CC BY NC 4.0,text; text,,Starling,USA,Ollama,,,,7B parameters (dense),model,https://starling.cs.berkeley.edu/,https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha,unknown,unknown,unknown,,,,,,, limited,,,,['OpenAI API'],"""Sana is your all-in-one, AI-assisted, online learning platform (LMS). Author employee training courses and measure team development with Sana's powerful analytics. Sana partners with the world's most important organizations and fastest-growing startups to make personalized, adaptive learning available for everyone, everywhere"" [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). ",,unknown,,"Sana is intended to be used by employers to provide a learning service for their employees. ",custom,,unknown,Sana,USA,Sana,,unknown,,,application,https://www.sanalabs.com/,,,,,"Customized GPT-3, fine-tuned on private data [[Sana GPT-3 Demo]](https://gpt3demo.com/apps/sanalabs). 
",unknown,unknown,"question and answer, summarization, sentiment analysis, topic identification",https://www.sanalabs.com/legal/,unknown, open,Evaluated on EleutherAI evaluation harness.,2023-06-16,,"['GPT-NeoX', 'H2O AI OpenAssistant', 'h2oGPT Repositories']",Series of models fine-tuned on well-known LLMs using the h2oGPT repositories.,,https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b/discussions,,,Apache 2.0,text; text,,h2oGPT,USA,H2O AI,,,,20B parameters (dense),model,https://arxiv.org/pdf/2306.08161.pdf,https://huggingface.co/h2oai/h2ogpt-oasst1-512-20b,unknown,unspecified number of 48GB A100 NVIDIA GPUs,unknown,,,,,,, open,Evaluated on common sense and world knowledge benchmarks.,2024-01-30,,[],H2O Danube is a language model trained on 1T tokens following the core principles of LLaMA 2 and Mistral.,,https://huggingface.co/h2oai/h2o-danube-1.8b-base/discussions,,,Apache 2.0,text; text,unknown,H2O Danube,USA,H2O AI,"Users are encouraged to use the large language model responsibly and ethically. 
By using this model, you agree not to use it for purposes that promote hate speech, discrimination, harassment, or any form of illegal or harmful activities.",unknown,,1.8B parameters (dense),model,https://arxiv.org/pdf/2401.16818.pdf,https://huggingface.co/h2oai/h2o-danube-1.8b-base,unknown,8x H100 GPUs on a single node,unknown,,,,,,, open,,2023-11-04,,[],SaiLy is a series/collection of AI Models by Deepnight Research which are highly experimental and uncensored.,,https://huggingface.co/deepnight-research/saily_100b/discussions,,,MIT,text; text,unknown,SaiLY,International,Deepnight Research,,,,100B parameters (dense),model,https://huggingface.co/deepnight-research/saily_100b,https://huggingface.co/deepnight-research/saily_100b,unknown,unknown,unknown,,,,,,, open,Evaluated across a range of domain tasks across standard benchmarks in comparison to predecessor Llama 2.,2023-10-19,,"['Llama 2', 'BLOOM']",TigerBot is an open source multilingual multitask LLM.,,https://huggingface.co/TigerResearch/tigerbot-180b-base-v2/discussions,,,Apache 2.0,text; text,unknown,TigerBot,International,TigerResearch,,Safety filtering performed to mitigate risk and remove toxic content.,,180B parameters (dense),model,https://arxiv.org/pdf/2312.08688.pdf,https://huggingface.co/TigerResearch/tigerbot-180b-base-v2,unknown,32 A100-40G GPUs,unknown,,,,,,, open,outperforms majority of preceding state-of-the-art models over 15 unique biomedical modalities.,2023-05-26,,"['GPT-style autoregressive decoder', 'BiomedGPT biomedical datasets']",BiomedGPT leverages self-supervision on large and diverse datasets to accept multi-modal inputs and perform a range of downstream tasks.,,,,furthering research in developing unified and generalist models for biomedicine.,Apache 2.0,"image, text; text",,BiomedGPT,USA,Lehigh University,,"No specific quality control is mentioned in model training, though details on data processing and how the model was trained are provided in the paper.",,472M parameters 
(dense),model,https://arxiv.org/pdf/2305.17100.pdf,,unknown,10 NVIDIA A5000 GPUs,unknown,,,,,,, open,"Evaluated on public benchmarks like MMLU, CMMLU, GSM8K, and HumanEval.",2023-09-20,,[],"Baichuan 2 is a series of large-scale multilingual language models containing 7 billion and 13 billion parameters, trained from scratch, on 2.6 trillion tokens.",,https://huggingface.co/Xwin-LM/Xwin-LM-70B-V0.1/discussions,,,Apache 2.0,text; text,,Baichuan 2,China,Baichuan Inc.,,,,13B parameters (dense),model,https://arxiv.org/pdf/2309.10305.pdf,,unknown,1024 NVIDIA A800 GPUs,unknown,,,,,,, open,"Evaluated across standard benchmarks and generally performs better than Mixtral, which it was fine-tuned on.",2024-01-10,,['Mixtral'],Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM.,,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/discussions,,,Apache 2.0,"text; code, text",unknown,Nous Hermes 2,Canada,Nous Research,,unknown,,7B parameters (dense),model,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO,unknown,unknown,unknown,,,,,,, open,Evaluated across a variety of standard benchmarks in comparison to LLaMA 2.,2023-11-01,,['LLaMA 2'],YaRN LLaMA 2 is an adapted version of LLaMA 2 using the YaRN extension method.,,https://huggingface.co/NousResearch/Yarn-Llama-2-70b-32k/discussions,,,LLaMA 2,text; text,unknown,YaRN LLaMA 2,Canada/Switzerland,"Nous Research, EleutherAI, University of Geneva",,,,70B parameters (dense),model,https://arxiv.org/pdf/2309.00071.pdf,https://huggingface.co/NousResearch/Yarn-Llama-2-70b-32k,unknown,unknown,unknown,,,,,,, open,,2023-11-13,,['Yi'],The Capybara series is a series of LLMs and the first Nous collection of models made by fine-tuning mostly on data created by Nous in-house.,,https://huggingface.co/NousResearch/Nous-Capybara-34B/discussions,,,MIT,text; text,unknown,Nous Capybara,Canada,Nous 
Research,,,,34B parameters (dense),model,https://huggingface.co/NousResearch/Nous-Capybara-34B,https://huggingface.co/NousResearch/Nous-Capybara-34B,unknown,unknown,unknown,,,,,,, open,Evaluated across a variety of standard benchmarks in comparison to Mistral.,2023-11-01,,['Mistral'],YaRN Mistral is an adapted version of Mistral using the YaRN extension method.,,https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k/discussions,,,MIT,text; text,unknown,YaRN Mistral,Canada/Switzerland,"Nous Research, EleutherAI, University of Geneva",,,,7B parameters (dense),model,https://arxiv.org/pdf/2309.00071.pdf,https://huggingface.co/NousResearch/Yarn-Mistral-7b-128k,unknown,unknown,unknown,,,,,,, open,Evaluated on common LLM benchmarks in comparison to other Mistral derivatives.,2023-11-03,,['Mistral'],"OpenHermes 2.5 Mistral 7B is a state of the art Mistral Fine-tune, a continuation of OpenHermes 2 model, trained on additional code datasets.",,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B/discussions,,,Apache 2.0,text; text,unknown,OpenHermes 2.5 Mistral,Canada,Nous Research,,,,7B parameters (dense),model,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B,https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B,unknown,unknown,unknown,,,,,,, open,"The model was examined across a range of benchmarks including GPT4All, AGIEval, BigBench, TruthfulQA and in-house evaluations of function calling and JSON mode.",2024-03-10,,"['Mistral', 'OpenHermes 2.5 Dataset', 'Nous Hermes 2']","Hermes 2 Pro on Mistral 7B is an upgraded, retrained version of Nous Hermes 2. 
This improved version excels at function calling, JSON Structured Outputs, and several other areas, scoring positively on various benchmarks.",,https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B/discussions,,"The model is intended for general task and conversation capabilities, function calling, and JSON structured outputs.",Apache 2.0,text; text,unknown,Hermes 2 Pro-Mistral,unknown,Nous,unknown,"The model was evaluated across multiple tasks, displaying notable scores in GPT4All, AGIEval, BigBench, and TruthfulQA. It also has a high score on function calling and JSON mode, indicating the robustness of its capabilities.",,7B parameters (dense),model,https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B,https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B,unknown,unknown,unknown,,,,,,, open,unknown,2024-03-07,,[],"Genstruct is an instruction-generation model, designed to create valid instructions given a raw text corpus. This enables the creation of new, partially synthetic instruction finetuning datasets from any raw-text corpus. This work was inspired by Ada-Instruct and the model is also trained to generate questions involving complex scenarios that require detailed reasoning.",,https://huggingface.co/NousResearch/Genstruct-7B/discussions,,"The model is intended for instruction-generation, creating questions involving complex scenarios and generating reasoning steps for those questions.",Apache 2.0,text; text,unknown,Genstruct,unknown,Nous,unknown,unknown,,7B parameters (dense),model,https://huggingface.co/NousResearch/Genstruct-7B,https://huggingface.co/NousResearch/Genstruct-7B,unknown,unknown,unknown,,,,,,, open,Comprehensive evaluations reveal that DeepSeek-V3 outperforms other open-source models and achieves performance comparable to leading closed-source models.,2025-01-14,,['DeepSeek-R1'],"DeepSeek-V3 is a Mixture-of-Experts (MoE) language model with 671B total parameters and 37B activated per token. 
It utilizes Multi-head Latent Attention (MLA) and adopts innovative strategies for improved performance, such as an auxiliary-loss-free load balancing and a multi-token prediction training objective. Comprehensive evaluations show it achieves performance comparable to leading closed-source models.",,unknown,,unknown,MIT,unknown,unknown,DeepSeek-V3,unknown,DeepSeek,unknown,"Post-training includes knowledge distillation from the DeepSeek-R1 model, incorporating verification and reflection patterns to enhance reasoning performance.",,671B parameters (sparse),model,https://huggingface.co/deepseek-ai/DeepSeek-V3,https://huggingface.co/deepseek-ai/DeepSeek-V3,unknown,H800 GPUs,2.788M GPU hours,,,,,,, open,,2024-04-24,,[],"We present a methodology for scaling LLMs called depth up-scaling (DUS) , which encompasses architectural modifications and continued pretraining. In other words, we integrated Mistral 7B weights into the upscaled layers, and finally, continued pre-training for the entire model. SOLAR-10.7B has remarkable performance. It outperforms models with up to 30B parameters, even surpassing the recent Mixtral 8X7B model. For detailed information, please refer to the experimental table. Solar 10.7B is an ideal choice for fine-tuning. SOLAR-10.7B offers robustness and adaptability for your fine-tuning needs. 
Our simple instruction fine-tuning using the SOLAR-10.7B pre-trained model yields significant performance improvements (SOLAR-10.7B-Instruct-v1.0).",,https://www.upstage.ai/solar-llm,,,Apache 2.0,text; text,,SOLAR,South Korea,Upstage.ai,,,,10.7B parameters,model,https://arxiv.org/abs/2312.15166,https://huggingface.co/upstage/SOLAR-10.7B-v1.0,,,,,,,,,, closed,,2022-09-14,,[],ACT-1 (ACtion Transformer) is a large-scale transformer model designed and trained specifically for taking actions on computers (use software tools APIs and websites) in response to the user's natural language commands.,,,,,unknown,text; text,,ACT-1,USA,Adept,,,,,model,https://www.adept.ai/blog/act-1,,unknown,unknown,unknown,,,,,,, open,"Evaluated in comparison to LLaMA 2 and MPT Instruct, and outperforms both on standard benchmarks.",2023-09-07,,[],"Persimmon is the most capable open-source, fully permissive model with fewer than 10 billion parameters, as of its release date.",,,,,Apache 2.0,text; text,,Persimmon,USA,Adept,,,,8B parameters (dense),model,https://www.adept.ai/blog/persimmon-8b,,,,,,,,,,, open,Evaluated on standard image understanding benchmarks.,2023-10-17,,[],Fuyu is a small version of the multimodal model that powers Adept's core product.,,https://huggingface.co/adept/fuyu-8b/discussions,,The model is intended for research purposes only.,CC-BY-NC-4.0,"image, text; text",,Fuyu,USA,Adept,"The model was not trained to be factual or true representations of people or events, and therefore using the model to generate such content is out-of-scope for the abilities of this model.",,,8B parameters (dense),model,https://www.adept.ai/blog/fuyu-8b,https://huggingface.co/adept/fuyu-8b,unknown,unknown,unknown,,,,,,, closed,"Evaluated on the MMLU, GSM8K, MATH, and HumanEval benchmarks. 
According to these benchmarks, Fuyu-Heavy is, as of release, the strongest multimodal model trained outside of Google or OpenAI.",2024-01-24,,[],Fuyu Heavy is a new multimodal model designed specifically for digital agents.,,,,unknown,unknown,"image, text; text",,Fuyu Heavy,USA,Adept,,,,unknown,model,https://www.adept.ai/blog/adept-fuyu-heavy,,unknown,unknown,unknown,,,,,,, closed,"The GPT-3 paper, which also introduces the GPT-3 dataset, provides a limited analysis on the GPT-3 dataset, reporting the dirtiness of the dataset after the it was filtered for text occurring in common benchmarking tasks. The authors report that ""as the dataset becomes more contaminated, the variance of the clean over all fraction increases, but there is no apparent bias towards improved or degraded performance"" [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).",2020-06-11,,['WebText'],The GPT-3 dataset is the text corpus that was used to train the GPT-3 model. Information on the GPT-3 dataset is limited to discussion in the paper introducing GPT-3 [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).,The Common Crawl dataset was processed using a classifier that kept high quality documents and filtered low quality documents. WebText was used as a proxy for high quality documents [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A).,unknown,"The dataset is composed of several NLP corpora including Common Crawl (filtered, 60%), WebText2 (22%), Books1 (8%), Books2 (8%), Wikipedia (3%) [[Section 2.2]](https://arxiv.org/pdf/2005.14165.pdf#subsection.2.2).",The intended use of the GPT-3 dataset is to train language models.,unknown,text,unknown,GPT-3 dataset,USA,OpenAI,unknown,"In addition to excluding low quality documents from the Common Crawl dataset, the authors fuzzily deduplicated documents within each dataset, by removing documents that have high overlap with each other. 
The same procedure was followed to fuzzily deduplicate WebText from Common Crawl [[Appendix A]](https://arxiv.org/pdf/2005.14165.pdf#appendix.A). Text occurring in benchmark datasets were also partially removed [[Appendix C]](https://arxiv.org/pdf/2005.14165.pdf#appendix.C).",[],570 GB,dataset,https://arxiv.org/pdf/2005.14165.pdf,,,,,,,,,,, open,,2021-08-10,,[],"HumanEval is a dataset of 164 programming problems hand-written to evaluate their Codex model. ","Code problems easily found on the internet. ","Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). ","164 hand-written questions. ","Evaluating code generation capabilities of models. ",MIT,code,,HumanEval,USA,OpenAI,,"The evaluation dataset was handwritten to ensure that the evaluation problems do not exist in the Codex dataset [[Section 2.2]](https://arxiv.org/pdf/2107.03374.pdf#subsection.2.2). ","['\n\ndef string_sequence(n: int) -> str:\n """""" Return a string containing space-delimited numbers starting from 0 upto n inclusive.\n >>> string_sequence(0)\n \'0\'\n >>> string_sequence(5)\n \'0 1 2 3 4 5\'\n """"""\n', '\n\ndef count_distinct_characters(string: str) -> int:\n """""" Given a string, find out how many distinct characters (regardless of case) does it consist of\n >>> count_distinct_characters(\'xyzXYZ\')\n 3\n >>> count_distinct_characters(\'Jerry\')\n 4\n """"""\n', 'from typing import List\n\n\ndef parse_music(music_string: str) -> List[int]:\n """""" Input to this function is a string representing musical notes in a special ASCII format.\n Your task is to parse this string and return list of integers corresponding to how many beats does each\n not last.\n\n Here is a legend:\n \'o\' - whole note, lasts four beats\n \'o|\' - half note, lasts two beats\n \'.|\' - quater note, lasts one beat\n\n >>> parse_music(\'o o| .| o| o| .| .| .| .| o o\')\n [4, 2, 1, 2, 2, 1, 1, 1, 1, 4, 4]\n """"""\n', '\n\ndef how_many_times(string: str, substring: str) -> int:\n """""" Find how 
many times a given substring can be found in the original string. Count overlaping cases.\n >>> how_many_times(\'\', \'a\')\n 0\n >>> how_many_times(\'aaa\', \'a\')\n 3\n >>> how_many_times(\'aaaa\', \'aa\')\n 3\n """"""\n', 'from typing import List\n\n\ndef sort_numbers(numbers: str) -> str:\n """""" Input is a space-delimited string of numberals from \'zero\' to \'nine\'.\n Valid choices are \'zero\', \'one\', \'two\', \'three\', \'four\', \'five\', \'six\', \'seven\', \'eight\' and \'nine\'.\n Return the string with numbers sorted from smallest to largest\n >>> sort_numbers(\'three one five\')\n \'one three five\'\n """"""\n']",214 KB,dataset,https://arxiv.org/pdf/2107.03374.pdf,,,,,,,,,,, closed,,2021-08-10,,[],"The dataset used to train the Codex model. ","Following were filtered from the dataset: autogenerated files; files with average line length > 100, maximum line length > 1000, or few alphanumeric characters [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). ","Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). ","The dataset includes 54 million public software repositories hosted on GitHub as of an unspecified date in May 2020 [[Section 3.1]](https://arxiv.org/pdf/2107.03374.pdf#subsection.3.1). ",Training language models on code.,,,unknown,Codex dataset,USA,OpenAI,unknown,"Dataset was filtered using simple heuristics, as outlined in the excluded field. ",[],159 GB,dataset,https://arxiv.org/pdf/2107.03374.pdf,,,,,,,,,,, closed,"The dataset contained some overlap with the test sets of the benchmarks used for evaluation, but the authors determined the impact to be small: ""There is a median overlap of 2.2% and an average overlap of 3.2%. Due to this small amount of overlap, overall accuracy is rarely shifted by more than 0.1% with only 7 datasets above this threshold"" [[Section 5]](https://arxiv.org/pdf/2103.00020.pdf#section.5). ",2021-01-05,,[],"CLIP dataset contains text-image pairs crawled from the internet. 
",,unknown,"Data crawled from the internet, without any filtering (including de-duplication) or curation. ",Training multimodal vision models.,unknown,"image, text",unknown,CLIP dataset,USA,OpenAI,unknown,"The data was ""only crawled websites that had policies against excessively violent and adult images and allowed us to filter out such content"" [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). ",[],"400M (image, text) pairs",dataset,https://arxiv.org/pdf/2103.00020.pdf,,,,,,,,,,, closed,"The authors found that the dataset contained 21% of the images in the MS-COCO validation set, but observed no significant changes in the performance of the accompanying DALL·E when tested on MS-COCO evaluation set with and without the said images [[Section 3.1]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.1).",2021-01-05,,[],"DALL·E dataset is the training set consisting of image and text pairs collected to train the DALL·E model. ","MS-COCO was excluded from the dataset, but because MS-COCO was created from YFCC100M, some of the test images (not the captions) were included. ",unknown,"Data from the internet, including Conceptual Captions and a filtered subset of YFCC100M. ",Training multimodal vision models.,unknown,"image, text",unknown,DALL·E dataset,USA,OpenAI,unknown,"The data was de-duplicated [[Section 3.2]](https://arxiv.org/pdf/2102.12092.pdf#subsection.3.2). The data collected from the internet was filtered using image, text and joint image and text filters, which included: ""discarding instances whose captions are too short, are classified as non-English by the Python package cld3, or that consist primarily of boilerplate phrases such as “photographed on [date]”, where [date] matches various formats for dates that we found in the data"". The authors also discard ""instances whose images have aspect ratios not in [1/2, 2]"" [[Appendix C]](https://arxiv.org/pdf/2102.12092.pdf#appendix.C). 
",[],"250M (image, text) pairs ",dataset,https://arxiv.org/abs/2102.12092,,,,,,,,,,, closed,"The Whisper paper provides limited details on preprocessing. ",2022-09-21,,[],"The Whisper dataset is the speech corpus that was used to train the Whisper model. Information on the dataset is limited to discussion in the paper introducing Whisper. [[Section 2.1]](https://cdn.openai.com/papers/whisper.pdf). ","Automated filtering was conducted. ",unknown,"The dataset is composed three major sources: multilingual speech recognition (17%), translation (18%), and English speech recognition (65%). [[Figure 11]](https://cdn.openai.com/papers/whisper.pdf). ",The intended use is to train speech models.,unknown,audio,,Whisper dataset,USA,OpenAI,,"In addition to filtering, basic text standardization was done. ",[],680k hours,dataset,https://cdn.openai.com/papers/whisper.pdf,,,,,,,,,,, closed,,2019-11-01,,[],,,,,,unknown,text,,WebText,USA,OpenAI,,,[],40 GB,dataset,https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf,,,,,,,,,,, open,,2019-11-01,,['WebText'],,,,,,Modified MIT License,text; text,,GPT-2,USA,OpenAI,,,,1.5B parameters (dense),model,https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf,https://github.com/openai/gpt-2/blob/master/model_card.md,,,,,,,,,, limited,"The GPT-3 model was evaluated on language modeling, closed-book question answering, translation, Winograd-style tasks, commonsense reasoning, reading comprehension, SuperGLUE, NLI, synthetic tasks, and generation [[Section 4]](https://arxiv.org/pdf/2005.14165.pdf#section.4); as well as on fairness and biases [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). ",2020-06-11,,['GPT-3 dataset'],"GPT-3 is an autoregressive language model. ",,"Feedback for GPT-3 can be provided on the feedback form linked in the model card [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). 
The form is especially meant to collect feedback on concerns about misuse, synthetic text detection, bias, and risk of generative language models. ",,"GPT-3 was intended to be used through the OpenAI API by developers for language applications. Other intended uses of GPT-3 include researchers accessing the model through the API to study its paradigms [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). ",unknown,text; text,"OpenAI reviews all use cases of the model [[Model Card]](https://github.com/openai/gpt-3/blob/master/model-card.md). ",GPT-3,USA,OpenAI,"Access to GPT-3 is governed by Open AI API Usage Guidelines and API Terms of Use, prohibiting the use of the API in a way that causes societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy) [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed applications can be found in the usage guidelines [[Disallowed Applications]] (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). ","One quality control method OpenAI employed was releasing GPT-3 only through the OpenAI API. OpenAI states that it is easier to respond to misuse when the access to the model is gated through the API. It also hints that it plans to broaden the API access over time based on the amount of misuse [[OpenAI API Blog Post]](https://openai.com/blog/openai-api/). The authors identify potential misuses of GPT-3 in the paper and analyze it for fairness, bias and representation issues, but do not identify mitigation strategies [[Section 6]](https://arxiv.org/pdf/2005.14165.pdf#section.6). ",,175B parameters (dense),model,https://arxiv.org/pdf/2005.14165.pdf,https://github.com/openai/gpt-3/blob/master/model-card.md,552.1 tCO2e,Azure,3640 petaflop/s-days,,,,,,, limited,"The model was evaluated using the HumanEval dataset with pass@k metric and BLEU scores [[Section 2]](https://arxiv.org/pdf/2107.03374.pdf#section.2). 
",2021-08-10,,"['GPT-3', 'Codex dataset', 'HumanEval']","Codex is a GPT language model fine-tuned on publicly available code from GitHub. ",,"Email the authors [[Codex Paper]](https://arxiv.org/pdf/2107.03374.pdf). ",,"Codex is intended to be used for coding related language modelling tasks. ",unknown,"text; code, text",unknown,Codex,USA,OpenAI,unknown,"The model wasn't fully released to the public as a quality control measure. The authors identify potential risks of Codex in their paper due to the following: over-reliance, misalignment, bias and representation, economic and labor market impacts, security implications, environmental impact and legal implications. They also make suggestions for some of these, but do not implement them in Codex [[Section 7]](https://arxiv.org/pdf/2107.03374.pdf#section.7). ",,12B parameters (dense),model,https://arxiv.org/pdf/2107.03374.pdf,,unknown,Azure,100-1000 petaflop/s-days,,,,,,, closed,"The model was evaluated on human ratings to the InstructGPT answers to the prompts submitted to the OpenAI API as well as on public NLP datasets spanning truthfulness, toxicity, and bias, question answering, reading comprehension, and summarization tasks.",2022-01-27,,"['GPT-3', 'OpenAI API']","InstructGPT is a family of GPT-3 based models fine-tuned on human feedback, which allows for better instruction following capabilities than GPT-3. ",,"Email the authors [[InstructGPT Paper]](https://arxiv.org/pdf/2203.02155.pdf). ",,"As stated in the model card: ""The intended direct users of InstructGPT are developers who access its capabilities via the OpenAI API. Through the OpenAI API, the model can be used by those who may not have AI development experience, to build and explore language modeling systems across a wide range of functions. 
We also anticipate that the model will continue to be used by researchers to better understand the behaviors, capabilities, biases, and constraints of large-scale language models"" [[Model Card]](https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md). ",unknown,"text; code, text",unknown,InstructGPT,USA,OpenAI,"Access to InstructGPT is governed by Open AI API Usage Guidelines and API Terms of Use, prohibiting the use of the API in a way that causes societal harm. [[Usage Guidelines]] (https://beta.openai.com/docs/usage-guidelines/content-policy) [[Terms of Use]](https://openai.com/api/policies/terms/). The list of disallowed applications can be found in the usage guidelines [[Disallowed Applications]] (https://beta.openai.com/docs/usage-guidelines/disallowed-applications). ","The model wasn't fully released to the public as a quality control measure. ",,175B parameters (dense),model,https://arxiv.org/pdf/2203.02155.pdf,https://github.com/openai/following-instructions-human-feedback/blob/main/model-card.md,unknown,unknown,60 petaflops/s-days,,,,,,, open,"The model was evaluated for zero-shot English and multilingual speech recognition, translation, language identification and robustness to noise.",2022-09-21,,['Whisper dataset'],Whisper is an audio transcription software.,,"The discussions page of the codebase is not formally cited as a place for feedback, but is being used in this way [[Discussions page]](https://github.com/openai/whisper/discussions) ",,"Whisper is a general-purpose speech recognition model; it is a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification. 
",MIT,audio; text,,Whisper,USA,OpenAI,unknown,No specific quality control methods are documented.,,1.5B parameters (dense),model,https://cdn.openai.com/papers/whisper.pdf,https://github.com/openai/whisper/blob/main/model-card.md,unknown,Azure,unknown,,,,,,, open,"The model was evaluated on standard vision datasets (e.g. CIFAR10, ImageNet) and showed robust state of the art results.",2021-01-05,,['CLIP dataset'],"""CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. It can be instructed in natural language to predict the most relevant text snippet, given an image, without directly optimizing for the task, similarly to the zero-shot capabilities of GPT-2 and 3. We found CLIP matches the performance of the original ResNet50 on ImageNet “zero-shot” without using any of the original 1.28M labeled examples, overcoming several major challenges in computer vision"" [[CLIP Repository]](https://github.com/openai/CLIP). ",,Questions can be shared at the feedback form linked in the CLIP model card [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.mdlicen).,,"The model is intended to be used by AI researchers to better understand ""robustness, generalization, and other capabilities, biases, and constraints of computer vision models"" [[CLIP Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). ",MIT,"image, text; text",,CLIP,USA,OpenAI,"""Any deployed use case of the model - whether commercial or not - is currently out of scope. Non-deployed use cases such as image search in a constrained environment, are also not recommended unless there is thorough in-domain testing of the model with a specific, fixed class taxonomy. This is because our safety assessment demonstrated a high need for task specific testing especially given the variability of CLIP’s performance with different class taxonomies. 
This makes untested and unconstrained deployment of the model in any use case currently potentially harmful. Certain use cases which would fall under the domain of surveillance and facial recognition are always out-of-scope regardless of performance of the model. This is because the use of artificial intelligence for tasks such as these can be premature currently given the lack of testing norms and checks to ensure its fair use. Since the model has not been purposefully trained in or evaluated on any languages other than English, its use should be limited to English language use cases"" [[Model Card]](https://github.com/openai/CLIP/blob/main/model-card.md). ","The authors found that the performance of the model depended heavily on which classes are included (and excluded) for a given task. They reported significant race and gender based disparities on the Fairface dataset, depending on how the classes were constructed. The authors also demonstrated that the model was capable of racial profiling with high accuracy [[Section 7]](https://arxiv.org/pdf/2103.00020.pdf#section.7). ",,unknown,model,https://arxiv.org/pdf/2103.00020.pdf,https://github.com/openai/CLIP/blob/main/model-card.md,unknown,NVIDIA V100 GPUs,71.12 petaflop/s-day,,,,,,, limited,"The model was evaluated against three prior approaches, AttnGAN, DM-GAN, and DF-GAN using Inception Score and Fréchet Inception Distance on MS-COCO as metrics. The model was also evaluated by humans and received the majority of the votes in generating images that look realistic and better match the caption when compared to the images generated by DF-GAN [[Section]](https://arxiv.org/pdf/2102.12092.pdf#section.3). ",2021-01-05,,['DALL·E dataset'],"DALL·E is a GPT-3 based model trained to generate images from text descriptions. 
The authors found that it had ""a diverse set of capabilities, including creating anthropomorphized versions of animals and objects, combining unrelated concepts in plausible ways, rendering text, and applying transformations to existing images"" [[OpenAI Blog Post]](https://openai.com/blog/dall-e/). ",,"Contact the paper author(s) specified on the paper [[Paper]](https://arxiv.org/pdf/2102.12092.pdf). ",,"""The model is intended for others to use for training their own generative models"" [[Model Card]](https://github.com/openai/DALL-E/blob/master/model_card.md). ",unknown,text; image,,DALL·E,USA,OpenAI,unknown,unknown,,12B parameters (dense),model,https://arxiv.org/pdf/2102.12092.pdf,https://github.com/openai/DALL-E/blob/master/model_card.md,unknown,NVIDIA V100 GPUs,unknown,,,,,,, open,Evaluations in paper are primarily considering the fidelity and novelty of samples from Jukebox.,2020-04-30,,['Jukebox Dataset'],Jukebox is a generative model that produces music,,,,,Noncommercial Use License,text; audio,,Jukebox,USA,OpenAI,,,,5B parameters (dense),model,https://arxiv.org/abs/2005.00341,,unknown,510 V100s,4 weeks,,,,,,, limited,The model is capable of generating explicit content and the researchers found limited amount of spurious content generated. The researchers also found that visual synonyms can be used to prompt the model to surface unwanted generations [[Probes and Evaluations]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#probes-and-evaluations).,2022-04-13,,"['DALL·E dataset', 'CLIP dataset']","""DALL·E 2 is an artificial intelligence model that takes a text prompt and/or existing image as an input and generates a new image as an output"" [[System Card]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md). The model wasn't fully released, but OpenAI released a version of the model (DALL·E 2 Preview) to a select group of testers. 
",,Feedback can be provided at support at openai.com.,,"""The intended use of the DALL·E 2 Preview at this time is for personal, non-commercial exploration and research purposes by people who are interested in understanding the potential uses of these capabilities"" [[Use]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#use). ",unknown,text; image,"Uses of the model are monitored. In the preview version, any user can flag content. The specific policies for monitoring are not disclosed, but possible measures include disabling of accounts violating the content policies [[Monitoring and Reporting]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#monitoring-and-reporting). '",DALL·E 2,USA,OpenAI,"Use of the model is governed by the OpenAI Content Policy, which prohibits posting of G rated content. Users are not allowed to utilize the model in commercial products in the preview version [[Content Policy]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#policies-and-enforcement).",The model is not fully released to the public as part of a quality control measure. The usage of the model by testers is monitored and user provided prompts are filtered [[Input filters]] (https://github.com/openai/dalle-2-preview/blob/main/system-card.md#input-filters).,,unknown,model,https://arxiv.org/abs/2204.06125,https://github.com/openai/dalle-2-preview/blob/main/system-card.md,unknown,unknown,unknown,,,,,,, limited,,2020-06-11,,"['GPT-3', 'Codex', 'code-davinci-002', 'text-davinci-002', 'text-davinci-003', 'gpt-3.5-turbo', 'Whisper', 'DALL·E', 'GPT-4', 'GPT-4 Turbo']","OpenAI API is a general purpose ""text in, text out"" interface connecting users with a suite of language models. The API was initially released as a gateway to GPT-3, but it now supports access to other, more specialized OpenAI models. 
[[Open AI Blog Post]](https://openai.com/blog/openai-api/) ",,unknown,,"OpenAI API was designed to be used by developers to empower applications, and researchers to study language models [[Section 3]](https://openai.com/api/policies/terms/). ",custom,,"OpenAI may monitor the API use to ensure ""quality and improve OpenAI systems, products and services; perform research; and ensure compliance"" with the Terms of Service and all applicable laws. Users of the API will give OpenAI reasonable access to their application to monitor compliance with the terms listed in the Terms of Service [[Section 5(b)]](https://openai.com/api/policies/terms/). Apps using the OpenAI API should submit an application once they are deployed to real users. The review form takes 10 minutes to complete and over 97% of the applications are directly accepted or conditionally accepted. The applicants are notified of the decision within 2 business days [[App Review Guidelines]] (https://beta.openai.com/docs/usage-guidelines/app-review). ",OpenAI API,USA,OpenAI,"OpenAI API Terms of Use prohibits the use of the API in a way violating the applicable law, including: (i) ""Illegal activities, such as child pornography, gambling, cybercrime, piracy, violating copyright, trademark or other intellectual property laws""; (ii) ""Accessing or authorizing anyone to access the APIs from an embargoed country, region, or territory as prohibited by the U.S. government""; (iii) ""Threatening, stalking, defaming, defrauding, degrading, victimizing or intimidating anyone for any reason"". The usage requirements are detailed in the Terms of Use [[Section 3]](https://openai.com/api/policies/terms/). ","Given a prompt, OpenAI API checks whether a completion contains unsafe language using its filters and marks the completion accordingly if so. The API also provides developers with special endpoints that scope the API usage. 
OpenAI also developed user guidelines to help developers understand safety issues [[OpenAI API]](https://openai.com/api/). ",,,application,https://openai.com/api/,,,,,The API exposes the models fairly directly with a range of hyperparameters (e.g. temperature scaling).,unknown,unknown,"Given a prompting text, the OpenAI API provides access to text completions, and log probabilities. The support for text and code embeddings was added on 2022-01-25 [[OpenAI Blog Post]] (https://openai.com/blog/introducing-text-and-code-embeddings/). ",https://openai.com/api/policies/terms/,unknown, open,,2022-06-23,,['web_clean'],,,,,,MIT,video; video,,VPT,USA,OpenAI,,,,500M parameters (dense),model,https://arxiv.org/abs/2206.11795,,,,,,,,,,, closed,,2022-06-23,,[],,,,,,unknown,video,,web_clean,USA,OpenAI,,,[],70k hours,dataset,https://arxiv.org/abs/2206.11795,,,,,,,,,,, limited,,2022-11-30,,"['gpt-3.5-turbo', 'OpenAI toxicity classifier']",ChatGPT is an artificial intelligence chatbot developed by OpenAI.,,,,,custom,,,ChatGPT,USA,OpenAI,,,,,application,https://openai.com/blog/chatgpt,,,,,,,100M,,,, limited,,2023-03-01,,['gpt-3.5-turbo dataset'],,,,,,custom,text; text,,gpt-3.5-turbo,USA,OpenAI,,,,unknown,model,https://platform.openai.com/docs/models/gpt-3-5,,,,,,,,,,, limited,,2023-11-06,,[],GPT-4 Turbo is a more capable version of GPT-4 and has knowledge of world events up to April 2023. 
It has a 128k context window so it can fit the equivalent of more than 300 pages of text in a single prompt.,,,,,custom,text; text,unknown,GPT-4 Turbo,USA,OpenAI,,,,unknown,model,https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo,,unknown,unknown,unknown,,,,,,, limited,,2023-03-01,,[],,,,,,unknown,text,,gpt-3.5-turbo dataset,USA,OpenAI,,,[],unknown,dataset,https://platform.openai.com/docs/models/gpt-3-5,,,,,,,,,,, limited,,2022-05-01,,[],,,,,,unknown,"code, text",,code-davinci-002 dataset,USA,OpenAI,,,[],unknown,dataset,https://platform.openai.com/docs/model-index-for-researchers,,,,,,,,,,, limited,,2022-05-01,,['code-davinci-002 dataset'],,,,,,unknown,"text; code, text",,code-davinci-002,USA,OpenAI,,,,unknown,model,https://platform.openai.com/docs/model-index-for-researchers,,,,,,,,,,, limited,,2022-05-01,,['code-davinci-002'],,,,,,unknown,text; text,,text-davinci-002,USA,OpenAI,,,,unknown,model,https://platform.openai.com/docs/model-index-for-researchers,,,,,,,,,,, limited,,2022-11-30,,['text-davinci-002'],,,,,,unknown,text; text,,text-davinci-003,USA,OpenAI,,,,unknown,model,https://platform.openai.com/docs/model-index-for-researchers,,,,,,,,,,, open,,2023-03-01,,['Whisper'],API to query OpenAI's Whisper model.,,,,,custom,,,Whisper API,USA,OpenAI,,,,,application,https://openai.com/blog/introducing-chatgpt-and-whisper-apis,,,,,,,,,,, open,,2023-03-01,,['ChatGPT'],API to query OpenAI's ChatGPT model.,,,,,custom,,,ChatGPT API,USA,OpenAI,,,,,application,https://openai.com/blog/introducing-chatgpt-and-whisper-apis,,,,,,,,,,, open,,2022-08-10,,['OpenAI toxicity classifier'],This endpoint provides OpenAI API developers with free access to GPT-based classifiers that detect undesired content—an instance of using AI systems to assist with human supervision of these systems.,,,,,custom,,,OpenAI Moderation API,USA,OpenAI,,,,,application,https://openai.com/blog/new-and-improved-content-moderation-tooling,,,,,,,,,,, closed,,2023-01-18,,['OpenAI toxicity 
dataset'],,,,,,unknown,text; text,,OpenAI toxicity classifier,USA,OpenAI,,,,unknown,model,"https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT.",,,,,,,,,,, closed,,2023-01-18,,[],,,,,,unknown,text,,OpenAI toxicity dataset,USA,OpenAI,,,[],unknown,dataset,"https://time.com/6247678/openai-chatgpt-kenya-workers/#:~:text=In%20a%20statement%2C%20an%20OpenAI,datasets%20of%20tools%20like%20ChatGPT.",,,,,,,,,,, limited,,2023-02-03,,['Sage'],A chatbot language model available via Quora's Poe,,,,,unknown,,,Sage API,USA,OpenAI,,,,,application,https://quorablog.quora.com/Poe-1,,,,,,,,,,, limited,,2023-02-03,,['Dragonfly'],A chatbot language model available via Quora's Poe,,,,,unknown,,,Dragonfly API,USA,OpenAI,,,,,application,https://quorablog.quora.com/Poe-1,,,,,,,,,,, limited,,2023-02-03,,[],A chatbot language model available via Quora's Poe,,,,,unknown,text; text,,Sage,USA,OpenAI,,,,unknown,model,https://quorablog.quora.com/Poe-1,,,,,,,,,,, limited,,2023-02-03,,[],A chatbot language model available via Quora's Poe,,,,,unknown,text; text,,Dragonfly,USA,OpenAI,,,,unknown,model,https://quorablog.quora.com/Poe-1,,,,,,,,,,, limited,,2023-03-07,,['ChatGPT API'],"The app integrates ChatGPT’s powerful AI technology to deliver instant conversation summaries, research tools, and writing assistance directly in Slack to help millions of companies work more productively.",,,,,unknown,,,ChatGPT for Slack,unknown,"OpenAI, Salesforce",,,,,application,https://www.salesforce.com/news/stories/chatgpt-app-for-slack/,,,,,,,,,,, limited,,2023-03-14,,[],,,,,,unknown,"image, text; image, text",,GPT-4,USA,OpenAI,,,,unknown,model,https://arxiv.org/abs/2303.08774,,,,,,,,,,, limited,,2023-03-14,,['GPT-4'],"GPT-4 is OpenAI’s most advanced system, producing safer and more useful responses",,,,,custom,,,GPT-4 API,USA,OpenAI,,,,,application,https://openai.com/product/gpt-4,,,,,,,,,,, 
limited,,2023-08-28,,['GPT-4'],"ChatGPT Enterprise offers enterprise-grade security and privacy, unlimited higher-speed GPT-4 access, longer context windows for processing longer inputs, advanced data analysis capabilities, and customization options compared to OpenAI's previous offerings.",,,,,custom,,,ChatGPT Enterprise,USA,OpenAI,,,,,application,https://openai.com/enterprise,,,,,,,,,https://openai.com/policies/terms-of-use,, limited,The model is capable of generating explicit content and the researchers found limited amount of spurious content generated.,2023-09-20,,"['DALL·E 2 dataset', 'CLIP dataset', 'ChatGPT']","DALL·E 3 is an artificial intelligence model that takes a text prompt and/or existing image as an input and generates a new image as an output. The model is now in research preview, and will be available to ChatGPT Plus and Enterprise customers in October.",,Feedback can be provided at openai.com,,"The intended use of the DALL·E 3 Preview at this time is for personal, non-commercial exploration and research purposes by people who are interested in understanding the potential uses of these capabilities",custom,text; image,"Uses of the model are monitored. In the preview version, any user can flag content. The specific policies for monitoring are not disclosed, but possible measures include disabling of accounts violating the content",DALL·E 3,USA,OpenAI,"Use of the model is governed by the OpenAI Content Policy, which prohibits posting of G rated content. Users are not allowed to utilize the model in commercial products in the preview version.","DALL·E 3 has mitigations to decline requests that ask for a public figure by name. 
We improved safety performance in risk areas like generation of public figures and harmful biases related to visual over/under-representation, in partnership with red teamers—domain experts who stress-test the model—to help inform our risk assessment and mitigation efforts in areas like propaganda and misinformation.",,unknown,model,https://openai.com/dall-e-3,,unknown,unknown,unknown,,,,,,, limited,,2024-02-15,,[],Sora is an AI model that can create realistic and imaginative scenes from text instructions.,,,,,unknown,"text; image, video",unknown,Sora,USA,OpenAI,,,,unknown,model,https://openai.com/sora,,unknown,unknown,unknown,,,,,,, limited,"When evaluated on standard performance benchmarks, achieves similar levels of performance to GPT-4 Turbo.",2024-05-13,,[],"GPT-4o is OpenAI's new flagship model, as of release, that can reason across audio, vision, and text in real time.",,,,,unknown,"audio, image, text, video; audio, image, text","Internal monitoring of risk for non-text outputs before a public release (currently only image, text inputs and text outputs are available).",GPT-4o,USA,OpenAI,,Training data filtering and post-training refinement act as additional guardrails for preventing harmful outputs.,,unknown,model,https://openai.com/index/hello-gpt-4o/,,unknown,unknown,unknown,,,,,,, open,Evaluated across a range of standard datasets regarding multiple model capabilities like language comprehension and logical reasoning.,2023-11-06,,[],XVERSE is a multilingual large language model for over 40 languages.,,https://huggingface.co/xverse/XVERSE-65B/discussions,,,custom,text; text,unknown,XVERSE,USA,Xverse,,,,65B parameters (dense),model,https://github.com/xverse-ai/XVERSE-65B,https://huggingface.co/xverse/XVERSE-65B,unknown,unknown,unknown,,,,,,, limited,,2023-03-07,,['ChatGPT API'],"Brex Inc., a highly valued startup that makes software for finance professionals, is turning to the same artificial intelligence tool behind ChatGPT for a service that can answer 
questions about corporate budgets, policy and spending.",,,,,custom,,,Brex Chat,USA,Brex,,,,,application,https://fortune.com/2023/03/07/cfo-chatbot-chatgpt-ai-brex-finance-software-startup-budgets-policies/,,,,,,,,,https://www.brex.com/legal/user-terms,, open,Evaluated in comparison to LLaMA series models on standard language benchmarks.,2023-09-27,,[],Mistral is a compact language model.,,https://huggingface.co/mistralai/Mistral-7B-v0.1/discussions,,,Apache 2.0,text; text,,Mistral,USA,Mistral AI,,,,7.3B parameters (dense),model,https://mistral.ai/news/announcing-mistral-7b/,https://huggingface.co/mistralai/Mistral-7B-v0.1,unknown,unknown,unknown,,,,,,, limited,Evaluated on commonly used benchmarks in comparison to the current LLM leaders.,2024-02-26,,[],Mistral Large is Mistral AI’s new cutting-edge text generation model.,,,,,unknown,text; text,,Mistral Large,USA,Mistral AI,,,,unknown,model,https://mistral.ai/news/mistral-large/,,unknown,unknown,unknown,,,,,,, limited,,2024-02-26,,"['Mistral', 'Mistral Large']",Le Chat is a first demonstration of what can be built with Mistral models and what can deployed in the business environment.,,,,,unknown,,,Le Chat,USA,Mistral AI,,,,,application,https://mistral.ai/news/le-chat-mistral/,,,,,,unknown,unknown,,https://mistral.ai/terms/#terms-of-use,unknown, open,"Performance of Codestral is evaluated in Python, SQL, and additional languages, C++, bash, Java, PHP, Typescript, and C#. Fill-in-the-middle performance is assessed using HumanEval pass@1 in Python, JavaScript, and Java.",2024-05-29,,[],"Codestral is an open-weight generative AI model explicitly designed for code generation tasks. It helps developers write and interact with code through a shared instruction and completion API endpoint. Mastering code and English, it can be used to design advanced AI applications for software developers. 
It is fluent in 80+ programming languages.",,,,"Helps developers write and interact with code, design advanced AI applications for software developers, integrated into LlamaIndex and LangChain for building applications, integrated in VSCode and JetBrains environments for code generation and interactive conversation.",Mistral AI Non-Production License,text; code,unknown,Codestral,USA,Mistral AI,unknown,,,22B parameters,model,https://mistral.ai/news/codestral/,,unknown,unknown,unknown,,,,,,, open,"The model underwent an advanced fine-tuning and alignment phase. Its performance was evaluated using GPT4o as a judge on official references. It was compared to recent open-source pre-trained models Gemma 2 9B, Llama 3 8B regarding multilingual performance and coding accuracy. Tekken tokenizer's compression ability was compared with previous tokenizers like SentencePiece and the Llama 3 tokenizer.",2024-07-18,,[],"The Mistral NeMo model is a state-of-the-art 12B model built in collaboration with NVIDIA, offering a large context window of up to 128k tokens. The model is suitable for multilingual applications and exhibits excellent reasoning, world knowledge, and coding accuracy. It's easy to use and a drop-in replacement in a system that uses Mistral 7B. The model uses a new tokenizer, Tekken, based on Tiktoken, which is trained on over 100 languages. It compresses natural language text and source code more efficiently than previously used tokenizers.",,"Problems should be reported to the Mistral AI team, though the specific method of reporting is unknown.",,"The model can be used for multilingual applications, understanding and generating natural language as well as source code, handling multi-turn conversations, and providing more precise instruction following.",Apache 2.0,text; text,Unknown,Mistral NeMo,USA,"Mistral AI, NVIDIA",Unknown,The model underwent an advanced fine-tuning and alignment phase. 
Various measures such as accuracy comparisons with other models and instruction-tuning were implemented to ensure its quality.,,12B parameters,model,https://mistral.ai/news/mistral-nemo/,unknown,Unknown,"NVIDIA hardware, specifics unknown",Unknown,,,,,,, open,"The model has been tested for in-context retrieval capabilities up to 256k tokens. It has been created with advanced code and reasoning capabilities, which enables it to perform on par with SOTA transformer-based models.",2024-07-16,,[],"Codestral Mamba is a Mamba2 language model that is specialized in code generation. It has a theoretical ability to model sequences of infinite length and offers linear time inference. This makes it effective for extensive user engagement and is especially practical for code productivity use cases. Codestral Mamba can be deployed using the mistral-inference SDK or through TensorRT-LLM, and users can download the raw weights from HuggingFace.",,Problems with the model can be reported through the organization's website.,,The model is intended for code generation and can be utilized as a local code assistant.,Apache 2.0,text; text,Unknown,Codestral Mamba,USA,Mistral AI,Unknown,Unknown,,7.3B parameters,model,https://mistral.ai/news/codestral-mamba/,unknown,Unknown,Unknown,Unknown,,,,,,, open,The model's performance has been evaluated on the MATH and MMLU industry-standard benchmarks. It scored notably higher on both these tests than the base model Mistral 7B.,2024-07-16,,['Mistral 7B'],"MathΣtral is a 7B model designed for math reasoning and scientific discovery. It achieves state-of-the-art reasoning capacities in its size category across various industry-standard benchmarks. This model stands on the shoulders of Mistral 7B and specializes in STEM subjects. It is designed to assist efforts in advanced mathematical problems requiring complex, multi-step logical reasoning. 
It particularly achieves 56.6% on MATH and 63.47% on MMLU.",,Feedback is likely expected to be given through the HuggingFace platform where the model's weights are hosted or directly to the Mistral AI team.,,"The model is intended for use in solving advanced mathematical problems requiring complex, multi-step logical reasoning or any math-related STEM subjects challenges.",Apache 2.0,text; text,Unknown,MathΣtral,USA,Mistral AI,Unknown,This model has been fine-tuned from a base model and its inference and performance have been tested on several industry benchmarks.,,7B parameters,model,https://mistral.ai/news/mathstral/,unknown,Unknown,Unknown,Unknown,,,,,,, open,"We evaluate Pixtral Large against frontier models on a set of standard multimodal benchmarks, through a common testing harness.",2024-11-18,,['Mistral Large 2'],"Pixtral Large is the second model in our multimodal family and demonstrates frontier-level image understanding. Particularly, the model is able to understand documents, charts and natural images, while maintaining the leading text-only understanding of Mistral Large 2.",,unknown,,"RAG and agentic workflows, making it a suitable choice for enterprise use cases such as knowledge exploration and sharing, semantic understanding of documents, task automation, and improved customer experiences.","Mistral Research License (MRL), Mistral Commercial License","text, image; text",unknown,Pixtral Large,USA,Mistral AI,unknown,unknown,,124B parameters,model,https://mistral.ai/news/pixtral-large/,unknown,unknown,unknown,unknown,,,,,,, closed,Benchmarks We have benchmarked the new Codestral with the leading sub-100B parameter coding models that are widely considered to be best-in-class for FIM tasks.,2025-01-13,,[],"Lightweight, fast, and proficient in over 80 programming languages, Codestral is optimized for low-latency, high-frequency usecases and supports tasks such as fill-in-the-middle (FIM), code correction and test generation.",,We can’t wait to hear your 
experience! Try it now Try it on Continue.dev with VsCode or JetBrains,,"Highly capable coding companion, regularly boosting productivity several times over.",unknown,text; text,unknown,Codestral 25.01,USA,Mistral AI,unknown,unknown,,unknown,model,https://mistral.ai/news/codestral-2501/,unknown,unknown,unknown,unknown,,,,,,, open,"Evaluated on the OpenLLM leaderboard, releasing at rank number 4 on the leaderboard.",2023-08-11,,"['LLaMA 2', 'Guanaco LLaMA dataset']","GodziLLa 2 is an experimental combination of various proprietary LoRAs from Maya Philippines and Guanaco LLaMA 2 1K dataset, with LLaMA 2.",,,,,LLaMA 2,text; text,unknown,GodziLLa 2,Philippines,Maya Philippines,,,,70B parameters (dense),model,https://huggingface.co/MayaPH/GodziLLa2-70B,https://huggingface.co/MayaPH/GodziLLa2-70B,unknown,unknown,unknown,,,,,,, limited,"We evaluated voyage-code-3 using an enhanced suite of evaluation datasets designed to address the shortcomings of existing benchmarks and deliver practical, robust results.",2024-12-04,,[],"Introducing voyage-code-3, our next-generation embedding model optimized for code retrieval.",,"If you’re also interested in fine-tuned embedding models, we’d love to hear from you—please email us at contact@voyageai.com.",,optimized for code retrieval,unknown,unknown,unknown,voyage-code-3,USA,Voyage AI,unknown,unknown,,unknown,model,https://blog.voyageai.com/2024/12/04/voyage-code-3/,unknown,unknown,unknown,unknown,,,,,,, open,,2022-06-22,,"['The Pile', 'Yandex Russian Pretraining Dataset']",YaLM is a 100B parameter autoregressive model trained on 25% English and 75% Russian text.,,,,,Apache 2.0,text; text,,YaLM,Russia,Yandex,,,,100B parameters (dense),model,https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6,,,Yandex 800 A100 Cluster,,,,,,,, open,,2022-06-23,,['YaLM'],Yandex is a search engine and web portal. 
Yandex offers internet search and other services,,,,,custom,,,Yandex Search,Russia,Yandex,,,,,application,https://medium.com/yandex/yandex-publishes-yalm-100b-its-the-largest-gpt-like-neural-network-in-open-source-d1df53d0e9a6,,,,,,,,,https://yandex.com/legal/browser_agreement/,, open,Evaluated in comparison with ChatGPT and Stanford Alpaca.,2023-04-03,,"['LLaMA', 'web-scraped dialogue data']",A relatively small chatbot trained by fine-tuning Meta’s LLaMA on dialogue data gathered from the web.,,https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g/discussions,,academic research,Apache 2.0,text; text,,Koala,USA,Berkeley,,,,13B parameters (dense),model,https://bair.berkeley.edu/blog/2023/04/03/koala/,https://huggingface.co/TheBloke/koala-7B-GPTQ-4bit-128g,,8 A100 GPUs,6 hours,,,,,,, open,Evaluated using AST sub-tree matching technique and compared to other models in terms of API functionality accuracy.,2023-05-24,,"['LLaMA', 'Gorilla document retriever']",Gorilla is a finetuned LLaMA-based model that surpasses the performance of GPT-4 on writing API calls.,,,,In conjunction with a LLM to improve its capability for using API calls.,Apache 2.0,text; API,,Gorilla,USA,Berkeley,,"No specific quality control is mentioned in model training, though details on data processing and collection are provided in the paper.",,7B parameters (dense),model,https://arxiv.org/pdf/2305.15334v1.pdf,,,,,,,,,,, open,Evaluated on wide range of tasks using own evaluation benchmarks.,2023-05-03,,['RedPajama'],OpenLlama is an open source reproduction of Meta's LLaMA model.,,,,,Apache 2.0,text; text,,OpenLLaMA,USA,Berkeley,,,,17B parameters (dense),model,https://github.com/openlm-research/open_llama,,unknown,,unknown,,,,,,, open,,2023-03-01,,['ChatGPT API'],"Quizlet is introducing Q-Chat, a fully-adaptive AI tutor that engages students with adaptive questions based on relevant study materials delivered through a fun chat 
experience.",,,,,,,,Q-Chat,USA,Quizlet,,,,,application,https://openai.com/blog/introducing-chatgpt-and-whisper-apis,,,,,,,,,https://quizlet.com/tos,, open,Evaluated on standard and ocean science benchmarks in comparison to other similar-sized models.,2024-02-06,,[],OceanGPT is the first-ever LLM in the ocean domain and displays expertise in various ocean science tasks.,,https://huggingface.co/zjunlp/OceanGPT-7b/discussions,,,MIT,text; text,unknown,OceanGPT,China,Zhejiang University,,,,7B parameters (dense),model,https://www.zjukg.org/project/OceanGPT/,https://huggingface.co/zjunlp/OceanGPT-7b,unknown,6 A800 NVIDIA GPUs,7 days,,,,,,, open,The model was tested extensively internally and externally. It has developed and implemented numerous safeguards to prevent harms. They have also received user feedback to make continuous improvements.,2024-06-12,,[],"Stable Diffusion 3 Medium is Stability AI’s advanced text-to-image open model. It's suitable for running on consumer PCs and laptops as well as enterprise-tier GPUs. The model is known for its overall Quality and Photorealism, prompt understanding, typography, being resource-efficient, and being fine-tuned. The model in collaboration with NVIDIA and AMD has enhanced performance.",,"Feedback can be given through Twitter, Instagram, LinkedIn, or Discord Community.",,"The model can be used by professional artists, designers, developers, and AI enthusiasts for creating high-quality image outputs from text inputs.",Stability Community License,text; image,"Continuous collaboration with researchers, experts, and the community to ensure that the model is being used appropriately.",Stable Diffusion 3 Medium,UK,Stability AI,Large-scale commercial use requires contacting the organization for licensing details. 
The model should not be used for any purpose that does not adhere to the usage guidelines.,"They have conducted extensive internal and external testing of this model and have implemented numerous safeguards to prevent harms. Safety measures were implemented from the start of training the model and continued throughout testing, evaluation, and deployment.",,2B parameters,model,https://stability.ai/news/stable-diffusion-3-medium,unknown,Unknown,unknown,Unknown,,,,,,, open,"Consistency across the spatial and temporal axes greatly improves with this model. Stable Video 4D is able to generate novel view videos that are more detailed, faithful to the input video, and are consistent across frames and views compared to existing works.",2024-07-24,,['Stable Video Diffusion Model'],"Stable Video 4D is our latest AI model for dynamic multi-angle video generation. It allows users to upload a single video and receive novel-view videos of eight new angles/views. This advancement moves from image-based video generation to full 3D dynamic video synthesis. Users can specify camera angles, tailoring the output to meet specific creative needs. The model is currently available on Hugging Face and can generate 5-frame videos across the 8 views in about 40 seconds.",,"Feedback and reports about the progress should be shared via their social channels like Twitter, Instagram, LinkedIn or their Discord Community.",,"This model can be used for creating dynamic multi-angle videos, with applications in game development, video editing, and virtual reality. It allows professionals in these fields to visualize objects from multiple angles, enhancing the realism and immersion of their products.",Stability Community License,video; video,Continuous monitoring by the Stability AI team for improvements and refinements.,Stable Video 4D,UK,Stability AI,Unknown,The Stability AI team is dedicated to continuous innovation and exploration of real-world use-cases for this model and others. 
They are actively working to refine and optimize the model beyond the current synthetic datasets it has been trained on.,,Unknown,model,https://stability.ai/news/stable-video-4d,unknown,Unknown,Unknown,Unknown,,,,,,, open,"The model was evaluated on its ability to quickly and accurately transform a single image into a detailed 3D asset. This evaluation highlighted the model's unprecedented speed and quality, marking it as a valuable tool for rapid prototyping in 3D work. Compared to the previous SV3D model, Stable Fast 3D offers significantly reduced inference times--0.5 seconds versus 10 minutes--while maintaining high-quality output.",2024-08-01,,['TripoSR'],"Stable Fast 3D is a ground-breaking model in 3D asset generation technology. It can transform a single input image into a highly detailed 3D asset in around half a second, setting new standards in terms of speed and quality in the realm of 3D reconstruction. Users start the process by uploading an image of an object. Stable Fast 3D then swiftly generates a complete 3D asset, which includes, UV unwrapped mesh, material parameters, albedo colors with reduced illumination bake-in, and optional quad or triangle remeshing. This model has various applications, notably for game and virtual reality developers, as well as professionals in retail, architecture, design, and other graphic-intensive professions.",,Information on any downstream issues with the model can be reported to Stability AI through their support request system.,,"The model is intended for use in game development, virtual reality, retail, architecture, design and other graphically intense professions. It allows for rapid prototyping in 3D work, assisting both enterprises and indie developers. 
It's also used in movie production for creating static assets for games and 3D models for e-commerce, as well as fast model creation for AR/VR.",Stability Community License,image; 3D,Unknown,Stable Fast 3D,UK,Stability AI,Use by individuals or organizations with over $1M in annual revenue without obtaining an Enterprise License.,Unknown,,unknown,model,https://stability.ai/news/introducing-stable-fast-3d,https://huggingface.co/stabilityai/stable-fast-3d,Unknown,unknown,Unknown,,,,,,, open,Our analysis shows that Stable Diffusion 3.5 Large leads the market in prompt adherence and rivals much larger models in image quality.,2023-10-29,,[],"Stable Diffusion 3.5 reflects our commitment to empower builders and creators with tools that are widely accessible, cutting-edge, and free for most use cases.",,We look forward to hearing your feedback on Stable Diffusion 3.5 and seeing what you create with the models.,,This model is ideal for professional use cases at 1 megapixel resolution.,Stability AI Community,text; image,unknown,Stable Diffusion 3.5,UK,Stability AI,unknown,"We believe in safe, responsible AI practices and take deliberate measures to ensure Integrity starts at the early stages of development.",,8.1B parameters,model,https://stability.ai/news/introducing-stable-diffusion-3-5,unknown,unknown,unknown,unknown,,,,,,, closed,,2021-05-21,,[],HyperClova is an autoregressive language model,,,,,unknown,text; text,,HyperCLOVA,South Korea,NAVER,,,,82B parameters,model,https://arxiv.org/abs/2109.04650,,,1024 A100 GPUs,130.4 days,,,,,,, limited,"Evaluated on English and Korean benchmarks in comparison to open source English and multilingual LLMs, with HyperCLOVA X (closed) surpassing the models compared.",2024-04-13,,[],"HyperCLOVA X is a family of large language models (LLMs) tailored to the Korean language and culture, along with competitive capabilities in English, math, and coding.",,,,,unknown,text; text,,HyperCLOVA X,South 
Korea,NAVER,,,,unknown,model,https://arxiv.org/pdf/2404.01954,,unknown,unknown,unknown,,,,,,, limited,,,,['Anthropic API'],An online tutoring solution to help students achieve academic success.,,,,,unknown,,,Juni Tutor Bot,USA,Juni Learning,,,,,application,https://junilearning.com/,,,,,,,,,,, ================================================ FILE: scripts/collate.py ================================================ #!/usr/bin/env python from pathlib import Path import yaml from pandas import DataFrame ASSET_PATH = Path("./assets") def scalar(val): """Clean scalar values whenever possible.""" if not isinstance(val, dict): return val if "explanation" in val: return val.get("value") if not val: return None return val def collate_assets(): """Store all assets together in a single tabular CSV file.""" paths = ASSET_PATH.glob("*.yaml") assets = [asset for fp in paths for asset in yaml.safe_load(open(fp))] df = DataFrame.from_records(assets) # Make data consistent within columns (cannot have scalar and dicts in same column) df = df.apply(lambda ser: ser.apply(scalar)) # Cleanup missing data representations df = df.replace("none", None) non_categorical = ["created_date"] for col in non_categorical: df[col] = df[col].replace("unknown", None) df.to_csv("./resources/all_assets.csv", index=False) if __name__ == "__main__": collate_assets() ================================================ FILE: server.py ================================================ #!/usr/bin/env python from http import server class MyHTTPRequestHandler(server.SimpleHTTPRequestHandler): def end_headers(self): self.send_my_headers() server.SimpleHTTPRequestHandler.end_headers(self) def send_my_headers(self): self.send_header("Cache-Control", "no-cache, no-store, must-revalidate") self.send_header("Pragma", "no-cache") self.send_header("Expires", "0") if __name__ == '__main__': server.test(HandlerClass=MyHTTPRequestHandler) ================================================ FILE: yamlfix.toml 
================================================
[tool.yamlfix]
# Maximum line width before yamlfix wraps long scalar values.
line_length = 200
# How null/missing values are rendered in the emitted YAML.
none_representation = "Unknown"
# Emit block-style sequences (one item per line) rather than inline flow style.
flow_style_sequence = "false"
# Begin every YAML document with the explicit "---" marker.
explicit_start = "true"