Repository: cheahjs/free-llm-api-resources
Branch: main
Commit: c3ac91133bf8
Files: 9
Total size: 77.0 KB

Directory structure:
gitextract_rnr7ukix/

├── .github/
│   ├── pull_request_template.md
│   └── workflows/
│       ├── readme-change-validator.yml
│       └── update-readme.yml
├── .gitignore
├── README.md
└── src/
    ├── README_template.md
    ├── data.py
    ├── pull_available_models.py
    └── requirements.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/pull_request_template.md
================================================
<!---
Before adding a new provider, there are a few general guidelines as to what providers would be accepted into the repo, primarily around trustworthiness and legitimacy:

1. Is the provider a legitimate company?
2. Does the provider provide a proper API service?
   For example, reverse engineering services such as Claude Code, Codex, GitHub Copilot, Qwen Code, etc. are not considered appropriate for this list.
3. Does the provider have a business model?
4. Does the provider provide legitimate services?
   For example, providing commercial models such as Anthropic or OpenAI models for free or at steep discounts can suggest questionable business practices, such as chatbot reverse engineering, API credit resale/theft, or other similar practices.

These guidelines are not exhaustive, but attempt to put into writing what sort of providers might be included in the list.

For providers that are trying to add themselves to the list, please note that you are very likely going to see abusive traffic, as free APIs are very common abuse targets.
--->


================================================
FILE: .github/workflows/readme-change-validator.yml
================================================
name: README Change Validator

# README.md carries a "do not edit directly" banner saying it is generated by
# src/pull_available_models.py. This workflow fails any pull request that
# changes README.md without also changing the template or the generator script.
on:
  pull_request:
    paths:
      - 'README.md'
      - 'src/README_template.md'
      - 'src/pull_available_models.py'

# Listing any permission sets every unlisted one to "none", so each scope the
# job relies on is spelled out:
#   contents: read        - checkout
#   issues: write         - kept from the original configuration
#   pull-requests: write  - comments on a pull request
# NOTE(review): pull requests opened from forks still receive a read-only
# token, so the comment step cannot post there - confirm this is acceptable.
permissions:
  contents: read
  issues: write
  pull-requests: write

jobs:
  check-readme-changes:
    # Skip pull requests opened by the automation itself.
    if: github.actor != 'github-actions[bot]'
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 0  # Fetch all history to compare changes

      - name: Get changed files
        id: changed-files
        env:
          # Passed through the environment instead of being pasted into the
          # script text.
          BASE_SHA: ${{ github.event.pull_request.base.sha }}
          HEAD_SHA: ${{ github.sha }}
        run: |
          # Diff once and reuse the list for all three checks.
          CHANGED_FILES=$(git diff --name-only "$BASE_SHA" "$HEAD_SHA")

          # -x matches whole lines and -F disables regex, so "README.md" no
          # longer also counts "src/README_template.md".
          # grep -c prints 0 and exits 1 when nothing matches; "|| true" keeps
          # that from aborting the step.
          README_CHANGED=$(printf '%s\n' "$CHANGED_FILES" | grep -cxF "README.md" || true)
          TEMPLATE_CHANGED=$(printf '%s\n' "$CHANGED_FILES" | grep -cxF "src/README_template.md" || true)
          SCRIPT_CHANGED=$(printf '%s\n' "$CHANGED_FILES" | grep -cxF "src/pull_available_models.py" || true)

          echo "readme_changed=$README_CHANGED" >> "$GITHUB_OUTPUT"
          echo "template_changed=$TEMPLATE_CHANGED" >> "$GITHUB_OUTPUT"
          echo "script_changed=$SCRIPT_CHANGED" >> "$GITHUB_OUTPUT"

      - name: Validate README changes
        id: validate-readme
        env:
          README_CHANGED: ${{ steps.changed-files.outputs.readme_changed }}
          TEMPLATE_CHANGED: ${{ steps.changed-files.outputs.template_changed }}
          SCRIPT_CHANGED: ${{ steps.changed-files.outputs.script_changed }}
        run: |
          # Fail only when README.md changed and neither of its sources did.
          if [ "$README_CHANGED" -gt 0 ] && \
             [ "$TEMPLATE_CHANGED" -eq 0 ] && \
             [ "$SCRIPT_CHANGED" -eq 0 ]; then
            echo "Error: README.md was modified without corresponding changes in src/README_template.md or src/pull_available_models.py"
            echo "Please update the template or script instead of modifying README.md directly."
            exit 1
          fi

      - name: Add PR comment if check fails
        # Comment only when the validation step itself failed, so that an
        # unrelated failure (e.g. checkout) does not produce a misleading
        # "README was edited directly" message.
        if: failure() && steps.validate-readme.outcome == 'failure'
        uses: actions/github-script@v8
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Await the request so an API error fails this step instead of
            // being left as an unhandled promise.
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              body: '❌ README.md was modified without corresponding changes in `src/README_template.md` or `src/pull_available_models.py`\n\nPlease update the template or script instead of modifying README.md directly.'
            })


================================================
FILE: .github/workflows/update-readme.yml
================================================
# Runs src/pull_available_models.py on a daily schedule (or on demand) and
# opens a pull request with whatever the script changed.
name: Update README

on:
  workflow_dispatch:
  schedule:
    # Every day at 00:00.
    - cron: '0 0 * * *'

jobs:
  update-readme:
    permissions:
      # contents + pull-requests: the final step pushes a branch and opens a PR.
      contents: write
      pull-requests: write
      # NOTE(review): presumably needed so the auth step can request an OIDC
      # token for the workload identity provider - confirm.
      id-token: write
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install -r src/requirements.txt

      # Authenticates to Google Cloud; the credentials file path it outputs is
      # consumed by the two steps below.
      - uses: google-github-actions/auth@v3
        id: auth
        with:
          project_id: ${{ secrets.GCP_PROJECT }}
          workload_identity_provider: projects/576328904266/locations/global/workloadIdentityPools/github/providers/cheahjs-org

      - name: Run script
        run: |
          python -u src/pull_available_models.py
        env:
          # Provider credentials read by the script from the environment.
          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          CLOUDFLARE_API_KEY: ${{ secrets.CLOUDFLARE_API_KEY }}
          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
          GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
          GOOGLE_APPLICATION_CREDENTIALS: ${{ steps.auth.outputs.credentials_file_path }}
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          HYPERBOLIC_API_KEY: ${{ secrets.HYPERBOLIC_API_KEY }}
          LAMBDA_API_KEY: ${{ secrets.LAMBDA_API_KEY }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
          SCALEWAY_API_KEY: ${{ secrets.SCALEWAY_API_KEY }}

      # Delete the generated credentials file before the pull request step runs.
      - name: Remove credentials
        run: |
          rm ${{ steps.auth.outputs.credentials_file_path }}

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v8
        with:
          base: main
          branch: update-readme
          title: Update README with latest models
          commit-message: Update README with latest models
          body: This PR updates the README with the latest available models.


================================================
FILE: .gitignore
================================================
venv
.venv
.env
.aider*
*.json
*.pyc


================================================
FILE: README.md
================================================
<!---
WARNING: DO NOT EDIT THIS FILE DIRECTLY. IT IS GENERATED BY src/pull_available_models.py
--->
# Free LLM API resources

This lists various services that provide free access or credits towards API-based LLM usage.

> [!NOTE]  
> Please don't abuse these services, else we might lose them.

> [!WARNING]  
> This list explicitly excludes any services that are not legitimate (e.g. services that reverse engineer an existing chatbot)

- [Free Providers](#free-providers)
  - [OpenRouter](#openrouter)
  - [Google AI Studio](#google-ai-studio)
  - [NVIDIA NIM](#nvidia-nim)
  - [Mistral (La Plateforme)](#mistral-la-plateforme)
  - [Mistral (Codestral)](#mistral-codestral)
  - [HuggingFace Inference Providers](#huggingface-inference-providers)
  - [Vercel AI Gateway](#vercel-ai-gateway)
  - [OpenCode Zen](#opencode-zen)
  - [Cerebras](#cerebras)
  - [Groq](#groq)
  - [Cohere](#cohere)
  - [GitHub Models](#github-models)
  - [Cloudflare Workers AI](#cloudflare-workers-ai)
- [Providers with trial credits](#providers-with-trial-credits)
  - [Fireworks](#fireworks)
  - [Baseten](#baseten)
  - [Nebius](#nebius)
  - [Novita](#novita)
  - [AI21](#ai21)
  - [Upstage](#upstage)
  - [NLP Cloud](#nlp-cloud)
  - [Alibaba Cloud (International) Model Studio](#alibaba-cloud-international-model-studio)
  - [Modal](#modal)
  - [Inference.net](#inferencenet)
  - [Hyperbolic](#hyperbolic)
  - [SambaNova Cloud](#sambanova-cloud)
  - [Scaleway Generative APIs](#scaleway-generative-apis)

## Free Providers

### [OpenRouter](https://openrouter.ai)

**Limits:**

[20 requests/minute<br>50 requests/day<br>Up to 1000 requests/day with $10 lifetime topup](https://openrouter.ai/docs/api/reference/limits)

Models share a common quota.

- [Gemma 3 12B Instruct](https://openrouter.ai/google/gemma-3-12b-it:free)
- [Gemma 3 27B Instruct](https://openrouter.ai/google/gemma-3-27b-it:free)
- [Gemma 3 4B Instruct](https://openrouter.ai/google/gemma-3-4b-it:free)
- [Hermes 3 Llama 3.1 405B](https://openrouter.ai/nousresearch/hermes-3-llama-3.1-405b:free)
- [Llama 3.2 3B Instruct](https://openrouter.ai/meta-llama/llama-3.2-3b-instruct:free)
- [Llama 3.3 70B Instruct](https://openrouter.ai/meta-llama/llama-3.3-70b-instruct:free)
- [Mistral Small 3.1 24B Instruct](https://openrouter.ai/mistralai/mistral-small-3.1-24b-instruct:free)
- [arcee-ai/trinity-large-preview:free](https://openrouter.ai/arcee-ai/trinity-large-preview:free)
- [arcee-ai/trinity-mini:free](https://openrouter.ai/arcee-ai/trinity-mini:free)
- [cognitivecomputations/dolphin-mistral-24b-venice-edition:free](https://openrouter.ai/cognitivecomputations/dolphin-mistral-24b-venice-edition:free)
- [google/gemma-3n-e2b-it:free](https://openrouter.ai/google/gemma-3n-e2b-it:free)
- [google/gemma-3n-e4b-it:free](https://openrouter.ai/google/gemma-3n-e4b-it:free)
- [liquid/lfm-2.5-1.2b-instruct:free](https://openrouter.ai/liquid/lfm-2.5-1.2b-instruct:free)
- [liquid/lfm-2.5-1.2b-thinking:free](https://openrouter.ai/liquid/lfm-2.5-1.2b-thinking:free)
- [nvidia/nemotron-3-nano-30b-a3b:free](https://openrouter.ai/nvidia/nemotron-3-nano-30b-a3b:free)
- [nvidia/nemotron-nano-12b-v2-vl:free](https://openrouter.ai/nvidia/nemotron-nano-12b-v2-vl:free)
- [nvidia/nemotron-nano-9b-v2:free](https://openrouter.ai/nvidia/nemotron-nano-9b-v2:free)
- [openai/gpt-oss-120b:free](https://openrouter.ai/openai/gpt-oss-120b:free)
- [openai/gpt-oss-20b:free](https://openrouter.ai/openai/gpt-oss-20b:free)
- [qwen/qwen3-4b:free](https://openrouter.ai/qwen/qwen3-4b:free)
- [qwen/qwen3-coder:free](https://openrouter.ai/qwen/qwen3-coder:free)
- [qwen/qwen3-next-80b-a3b-instruct:free](https://openrouter.ai/qwen/qwen3-next-80b-a3b-instruct:free)
- [stepfun/step-3.5-flash:free](https://openrouter.ai/stepfun/step-3.5-flash:free)
- [z-ai/glm-4.5-air:free](https://openrouter.ai/z-ai/glm-4.5-air:free)

### [Google AI Studio](https://aistudio.google.com)

Data is used for training when used outside of the UK/CH/EEA/EU.

<table><thead><tr><th>Model Name</th><th>Model Limits</th></tr></thead><tbody>
<tr><td>Gemini 3 Flash</td><td>250,000 tokens/minute<br>20 requests/day<br>5 requests/minute</td></tr>
<tr><td>Gemini 3.1 Flash-Lite</td><td>250,000 tokens/minute<br>500 requests/day<br>15 requests/minute</td></tr>
<tr><td>Gemini 2.5 Flash</td><td>250,000 tokens/minute<br>20 requests/day<br>5 requests/minute</td></tr>
<tr><td>Gemini 2.5 Flash-Lite</td><td>250,000 tokens/minute<br>20 requests/day<br>10 requests/minute</td></tr>
<tr><td>Gemma 3 27B Instruct</td><td>15,000 tokens/minute<br>14,400 requests/day<br>30 requests/minute</td></tr>
<tr><td>Gemma 3 12B Instruct</td><td>15,000 tokens/minute<br>14,400 requests/day<br>30 requests/minute</td></tr>
<tr><td>Gemma 3 4B Instruct</td><td>15,000 tokens/minute<br>14,400 requests/day<br>30 requests/minute</td></tr>
<tr><td>Gemma 3 1B Instruct</td><td>15,000 tokens/minute<br>14,400 requests/day<br>30 requests/minute</td></tr>
</tbody></table>

### [NVIDIA NIM](https://build.nvidia.com/explore/discover)

Phone number verification required.
Models tend to be context window limited.

**Limits:** 40 requests/minute

- [Various open models](https://build.nvidia.com/models)

### [Mistral (La Plateforme)](https://console.mistral.ai/)

* Free tier (Experiment plan) requires opting into data training
* Requires phone number verification.

**Limits (per-model):** 1 request/second, 500,000 tokens/minute, 1,000,000,000 tokens/month

- [Open and Proprietary Mistral models](https://docs.mistral.ai/getting-started/models/models_overview/)

### [Mistral (Codestral)](https://codestral.mistral.ai/)

* Currently free to use
* Monthly subscription based
* Requires phone number verification

**Limits:** 30 requests/minute, 2,000 requests/day

- Codestral

### [HuggingFace Inference Providers](https://huggingface.co/docs/inference-providers/en/index)

HuggingFace Serverless Inference limited to models smaller than 10GB. Some popular models are supported even if they exceed 10GB.

**Limits:** [$0.10/month in credits](https://huggingface.co/docs/inference-providers/en/pricing)

- Various open models across supported providers

### [Vercel AI Gateway](https://vercel.com/docs/ai-gateway)

Routes to various supported providers.

**Limits:** [$5/month](https://vercel.com/docs/ai-gateway/pricing)


### [OpenCode Zen](https://opencode.ai/docs/zen/)

AI gateway with curated models.

Free models may use data for improvement.

- Big Pickle Stealth
- MiniMax M2.5 Free
- Arcee Large Preview Free

### [Cerebras](https://cloud.cerebras.ai/)

<table><thead><tr><th>Model Name</th><th>Model Limits</th></tr></thead><tbody>
<tr><td>gpt-oss-120b</td><td>30 requests/minute<br>60,000 tokens/minute<br>900 requests/hour<br>1,000,000 tokens/hour<br>14,400 requests/day<br>1,000,000 tokens/day</td></tr>
<tr><td>Llama 3.1 8B</td><td>30 requests/minute<br>60,000 tokens/minute<br>900 requests/hour<br>1,000,000 tokens/hour<br>14,400 requests/day<br>1,000,000 tokens/day</td></tr>
</tbody></table>

### [Groq](https://console.groq.com)

<table><thead><tr><th>Model Name</th><th>Model Limits</th></tr></thead><tbody>
<tr><td>Allam 2 7B</td><td>7,000 requests/day<br>6,000 tokens/minute</td></tr>
<tr><td>Llama 3.1 8B</td><td>14,400 requests/day<br>6,000 tokens/minute</td></tr>
<tr><td>Llama 3.3 70B</td><td>1,000 requests/day<br>12,000 tokens/minute</td></tr>
<tr><td>Llama 4 Maverick 17B 128E Instruct</td><td>1,000 requests/day<br>6,000 tokens/minute</td></tr>
<tr><td>Llama 4 Scout Instruct</td><td>1,000 requests/day<br>30,000 tokens/minute</td></tr>
<tr><td>Whisper Large v3</td><td>7,200 audio-seconds/minute<br>2,000 requests/day</td></tr>
<tr><td>Whisper Large v3 Turbo</td><td>7,200 audio-seconds/minute<br>2,000 requests/day</td></tr>
<tr><td>canopylabs/orpheus-arabic-saudi</td><td></td></tr>
<tr><td>canopylabs/orpheus-v1-english</td><td></td></tr>
<tr><td>groq/compound</td><td>250 requests/day<br>70,000 tokens/minute</td></tr>
<tr><td>groq/compound-mini</td><td>250 requests/day<br>70,000 tokens/minute</td></tr>
<tr><td>meta-llama/llama-guard-4-12b</td><td>14,400 requests/day<br>15,000 tokens/minute</td></tr>
<tr><td>meta-llama/llama-prompt-guard-2-22m</td><td></td></tr>
<tr><td>meta-llama/llama-prompt-guard-2-86m</td><td></td></tr>
<tr><td>moonshotai/kimi-k2-instruct</td><td>1,000 requests/day<br>10,000 tokens/minute</td></tr>
<tr><td>moonshotai/kimi-k2-instruct-0905</td><td>1,000 requests/day<br>10,000 tokens/minute</td></tr>
<tr><td>openai/gpt-oss-120b</td><td>1,000 requests/day<br>8,000 tokens/minute</td></tr>
<tr><td>openai/gpt-oss-20b</td><td>1,000 requests/day<br>8,000 tokens/minute</td></tr>
<tr><td>openai/gpt-oss-safeguard-20b</td><td>1,000 requests/day<br>8,000 tokens/minute</td></tr>
<tr><td>qwen/qwen3-32b</td><td>1,000 requests/day<br>6,000 tokens/minute</td></tr>
</tbody></table>

### [Cohere](https://cohere.com)

**Limits:**

[20 requests/minute<br>1,000 requests/month](https://docs.cohere.com/docs/rate-limits)

Models share a common monthly quota.

- c4ai-aya-expanse-32b
- c4ai-aya-vision-32b
- command-a-03-2025
- command-a-reasoning-08-2025
- command-a-translate-08-2025
- command-a-vision-07-2025
- command-r-08-2024
- command-r-plus-08-2024
- command-r7b-12-2024
- command-r7b-arabic-02-2025
- tiny-aya-earth
- tiny-aya-fire
- tiny-aya-global
- tiny-aya-water

### [GitHub Models](https://github.com/marketplace/models)

Extremely restrictive input/output token limits.

**Limits:** [Dependent on Copilot subscription tier (Free/Pro/Pro+/Business/Enterprise)](https://docs.github.com/en/github-models/prototyping-with-ai-models#rate-limits)

- AI21 Jamba 1.5 Large
- Codestral 25.01
- Cohere Command A
- Cohere Command R 08-2024
- Cohere Command R+ 08-2024
- DeepSeek-R1
- DeepSeek-R1-0528
- DeepSeek-V3-0324
- Grok 3
- Grok 3 Mini
- Llama 4 Maverick 17B 128E Instruct FP8
- Llama 4 Scout 17B 16E Instruct
- Llama-3.2-11B-Vision-Instruct
- Llama-3.2-90B-Vision-Instruct
- Llama-3.3-70B-Instruct
- MAI-DS-R1
- Meta-Llama-3.1-405B-Instruct
- Meta-Llama-3.1-8B-Instruct
- Ministral 3B
- Mistral Medium 3 (25.05)
- Mistral Small 3.1
- OpenAI GPT-4.1
- OpenAI GPT-4.1-mini
- OpenAI GPT-4.1-nano
- OpenAI GPT-4o
- OpenAI GPT-4o mini
- OpenAI Text Embedding 3 (large)
- OpenAI Text Embedding 3 (small)
- OpenAI gpt-5
- OpenAI gpt-5-chat (preview)
- OpenAI gpt-5-mini
- OpenAI gpt-5-nano
- OpenAI o1
- OpenAI o1-mini
- OpenAI o1-preview
- OpenAI o3
- OpenAI o3-mini
- OpenAI o4-mini
- Phi-4
- Phi-4-mini-instruct
- Phi-4-mini-reasoning
- Phi-4-multimodal-instruct
- Phi-4-reasoning

### [Cloudflare Workers AI](https://developers.cloudflare.com/workers-ai)

**Limits:** [10,000 neurons/day](https://developers.cloudflare.com/workers-ai/platform/pricing/#free-allocation)

- @cf/aisingapore/gemma-sea-lion-v4-27b-it
- @cf/ibm-granite/granite-4.0-h-micro
- @cf/openai/gpt-oss-120b
- @cf/openai/gpt-oss-20b
- @cf/qwen/qwen3-30b-a3b-fp8
- @cf/zai-org/glm-4.7-flash
- DeepSeek R1 Distill Qwen 32B
- Deepseek Coder 6.7B Base (AWQ)
- Deepseek Coder 6.7B Instruct (AWQ)
- Deepseek Math 7B Instruct
- Discolm German 7B v1 (AWQ)
- Falcon 7B Instruct
- Gemma 2B Instruct (LoRA)
- Gemma 3 12B Instruct
- Gemma 7B Instruct
- Gemma 7B Instruct (LoRA)
- Hermes 2 Pro Mistral 7B
- Llama 2 13B Chat (AWQ)
- Llama 2 7B Chat (FP16)
- Llama 2 7B Chat (INT8)
- Llama 2 7B Chat (LoRA)
- Llama 3 8B Instruct
- Llama 3 8B Instruct (AWQ)
- Llama 3.1 8B Instruct (AWQ)
- Llama 3.1 8B Instruct (FP8)
- Llama 3.2 11B Vision Instruct
- Llama 3.2 1B Instruct
- Llama 3.2 3B Instruct
- Llama 3.3 70B Instruct (FP8)
- Llama 4 Scout Instruct
- Llama Guard 3 8B
- Mistral 7B Instruct v0.1
- Mistral 7B Instruct v0.1 (AWQ)
- Mistral 7B Instruct v0.2
- Mistral 7B Instruct v0.2 (LoRA)
- Mistral Small 3.1 24B Instruct
- Neural Chat 7B v3.1 (AWQ)
- OpenChat 3.5 0106
- OpenHermes 2.5 Mistral 7B (AWQ)
- Phi-2
- Qwen 1.5 0.5B Chat
- Qwen 1.5 1.8B Chat
- Qwen 1.5 14B Chat (AWQ)
- Qwen 1.5 7B Chat (AWQ)
- Qwen 2.5 Coder 32B Instruct
- Qwen QwQ 32B
- SQLCoder 7B 2
- Starling LM 7B Beta
- TinyLlama 1.1B Chat v1.0
- Una Cybertron 7B v2 (BF16)
- Zephyr 7B Beta (AWQ)


## Providers with trial credits

### [Fireworks](https://fireworks.ai/)

**Credits:** $1

**Models:** [Various open models](https://fireworks.ai/models)

### [Baseten](https://app.baseten.co/)

**Credits:** $30

**Models:** [Any supported model - pay by compute time](https://www.baseten.co/library/)

### [Nebius](https://tokenfactory.nebius.com/)

**Credits:** $1

**Models:** [Various open models](https://tokenfactory.nebius.com/models)

### [Novita](https://novita.ai/?ref=ytblmjc&utm_source=affiliate)

**Credits:** $0.5 for 1 year

**Models:** [Various open models](https://novita.ai/models)

### [AI21](https://studio.ai21.com/)

**Credits:** $10 for 3 months

**Models:** Jamba family of models

### [Upstage](https://console.upstage.ai/)

**Credits:** $10 for 3 months

**Models:** Solar Pro/Mini

### [NLP Cloud](https://nlpcloud.com/home)

**Credits:** $15

**Requirements:** Phone number verification

**Models:** Various open models

### [Alibaba Cloud (International) Model Studio](https://bailian.console.alibabacloud.com/)

**Credits:** 1 million tokens/model

**Models:** [Various open and proprietary Qwen models](https://www.alibabacloud.com/en/product/modelstudio)

### [Modal](https://modal.com)

**Credits:** $5/month upon sign up, $30/month with payment method added

**Models:** Any supported model - pay by compute time

### [Inference.net](https://inference.net)

**Credits:** $1, $25 on responding to email survey

**Models:** Various open models

### [Hyperbolic](https://app.hyperbolic.ai/)

**Credits:** $1

**Models:**
- DeepSeek V3
- DeepSeek V3 0324
- Llama 3.1 405B Base
- Llama 3.1 405B Instruct
- Llama 3.1 8B Instruct
- Llama 3.2 3B Instruct
- Llama 3.3 70B Instruct
- Pixtral 12B (2409)
- Qwen QwQ 32B
- Qwen2.5 72B Instruct
- Qwen2.5 Coder 32B Instruct
- Qwen2.5 VL 72B Instruct
- Qwen2.5 VL 7B Instruct
- deepseek-ai/deepseek-r1-0528
- openai/gpt-oss-120b
- openai/gpt-oss-120b-turbo
- openai/gpt-oss-20b
- qwen/qwen3-235b-a22b
- qwen/qwen3-235b-a22b-instruct-2507
- qwen/qwen3-coder-480b-a35b-instruct
- qwen/qwen3-next-80b-a3b-instruct
- qwen/qwen3-next-80b-a3b-thinking

### [SambaNova Cloud](https://cloud.sambanova.ai/)

**Credits:** $5 for 3 months

**Models:**
- E5-Mistral-7B-Instruct
- Llama 3.1 8B
- Llama 3.3 70B
- Llama 3.3 70B
- Llama-4-Maverick-17B-128E-Instruct
- Qwen/Qwen3-235B
- Qwen/Qwen3-32B
- Whisper-Large-v3
- deepseek-ai/DeepSeek-R1-0528
- deepseek-ai/DeepSeek-R1-Distill-Llama-70B
- deepseek-ai/DeepSeek-V3-0324
- deepseek-ai/DeepSeek-V3.1
- deepseek-ai/DeepSeek-V3.1-Terminus
- deepseek-ai/DeepSeek-V3.2
- minimaxai/minimax-m2.5
- openai/gpt-oss-120b
- tbd

### [Scaleway Generative APIs](https://console.scaleway.com/generative-api/models)

**Credits:** 1,000,000 free tokens

**Models:**
- BGE-Multilingual-Gemma2
- DeepSeek R1 Distill Llama 70B
- Gemma 3 27B Instruct
- Llama 3.1 8B Instruct
- Llama 3.3 70B Instruct
- Mistral Nemo 2407
- Pixtral 12B (2409)
- Whisper Large v3
- devstral-2-123b-instruct-2512
- gpt-oss-120b
- holo2-30b-a3b
- mistral-small-3.2-24b-instruct-2506
- qwen3-235b-a22b-instruct-2507
- qwen3-coder-30b-a3b-instruct
- qwen3-embedding-8b
- voxtral-small-24b-2507


================================================
FILE: src/README_template.md
================================================
# Free LLM API resources

This lists various services that provide free access or credits towards API-based LLM usage.

> [!NOTE]  
> Please don't abuse these services, else we might lose them.

> [!WARNING]  
> This list explicitly excludes any services that are not legitimate (e.g. services that reverse engineer an existing chatbot)

{{TOC}}

## Free Providers

{{MODEL_LIST}}

## Providers with trial credits

{{TRIAL_LIST_MARKDOWN}}


================================================
FILE: src/data.py
================================================
MODEL_TO_NAME_MAPPING = {
    "@cf/deepseek-ai/deepseek-math-7b-instruct": "Deepseek Math 7B Instruct",
    "@cf/defog/sqlcoder-7b-2": "SQLCoder 7B 2",
    "@cf/fblgit/una-cybertron-7b-v2-bf16": "Una Cybertron 7B v2 (BF16)",
    "@cf/google/gemma-2b-it-lora": "Gemma 2B Instruct (LoRA)",
    "@cf/google/gemma-7b-it-lora": "Gemma 7B Instruct (LoRA)",
    "@cf/meta-llama/llama-2-7b-chat-hf-lora": "Llama 2 7B Chat (LoRA)",
    "@cf/meta/llama-2-7b-chat-fp16": "Llama 2 7B Chat (FP16)",
    "@cf/meta/llama-2-7b-chat-int8": "Llama 2 7B Chat (INT8)",
    "@cf/meta/llama-3-8b-instruct-awq": "Llama 3 8B Instruct (AWQ)",
    "@cf/meta/llama-3-8b-instruct": "Llama 3 8B Instruct",
    "@cf/meta/llama-3.1-8b-instruct-awq": "Llama 3.1 8B Instruct (AWQ)",
    "@cf/meta/llama-3.1-8b-instruct-fp8": "Llama 3.1 8B Instruct (FP8)",
    "@cf/meta/llama-3.1-8b-instruct": "Llama 3.1 8B Instruct",
    "@cf/microsoft/phi-2": "Phi-2",
    "@cf/mistral/mistral-7b-instruct-v0.1-vllm": "Mistral 7B Instruct v0.1",
    "@cf/mistral/mistral-7b-instruct-v0.1": "Mistral 7B Instruct v0.1",
    "@cf/mistral/mistral-7b-instruct-v0.2-lora": "Mistral 7B Instruct v0.2 (LoRA)",
    "@cf/openchat/openchat-3.5-0106": "OpenChat 3.5 0106",
    "@cf/qwen/qwen1.5-0.5b-chat": "Qwen 1.5 0.5B Chat",
    "@cf/qwen/qwen1.5-1.8b-chat": "Qwen 1.5 1.8B Chat",
    "@cf/qwen/qwen1.5-14b-chat-awq": "Qwen 1.5 14B Chat (AWQ)",
    "@cf/qwen/qwen1.5-7b-chat-awq": "Qwen 1.5 7B Chat (AWQ)",
    "@cf/thebloke/discolm-german-7b-v1-awq": "Discolm German 7B v1 (AWQ)",
    "@cf/tiiuae/falcon-7b-instruct": "Falcom 7B Instruct",
    "@cf/tinyllama/tinyllama-1.1b-chat-v1.0": "TinyLlama 1.1B Chat v1.0",
    "@hf/google/gemma-7b-it": "Gemma 7B Instruct",
    "@hf/meta-llama/meta-llama-3-8b-instruct": "Llama 3 8B Instruct",
    "@hf/mistral/mistral-7b-instruct-v0.2": "Mistral 7B Instruct v0.2",
    "@hf/nexusflow/starling-lm-7b-beta": "Starling LM 7B Beta",
    "@hf/nousresearch/hermes-2-pro-mistral-7b": "Hermes 2 Pro Mistral 7B",
    "@hf/thebloke/deepseek-coder-6.7b-base-awq": "Deepseek Coder 6.7B Base (AWQ)",
    "@hf/thebloke/deepseek-coder-6.7b-instruct-awq": "Deepseek Coder 6.7B Instruct (AWQ)",
    "@hf/thebloke/llama-2-13b-chat-awq": "Llama 2 13B Chat (AWQ)",
    "@hf/thebloke/llamaguard-7b-awq": "LlamaGuard 7B (AWQ)",
    "@hf/thebloke/mistral-7b-instruct-v0.1-awq": "Mistral 7B Instruct v0.1 (AWQ)",
    "@hf/thebloke/neural-chat-7b-v3-1-awq": "Neural Chat 7B v3.1 (AWQ)",
    "@hf/thebloke/openhermes-2.5-mistral-7b-awq": "OpenHermes 2.5 Mistral 7B (AWQ)",
    "@hf/thebloke/zephyr-7b-beta-awq": "Zephyr 7B Beta (AWQ)",
    "codellama-13b-instruct-hf": "CodeLlama 13B Instruct",
    "distil-whisper-large-v3-en": "Distil Whisper Large v3",
    "gemma-7b-it": "Gemma 7B Instruct (Deprecated)",
    "gemma2-9b-it": "Gemma 2 9B Instruct",
    "google/gemma-2-9b-it:free": "Gemma 2 9B Instruct",
    "google/gemma-7b-it:free": "Gemma 7B Instruct",
    "gryphe/mythomist-7b:free": "Mythomist 7B",
    "huggingfaceh4/zephyr-7b-beta:free": "Zephyr 7B Beta",
    "llama-2-13b-chat-hf": "Llama 2 13B Chat",
    "llama-3-70b-instruct": "Llama 3 70B Instruct",
    "llama-3-8b-instruct": "Llama 3 8B Instruct",
    "llama-3.1-405b-reasoning": "Llama 3.1 405B",
    "llama-3.1-70b-versatile": "Llama 3.1 70B",
    "llama-3.1-8b-instant": "Llama 3.1 8B",
    "llama-guard-3-8b": "Llama Guard 3 8B",
    "llama3-70b-8192": "Llama 3 70B",
    "llama3-8b-8192": "Llama 3 8B",
    "llama3-groq-70b-8192-tool-use-preview": "Llama 3 70B - Groq Tool Use Preview",
    "llama3-groq-8b-8192-tool-use-preview": "Llama 3 8B - Groq Tool Use Preview",
    "meta-llama/llama-3-8b-instruct:free": "Llama 3 8B Instruct",
    "meta-llama/llama-3.1-8b-instruct:free": "Llama 3.1 8B Instruct",
    "meta-llama/meta-llama-3-70b-instruct": "Llama 3 70B Instruct",
    "meta-llama/meta-llama-3.1-405b": "Llama 3.1 405B Base",
    "meta-llama/meta-llama-3.1-405b-fp8": "Llama 3.1 405B Base (FP8)",
    "meta-llama/meta-llama-3.1-405b-instruct": "Llama 3.1 405B Instruct",
    "meta-llama/meta-llama-3.1-70b-instruct": "Llama 3.1 70B Instruct",
    "meta-llama/meta-llama-3.1-8b-instruct": "Llama 3.1 8B Instruct",
    "microsoft/phi-3-medium-128k-instruct:free": "Phi-3 Medium 128k Instruct",
    "microsoft/phi-3-mini-128k-instruct:free": "Phi-3 Mini 128k Instruct",
    "mistral-7b-instruct": "Mistral 7B Instruct",
    "mistralai/mistral-7b-instruct:free": "Mistral 7B Instruct",
    "mixtral-8x22b-instruct": "Mixtral 8x22B Instruct",
    "mixtral-8x7b-32768": "Mixtral 8x7B",
    "mixtral-8x7b-instruct": "Mixtral 8x7B Instruct",
    "nousresearch/hermes-3-llama-3.1-70b": "Hermes 3 Llama 3.1 70B",
    "nousresearch/nous-capybara-7b:free": "Nous Capybara 7B",
    "openchat/openchat-7b:free": "OpenChat 7B",
    "qwen/qwen-2-7b-instruct:free": "Qwen 2 7B Instruct",
    "qwen/qwen2-72b-instruct": "Qwen 2 72B Instruct",
    "undi95/toppy-m-7b:free": "Toppy M 7B",
    "whisper-large-v3": "Whisper Large v3",
    "whisper-large-v3-turbo": "Whisper Large v3 Turbo",
    "01-ai/yi-34b-chat": "Yi 34B Chat",
    "01-ai/yi-1.5-34b-chat": "Yi 1.5 34B Chat",
    "nousresearch/hermes-3-llama-3.1-70b-fp8": "Hermes 3 Llama 3.1 70B (FP8)",
    "nousresearch/hermes-3-llama-3.1-405b:free": "Hermes 3 Llama 3.1 405B",
    "llava-v1.5-7b-4096-preview": "LLaVA 1.5 7B",
    "mattshumer/reflection-llama-3.1-70b": "Reflection Llama 3.1 70B",
    "mattshumer/reflection-70b:free": "Reflection Llama 3.1 70B",
    "mattshumer/reflection-llama-3.1-70b-completions": "Reflection Llama 3.1 70B Completions",
    "deepseek-ai/deepseek-v2.5": "DeepSeek V2.5",
    "mistralai/pixtral-12b-2409": "Pixtral 12B (2409)",
    "qwen/qwen2-vl-7b-instruct": "Qwen2-VL 7B Instruct",
    "mistralai/pixtral-12b:free": "Pixtral 12B",
    "qwen/qwen-2-vl-7b-instruct:free": "Qwen2-VL 7B Instruct",
    "qwen/qwen2-vl-72b-instruct": "Qwen2-VL 72B Instruct",
    "qwen/qwen2.5-72b-instruct": "Qwen2.5 72B Instruct",
    "llama-3.2-90b-text-preview": "Llama 3.2 90B (Text Only)",
    "llama-3.2-3b-preview": "Llama 3.2 3B",
    "llama-3.2-11b-text-preview": "Llama 3.2 11B (Text Only)",
    "llama-3.2-1b-preview": "Llama 3.2 1B",
    "@cf/meta/llama-3.2-1b-instruct": "Llama 3.2 1B Instruct",
    "meta-llama/llama-3.2-11b-vision-instruct:free": "Llama 3.2 11B Vision Instruct",
    "@cf/meta/llama-3.2-11b-vision-instruct": "Llama 3.2 11B Vision Instruct",
    "@cf/meta/llama-3.2-3b-instruct": "Llama 3.2 3B Instruct",
    "meta-llama/llama-3.2-90b-vision-instruct": "Llama 3.2 90B Vision Instruct",
    "meta-llama/llama-3.2-3b-instruct": "Llama 3.2 3B Instruct",
    "llama-3.2-11b-vision-preview": "Llama 3.2 11B Vision",
    "llama-3.2-90b-vision-preview": "Llama 3.2 90B Vision",
    "meta-llama/llama-3.2-90b-vision": "Llama 3.2 90B Vision",
    "meta-llama/llama-3.1-70b-instruct:free": "Llama 3.1 70B Instruct",
    "meta-llama/llama-3.2-1b-instruct:free": "Llama 3.2 1B Instruct",
    "liquid/lfm-40b:free": "Liquid LFM 40B",
    "meta-llama/llama-3.2-3b-instruct:free": "Llama 3.2 3B Instruct",
    "meta-llama/llama-3.1-405b-instruct:free": "Llama 3.1 405B Instruct",
    "mathstral-7b-v0.1": "Mathstral 7B v0.1",
    "llama-3.1-70b-instruct": "Llama 3.1 70B Instruct",
    "gryphe/mythomax-l2-13b:free": "Mythomax L2 13B",
    "meta-llama/llama-3.2-90b-vision-instruct:free": "Llama 3.2 90B Vision Instruct",
    "mamba-codestral-7b-v0-1": "Codestral Mamba 7B v0.1",
    "hermes3-70b": "Hermes 3 70B",
    "llama3.1-nemotron-70b-instruct": "Llama 3.1 Nemotron 70B Instruct",
    "llama3.2-3b-instruct": "Llama 3.2 3B Instruct",
    "llama3.1-8b-instruct": "Llama 3.1 8B Instruct",
    "llama3.1-70b-instruct-fp8": "Llama 3.1 70B Instruct (FP8)",
    "llama3.1-405b-instruct-fp8": "Llama 3.1 405B Instruct (FP8)",
    "hermes3-405b": "Hermes 3 405B",
    "deepseek-coder-v2-lite-instruct": "DeepSeek Coder v2 Lite Instruct",
    "hermes3-8b": "Hermes 3 8B",
    "dracarys2-72b-instruct": "Dracarys 2 72B Instruct",
    "lfm-40b": "Liquid LFM 40B",
    "qwen/qwen2.5-coder-32b-instruct": "Qwen2.5 Coder 32B Instruct",
    "thedrummer/unslopnemo-12b:free": "UnslopNemo 12B",
    "mistral-nemo-instruct-2407": "Mistral Nemo 2407",
    "google/gemini-exp-1121:free": "Gemini Experimental 1121",
    "meta-llama/llama-3.1-70b-instruct-fp8": "Llama 3.1 70B Instruct (FP8)",
    "google/learnlm-1.5-pro-experimental:free": "LearnLM 1.5 Pro Experimental",
    "google/gemini-exp-1114:free": "Gemini Experimental 1114",
    "qwen25-coder-32b-instruct": "Qwen2.5 Coder 32B Instruct",
    "qwen/qwq-32b-preview": "Qwen QwQ 32B Preview",
    "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B Instruct",
    "llama-3.3-70b-versatile": "Llama 3.3 70B",
    "google/gemini-exp-1206:free": "Gemini Experimental 1206",
    "llama3.1-nemotron-70b-instruct-fp8": "Llama 3.1 Nemotron 70B Instruct (FP8)",
    "llama-3.3-70b-specdec": "Llama 3.3 70B (Speculative Decoding)",
    "@cf/meta/llama-3.3-70b-instruct-fp8-fast": "Llama 3.3 70B Instruct (FP8)",
    "google/gemini-2.0-flash-exp:free": "Gemini 2.0 Flash Experimental",
    "qwen2.5-coder-32b-instruct": "Qwen2.5 Coder 32B Instruct",
    "bge-multilingual-gemma2": "BGE-Multilingual-Gemma2",
    "pixtral-12b-2409": "Pixtral 12B (2409)",
    "google/gemini-2.0-flash-thinking-exp:free": "Gemini 2.0 Flash Thinking Experimental",
    "sentence-t5-xxl": "sentence-t5-xxl",
    "meta-llama/meta-llama-3.1-405b-instruct-virtuals": "Llama 3.1 405B Instruct Virtuals",
    "llama-3.1-8b-instruct": "Llama 3.1 8B Instruct",
    "deepseek-ai/deepseek-v3": "DeepSeek V3",
    "llava-next-mistral-7b": "Llava Next Mistral 7B",
    "llama-3.3-70b-instruct": "Llama 3.3 70B Instruct",
    "google/gemini-2.0-flash-thinking-exp-1219:free": "Gemini 2.0 Flash Thinking Experimental 1219",
    "sophosympatheia/rogue-rose-103b-v0.2:free": "Rogue Rose 103B v0.2",
    "deepseek-ai/deepseek-r1": "DeepSeek R1",
    "deepseek-ai/deepseek-r1-zero": "DeepSeek R1-Zero",
    "deepseek/deepseek-r1:free": "DeepSeek R1",
    "deepseek-r1-distill-llama-70b": "DeepSeek R1 Distill Llama 70B",
    "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b": "DeepSeek R1 Distill Qwen 32B",
    "deepseek-ai/janus-pro-7b": "DeepSeek Janus Pro 7B",
    "deepseek-r1-distill-llama-8b": "DeepSeek R1 Distill Llama 8B",
    "nvidia/llama-3.1-nemotron-70b-instruct:free": "Llama 3.1 Nemotron 70B Instruct",
    "deepseek/deepseek-r1-distill-llama-70b:free": "DeepSeek R1 Distill Llama 70B",
    "qwen/qwen2.5-vl-72b-instruct:free": "Qwen2.5 VL 72B Instruct",
    "google/gemini-2.0-flash-lite-preview-02-05:free": "Gemini 2.0 Flash Lite Preview 02-05",
    "qwen/qwen-vl-plus:free": "Qwen VL Plus",
    "google/gemini-2.0-pro-exp-02-05:free": "Gemini 2.0 Pro Experimental 02-05",
    "deepseek-r1": "DeepSeek R1",
    "meta-llama/llama-3.3-70b-instruct:free": "Llama 3.3 70B Instruct",
    "deepseek/deepseek-chat:free": "DeepSeek V3",
    "deepseek-r1-distill-qwen-32b": "DeepSeek R1 Distill Qwen 32B",
    "mistralai/mistral-nemo:free": "Mistral Nemo",
    "allam-2-7b": "Allam 2 7B",
    "mistralai/mistral-small-24b-instruct-2501:free": "Mistral Small 24B Instruct 2501",
    "qwen-2.5-32b": "Qwen 2.5 32B",
    "cognitivecomputations/dolphin3.0-r1-mistral-24b:free": "Dolphin 3.0 R1 Mistral 24B",
    "qwen-2.5-coder-32b": "Qwen 2.5 Coder 32B",
    "cognitivecomputations/dolphin3.0-mistral-24b:free": "Dolphin 3.0 Mistral 24B",
    "deepseek-r1-671b": "DeepSeek R1",
    "@cf/meta/llama-guard-3-8b": "Llama Guard 3 8B",
    "mistral-saba-24b": "Mistral Saba 24B",
    "deepseek/deepseek-r1-zero:free": "DeepSeek R1 Zero",
    "nousresearch/deephermes-3-llama-3-8b-preview:free": "DeepHermes 3 Llama 3 8B Preview",
    "qwen-qwq-32b": "Qwen QwQ 32B",
    "qwen/qwq-32b": "Qwen QwQ 32B",
    "qwen/qwq-32b:free": "Qwen QwQ 32B",
    "qwen/qwen2.5-vl-7b-instruct": "Qwen2.5 VL 7B Instruct",
    "qwen/qwen-2.5-coder-32b-instruct:free": "Qwen2.5 Coder 32B Instruct",
    "mistral-7b-instruct-v0.3": "Mistral 7B Instruct v0.3",
    "moonshotai/moonlight-16b-a3b-instruct:free": "Moonlight-16B-A3B-Instruct",
    "google/gemma-3-27b-it:free": "Gemma 3 27B Instruct",
    "qwen/qwen-2.5-72b-instruct:free": "Qwen 2.5 72B Instruct",
    "rekaai/reka-flash-3:free": "Reka Flash 3",
    "deepseek/deepseek-r1-distill-qwen-32b:free": "DeepSeek R1 Distill Qwen 32B",
    "deepseek/deepseek-r1-distill-qwen-14b:free": "DeepSeek R1 Distill Qwen 14B",
    "qwen/qwen2.5-vl-72b-instruct": "Qwen2.5 VL 72B Instruct",
    "qwen/qwq-32b-preview:free": "Qwen QwQ 32B Preview",
    "google/gemma-3-12b-it:free": "Gemma 3 12B Instruct",
    "google/gemma-3-1b-it:free": "Gemma 3 1B Instruct",
    "google/gemma-3-4b-it:free": "Gemma 3 4B Instruct",
    "open-r1/olympiccoder-32b:free": "OlympicCoder 32B",
    "open-r1/olympiccoder-7b:free": "OlympicCoder 7B",
    "featherless/qwerky-72b:free": "Featherless Qwerky 72B",
    "qwen/qwen2.5-vl-32b-instruct:free": "Qwen 2.5 VL 32B Instruct",
    "deepseek/deepseek-chat-v3-0324:free": "DeepSeek V3 0324",
    "qwen/qwen-2.5-vl-7b-instruct:free": "Qwen 2.5 VL 7B Instruct",
    "deepseek-ai/deepseek-v3-0324": "DeepSeek V3 0324",
    "allenai/molmo-7b-d:free": "Molmo 7B D",
    "qwen/qwen2.5-vl-3b-instruct:free": "Qwen 2.5 VL 3B Instruct",
    "google/gemini-2.5-pro-exp-03-25:free": "Gemini 2.5 Pro Experimental 03-25",
    "mistralai/mistral-small-3.1-24b-instruct:free": "Mistral Small 3.1 24B Instruct",
    "bytedance-research/ui-tars-72b:free": "Bytedance UI Tars 72B",
    "meta-llama-3_3-70b-instruct": "Llama 3.3 70B Instruct",
    "mixtral-8x7b-instruct-v0.1": "Mixtral 8x7B Instruct v0.1",
    "deepseek/deepseek-v3-base:free": "DeepSeek V3 Base",
    "qwen2.5-vl-72b-instruct": "Qwen 2.5 VL 72B Instruct",
    "meta-llama-3_1-70b-instruct": "Llama 3.1 70B Instruct",
    "qwen/qwen-2.5-7b-instruct:free": "Qwen 2.5 7B Instruct",
    "mamba-codestral-7b-v0.1": "Mamba Codestral 7B v0.1",
    "meta-llama/llama-4-scout-17b-16e-instruct": "Llama 4 Scout Instruct",
    "@cf/meta/llama-4-scout-17b-16e-instruct": "Llama 4 Scout Instruct",
    "meta-llama/llama-4-scout:free": "Llama 4 Scout",
    "meta-llama/llama-4-maverick:free": "Llama 4 Maverick",
    "rekaai/reka-flash-3": "Reka Flash 3",
    "cognitivecomputations/dolphin3.0-mistral-24b": "Dolphin 3.0 Mistral 24B",
    "unsloth/gemma-3-12b-it": "Gemma 3 12B Instruct",
    "chutesai/llama-4-maverick-17b-128e-instruct-fp8": "Llama 4 Maverick 17B 128E Instruct FP8",
    "unsloth/gemma-3-1b-it": "Gemma 3 1B Instruct",
    "deepseek-ai/deepseek-v3-base": "DeepSeek V3 Base",
    "unsloth/gemma-3-4b-it": "Gemma 3 4B Instruct",
    "open-r1/olympiccoder-32b": "OlympicCoder 32B",
    "chutesai/llama-4-scout-17b-16e-instruct": "Llama 4 Scout 17B 16E Instruct",
    "cognitivecomputations/dolphin3.0-r1-mistral-24b": "Dolphin 3.0 R1 Mistral 24B",
    "open-r1/olympiccoder-7b": "OlympicCoder 7B",
    "nousresearch/deephermes-3-llama-3-8b-preview": "DeepHermes 3 Llama 3 8B Preview",
    "chutesai/mistral-small-3.1-24b-instruct-2503": "Mistral Small 3.1 24B Instruct 2503",
    "qwen/qwen2.5-vl-32b-instruct": "Qwen 2.5 VL 32B Instruct",
    "nvidia/llama-3_1-nemotron-ultra-253b-v1": "Llama 3.1 Nemotron Ultra 253B v1",
    "nvidia/llama-3.1-nemotron-ultra-253b-v1:free": "Llama 3.1 Nemotron Ultra 253B v1",
    "nvidia/llama-3.1-nemotron-nano-8b-v1": "Llama 3.1 Nemotron Nano 8B v1",
    "mistral-small-3.1-24b-instruct-2503": "Mistral Small 3.1 24B Instruct 2503",
    "nvidia/llama-3_3-nemotron-super-49b-v1": "Llama 3.3 Nemotron Super 49B v1",
    "gemma-3-27b-it": "Gemma 3 27B Instruct",
    "nvidia/llama-3.3-nemotron-super-49b-v1:free": "Llama 3.3 Nemotron Super 49B v1",
    "nvidia/llama-3.1-nemotron-nano-8b-v1:free": "Llama 3.1 Nemotron Nano 8B v1",
    "meta-llama/llama-4-maverick-17b-128e-instruct": "Llama 4 Maverick 17B 128E Instruct",
    "moonshotai/kimi-vl-a3b-thinking:free": "Kimi VL A3B Thinking",
    "moonshotai/kimi-vl-a3b-thinking": "Kimi VL A3B Thinking",
    "@cf/mistralai/mistral-small-3.1-24b-instruct": "Mistral Small 3.1 24B Instruct",
    "@cf/google/gemma-3-12b-it": "Gemma 3 12B Instruct",
    "@cf/qwen/qwq-32b": "Qwen QwQ 32B",
    "@cf/qwen/qwen2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B Instruct",
    "arliai/qwq-32b-arliai-rpr-v1:free": "QwQ 32B ArliAI RpR v1",
    "agentica-org/deepcoder-14b-preview:free": "DeepCoder 14B Preview",
    "agentica-org/deepcoder-14b-preview": "DeepCoder 14B Preview",
    "arliai/qwq-32b-arliai-rpr-v1": "QwQ 32B ArliAI RpR v1",
    "shisa-ai/shisa-v2-llama3.3-70b:free": "Shisa V2 Llama 3.3 70B",
    "compound-beta-mini": "Groq compound-beta-mini",
    "compound-beta": "Groq compound-beta",
    "shisa-ai/shisa-v2-llama3.3-70b": "Shisa V2 Llama 3.3 70B",
}

# Hyperbolic model IDs that fetch_hyperbolic_models() leaves out of its result.
# Entries are compared against the API's "id" field exactly as returned, so the
# match is case-sensitive.
HYPERBOLIC_IGNORED_MODELS = {
    "Wifhat",
    "FLUX.1-dev",
    "StableDiffusion",
    "Monad",
    "TTS",
    "deepseek-ai/Janus-Pro-7B",
    "test",
    "SDXL1.0-base",
    # Ignore DeepSeek R1 and R1-Zero because they are not available in the free tier.
    "deepseek-ai/DeepSeek-R1",
    "deepseek-ai/DeepSeek-R1-Zero",
}

# Lambda Labs model IDs that fetch_lambda_models() leaves out of its result
# (exact, case-sensitive match on the API's "id" field).
LAMBDA_IGNORED_MODELS = {"lfm-40b-vllm", "hermes3-405b-fp8-128k"}

# OpenRouter model IDs that fetch_openrouter_models() leaves out of its result.
# The fetcher lowercases the API's ID before the lookup, so every entry here
# must be written in lowercase.
OPENROUTER_IGNORED_MODELS = {
    # Ignore gemini experimental free models because rate limits mean they are unusable.
    "google/gemini-exp-1121:free",
    "google/learnlm-1.5-pro-experimental:free",
    "google/gemini-exp-1114:free",
    "google/gemini-exp-1206:free",
    "google/gemini-2.0-flash-exp:free",
    "google/gemini-2.0-flash-thinking-exp:free",
    "google/gemini-2.0-flash-thinking-exp-1219:free",
    "google/gemini-flash-1.5-exp:free",
    "google/gemini-2.0-pro-exp-02-05:free",
}


================================================
FILE: src/pull_available_models.py
================================================
#!/usr/bin/env python3

from collections import defaultdict
import logging
import json
import requests
import os
from dotenv import load_dotenv
from google.cloud import cloudquotas_v1
from mistralai import Mistral
from concurrent.futures import ThreadPoolExecutor
import time
import re

from data import (
    MODEL_TO_NAME_MAPPING,
    HYPERBOLIC_IGNORED_MODELS,
    LAMBDA_IGNORED_MODELS,
    OPENROUTER_IGNORED_MODELS,
)


# Load variables from a .env file (python-dotenv) so the os.environ lookups
# below and in the fetchers can find locally configured API keys.
load_dotenv()
# Directory containing this script; used to locate the bundled silent MP3 that
# is sent to Groq's transcription endpoint.
script_dir = os.path.dirname(os.path.abspath(__file__))

# Global clients
# Built at import time: a missing MISTRAL_API_KEY raises KeyError immediately.
mistral_client = Mistral(api_key=os.environ["MISTRAL_API_KEY"])
# time.time() timestamp of the last call made through
# rate_limited_mistral_chat(); 0 means no call has been made yet.
last_mistral_request_time = 0


def create_logger(provider_name):
    """Return a DEBUG-level logger whose messages are prefixed with the provider name.

    ``logging.getLogger`` hands back the same logger object for the same name,
    so the stream handler is attached only when the logger has none yet.
    Without that guard a second call with the same name would add another
    handler and every message would be printed twice.

    Args:
        provider_name: Name used both as the logger name and as the
            ``"<name>: "`` prefix of each emitted line.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(provider_name)
    logger.setLevel(logging.DEBUG)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(f"{provider_name}: %(message)s"))
        logger.addHandler(handler)
    return logger


# Lowercased model IDs that get_model_name() could not find in
# MODEL_TO_NAME_MAPPING; filled in as a side effect of each failed lookup.
MISSING_MODELS = set()


def get_model_name(id):
    """Translate a provider model ID into its display name.

    The lookup is case-insensitive: the ID is lowercased before it is used as
    a key into MODEL_TO_NAME_MAPPING. Unknown IDs are recorded in
    MISSING_MODELS and the lowercased ID itself is returned as a fallback.
    """
    key = id.lower()
    try:
        return MODEL_TO_NAME_MAPPING[key]
    except KeyError:
        # Remember the miss so it can be reported, and fall back to the raw key.
        MISSING_MODELS.add(key)
        return key


def get_groq_limits_for_stt_model(model_id, logger):
    """Probe Groq's transcription endpoint to read a speech-to-text model's rate limits.

    Uploads the bundled ``1-second-of-silence.mp3`` (located next to this
    script) and reads the limits from the ``x-ratelimit-limit-*`` response
    headers.

    Args:
        model_id: Groq model ID to probe.
        logger: Logger used for progress and error messages.

    Returns:
        ``{"audio-seconds/minute": int, "requests/day": int}`` on success, or
        an empty dict if the request fails, returns an error status, or the
        expected headers are missing or not integers.
    """
    logger.info(f"Getting limits for STT model {model_id}...")
    try:
        # The context manager closes the sample file even when the request
        # raises; the bare open() used previously leaked the handle.
        with open(
            os.path.join(script_dir, "1-second-of-silence.mp3"), "rb"
        ) as audio_file:
            r = requests.post(
                "https://api.groq.com/openai/v1/audio/transcriptions",
                headers={
                    "Authorization": f'Bearer {os.environ["GROQ_API_KEY"]}',
                },
                data={
                    "model": model_id,
                },
                files={
                    "file": audio_file,
                },
            )
    except Exception as e:
        logger.error(f"Failed to get limits for model {model_id}: {e}")
        return {}
    try:
        r.raise_for_status()
        # Header parsing lives inside the try so a missing or malformed header
        # degrades to "no limits" (as in the chat-model probe) instead of
        # raising out of the worker thread.
        audio_seconds_per_minute = int(r.headers["x-ratelimit-limit-audio-seconds"])
        rpd = int(r.headers["x-ratelimit-limit-requests"])
    except Exception as e:
        logger.error(f"Failed to get limits for model {model_id}: {e}")
        logger.error(r.text)
        return {}
    return {
        "audio-seconds/minute": audio_seconds_per_minute,
        "requests/day": rpd,
    }


def get_groq_limits_for_model(model_id, script_dir, logger):
    """Read a Groq model's rate limits from the headers of a minimal request.

    Whisper models are delegated to ``get_groq_limits_for_stt_model``. Models
    whose ID contains ``"tts"`` are not probed at all. Every other model gets
    a one-token streamed chat completion whose ``x-ratelimit-limit-*``
    headers carry the limits.

    Args:
        model_id: Groq model ID to probe.
        script_dir: Unused here; kept so existing callers keep working.
        logger: Logger used for progress and error messages.

    Returns:
        ``None`` for TTS models (the caller skips these), a dict of limits on
        success, or an empty dict when the probe fails.
    """
    if "whisper" in model_id:
        return get_groq_limits_for_stt_model(model_id, logger)
    if "tts" in model_id:
        return None
    logger.info(f"Getting limits for chat model {model_id}...")

    try:
        r = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={
                "Authorization": f'Bearer {os.environ["GROQ_API_KEY"]}',
                "Content-Type": "application/json",
            },
            json={
                "model": model_id,
                "messages": [{"role": "user", "content": "Hi!"}],
                "max_tokens": 1,
                "stream": True,
            },
            stream=True,
        )
    except Exception as e:
        logger.error(f"Failed to get limits for model {model_id}: {e}")
        return {}
    try:
        r.raise_for_status()
        rpd = int(r.headers["x-ratelimit-limit-requests"])
        tpm = int(r.headers["x-ratelimit-limit-tokens"])
        return {"requests/day": rpd, "tokens/minute": tpm}
    except Exception as e:
        logger.error(f"Failed to get limits for model {model_id}: {e}")
        logger.error(r.text)
        return {}
    finally:
        # With stream=True the body is never consumed on the success path, so
        # the connection is only released once the response is closed.
        r.close()


def fetch_groq_models(logger):
    """List Groq's models together with their probed rate limits.

    The model list comes from Groq's ``/models`` endpoint. Each model's
    limits are then probed in parallel on a thread pool. Models whose probe
    returns ``None`` are dropped.

    Returns:
        A list of ``{"id", "name", "limits"}`` dicts sorted by display name.

    Raises:
        requests.HTTPError: If the model listing request returns an error status.
    """
    logger.info("Fetching Groq models...")
    auth_headers = {
        "Authorization": f'Bearer {os.environ["GROQ_API_KEY"]}',
        "Content-Type": "application/json",
    }
    listing = requests.get(
        "https://api.groq.com/openai/v1/models", headers=auth_headers
    )
    listing.raise_for_status()
    available = listing.json()["data"]
    logger.debug(json.dumps(available, indent=4))

    collected = []
    with ThreadPoolExecutor() as pool:
        # Submit every probe up front so they run concurrently, then gather
        # the results in the original listing order.
        pending = [
            (
                entry,
                pool.submit(get_groq_limits_for_model, entry["id"], script_dir, logger),
            )
            for entry in available
        ]
        for entry, task in pending:
            limits = task.result()
            if limits is not None:
                collected.append(
                    {
                        "id": entry["id"],
                        "name": get_model_name(entry["id"]),
                        "limits": limits,
                    }
                )
    collected.sort(key=lambda item: item["name"])
    return collected


def fetch_kluster_models(logger):
    """List the models advertised by Kluster's public ``/v1/models`` endpoint.

    Args:
        logger: Logger used for progress and error messages.

    Returns:
        A list of ``{"id", "name"}`` dicts sorted by name, or an empty list
        when the request fails or the response is not valid JSON.
    """
    logger.info("Fetching Kluster models...")
    try:
        r = requests.get(
            "https://api.kluster.ai/v1/models",
            headers={
                "Content-Type": "application/json",
            },
            timeout=10,
        )
        r.raise_for_status()

        # Parse the JSON response
        response = r.json()

        # The expected payload is a list envelope of the form
        # {"object": "list", "data": [{model1}, {model2}, ...]}; anything else
        # is treated as the list of models itself.
        if isinstance(response, dict) and "data" in response:
            models = response["data"]
        else:
            models = response

        logger.info(f"Fetched {len(models)} models from Kluster")

        ret_models = []
        for model in models:
            model_id = model.get("id")

            # Skip models without an ID
            if not model_id:
                continue

            # `or` rather than a .get() default: a "name" key that is present
            # but null or empty must also fall back to the ID, otherwise the
            # sort below would compare None with str and raise TypeError.
            model_name = model.get("name") or model_id

            ret_models.append(
                {
                    "id": model_id,
                    "name": model_name,  # Use actual name rather than lookup, as these are official names
                }
            )

        logger.debug(json.dumps(ret_models, indent=4))
        ret_models = sorted(ret_models, key=lambda x: x["name"])
        return ret_models

    except requests.exceptions.RequestException as e:
        logger.error(f"Error fetching Kluster models: {e}")
        return []
    except json.JSONDecodeError as e:
        logger.error(f"Error decoding JSON from Kluster API: {e}")
        logger.error(f"Response text: {r.text}")
        return []


def fetch_openrouter_models(logger):
    """List OpenRouter's free models.

    A model is kept when its prompt and completion prices sum to zero, its ID
    contains ``":free"``, and its lowercased ID is not in
    OPENROUTER_IGNORED_MODELS. A missing price counts as non-free. Every kept
    model is given the same fixed limits (20 requests/minute, 50 requests/day).

    Returns:
        A list of ``{"id", "name", "limits"}`` dicts sorted by display name.

    Raises:
        requests.HTTPError: If the listing request returns an error status.
    """
    logger.info("Fetching OpenRouter models...")
    listing = requests.get(
        "https://openrouter.ai/api/v1/models",
        headers={"Content-Type": "application/json"},
    )
    listing.raise_for_status()
    catalogue = listing.json()["data"]
    logger.info(f"Fetched {len(catalogue)} models from OpenRouter")

    free_models = []
    for entry in catalogue:
        completion_price = float(entry.get("pricing", {}).get("completion", "1"))
        prompt_price = float(entry.get("pricing", {}).get("prompt", "1"))
        if completion_price + prompt_price != 0:
            continue
        model_id = entry["id"]
        if ":free" not in model_id:
            continue
        if model_id.lower() in OPENROUTER_IGNORED_MODELS:
            logger.debug(f"Ignoring model {model_id}")
            continue
        free_models.append(
            {
                "id": model_id,
                "name": get_model_name(model_id),
                "limits": {
                    "requests/minute": 20,
                    "requests/day": 50,
                },
            }
        )
    free_models.sort(key=lambda item: item["name"])
    return free_models


def fetch_cloudflare_models(logger):
    """List Cloudflare models matching the "Text Generation" search.

    Reads CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_KEY from the environment.

    Returns:
        A list of ``{"id", "name"}`` dicts sorted by display name, where the
        ID is the ``name`` field of each search result.

    Raises:
        requests.HTTPError: If the search request returns an error status.
    """
    logger.info("Fetching Cloudflare models...")
    search_url = f"https://api.cloudflare.com/client/v4/accounts/{os.environ['CLOUDFLARE_ACCOUNT_ID']}/ai/models/search?search=Text+Generation"
    auth_headers = {
        "Authorization": f'Bearer {os.environ["CLOUDFLARE_API_KEY"]}',
        "Content-Type": "application/json",
    }
    listing = requests.get(search_url, headers=auth_headers)
    listing.raise_for_status()
    results = listing.json()["result"]
    logger.info(f"Fetched {len(results)} models from Cloudflare")
    found = [
        {
            "id": entry["name"],
            "name": get_model_name(entry["name"]),
        }
        for entry in results
    ]
    found.sort(key=lambda item: item["name"])
    return found


def fetch_ovh_models(logger):
    """List OVH AI Endpoints models that are available and categorised as LLMs.

    Queries the ``models_v2`` REST endpoint with a browser-like header set
    and a key embedded in the source, then keeps entries whose ``available``
    flag is truthy and whose ``category`` contains ``"LLM"``. Every kept model
    is given a fixed limit of 12 requests/minute.

    Returns:
        A list of ``{"id", "name", "limits"}`` dicts sorted by display name.

    Raises:
        requests.HTTPError: If the listing request returns an error status.
    """
    logger.info("Fetching OVH models...")
    # The same token is sent both as the apikey header and as the bearer token.
    anon_key = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ewogICJyb2xlIjogImFub24iLAogICJpc3MiOiAic3VwYWJhc2UiLAogICJpYXQiOiAxNzEwNzE2NDAwLAogICJleHAiOiAxODY4NDgyODAwCn0.Jty_eO4oWqLm4Lx_LfbpRW5WESXYXtT2humbBq2Pal8"
    browser_headers = {
        "accept": "*/*",
        "accept-language": "en-GB,en-US;q=0.9,en;q=0.8",
        "accept-profile": "public",
        "apikey": anon_key,
        "authorization": f"Bearer {anon_key}",
        "priority": "u=1, i",
        "sec-ch-ua": '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"macOS"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-site",
        "x-client-info": "supabase-js-web/2.39.7",
    }
    query = {"select": "*", "order": "id.desc", "offset": "0", "limit": "100"}
    listing = requests.get(
        "https://endpoints-backend.ai.cloud.ovh.net/rest/v1/models_v2",
        params=query,
        headers=browser_headers,
    )
    listing.raise_for_status()
    llm_entries = [
        entry
        for entry in listing.json()
        if entry["available"] and "LLM" in entry["category"]
    ]
    logger.info(f"Fetched {len(llm_entries)} models from OVH")
    found = [
        {
            "id": entry["name"],
            "name": get_model_name(entry["name"]),
            "limits": {
                "requests/minute": 12,
            },
        }
        for entry in llm_entries
    ]
    found.sort(key=lambda item: item["name"])
    return found


def fetch_hyperbolic_models(logger):
    """List Hyperbolic's models, minus those in HYPERBOLIC_IGNORED_MODELS.

    Reads HYPERBOLIC_API_KEY from the environment. Every kept model is given
    a fixed limit of 60 requests/minute.

    Returns:
        A list of ``{"id", "name", "limits"}`` dicts sorted by display name.

    Raises:
        requests.HTTPError: If the listing request returns an error status.
    """
    logger.info("Fetching Hyperbolic models from API...")
    auth_headers = {
        "accept": "application/json",
        "authorization": f"Bearer {os.environ['HYPERBOLIC_API_KEY']}",
    }
    listing = requests.get("https://api.hyperbolic.xyz/v1/models", headers=auth_headers)
    listing.raise_for_status()
    entries = listing.json()["data"]
    logger.info(f"Fetched {len(entries)} models from Hyperbolic's API")

    kept = []
    for entry in entries:
        model_id = entry["id"]
        if model_id in HYPERBOLIC_IGNORED_MODELS:
            logger.debug(f"Ignoring model {model_id}")
        else:
            kept.append(
                {
                    "id": model_id,
                    "name": get_model_name(model_id),
                    "limits": {
                        "requests/minute": 60,
                    },
                }
            )
    # The debug dump shows the models in API order, before sorting.
    logger.debug(json.dumps(kept, indent=4))
    kept.sort(key=lambda item: item["name"])
    return kept


def fetch_github_models(logger):
    """List the models shown on the GitHub Marketplace, following pagination.

    Pages are requested one at a time (with a 0.5 s pause between them) until
    the reported ``totalPages`` is reached, a page comes back empty, or a
    request fails. Models collected before a failure are still returned,
    except when the very first page answers 404.

    Args:
        logger: Logger used for progress and error messages.

    Returns:
        A list of ``{"id", "name"}`` dicts sorted by friendly name, where the
        ID is the marketplace ``name`` field. Empty if the first page is a 404.
    """
    logger.info("Fetching GitHub models...")
    all_models_data = []
    page = 1
    total_pages = 1  # Initialize with 1 to start the loop
    pages_fetched = 0  # Pages that actually contributed models, for the summary log

    while page <= total_pages:
        # Reset per iteration so the error handler can tell "no response at
        # all" (connection error, timeout) apart from "error response", and
        # never inspects a response left over from an earlier page.
        r = None
        try:
            url = f"https://github.com/marketplace?type=models&page={page}"
            logger.info(f"Fetching from {url}")
            r = requests.get(
                url,
                headers={
                    "Accept": "application/json",
                    "Content-Type": "application/json",
                    "x-requested-with": "XMLHttpRequest",
                },
            )
            r.raise_for_status()
            data = r.json()

            current_page_models = data.get("results", [])
            if not current_page_models:
                logger.info(f"No models found on page {page}. Stopping.")
                break

            all_models_data.extend(current_page_models)
            pages_fetched += 1

            total_pages = data.get("totalPages", 0)
            logger.info(
                f"Fetched page {page}/{total_pages}. Found {len(current_page_models)} models on this page."
            )

            if page >= total_pages:
                break
            page += 1
            time.sleep(0.5)  # Be respectful to the API

        except requests.exceptions.RequestException as e:
            logger.error(f"Error fetching GitHub models on page {page}: {e}")
            status_code = r.status_code if r is not None else None
            if (
                status_code == 404 and page == 1
            ):  # If first page is 404, likely endpoint changed or no models
                logger.error(
                    "Initial request failed (404), assuming no models or endpoint issue."
                )
                return []
            elif (
                status_code == 404
            ):  # If a subsequent page is 404, means we've gone past the last page
                logger.info(f"Reached end of pages (404 on page {page}).")
                break
            # For other errors, break or implement retry logic if desired
            break
        except json.JSONDecodeError as e:
            logger.error(
                f"Error decoding JSON from GitHub models API on page {page}: {e}"
            )
            logger.error(f"Response text: {r.text}")
            break

    logger.info(
        f"Fetched a total of {len(all_models_data)} models from GitHub over {pages_fetched} page(s)."
    )
    ret_models = []
    for model_data in all_models_data:
        # Ensure model_data is a dictionary and has the required keys
        if (
            isinstance(model_data, dict)
            and "name" in model_data
            and "friendly_name" in model_data
        ):
            ret_models.append(
                {
                    "id": model_data[
                        "name"
                    ],  # Using 'name' as id, can be changed if another field is more suitable
                    "name": model_data["friendly_name"],
                }
            )
        else:
            logger.warning(f"Skipping malformed model data: {model_data}")

    ret_models = sorted(ret_models, key=lambda x: x["name"])
    return ret_models


def fetch_gemini_limits(logger):
    """Read the Gemini free-tier quotas from the Google Cloud Quotas API.

    Lists the quota infos of ``generativelanguage.googleapis.com`` for the
    project named by GCP_PROJECT_ID and keeps the two free-tier metrics:
    input token count and request count. Dimensions with a value of ``-1``
    (unlimited) are skipped.

    Args:
        logger: Logger used for progress and debug output.

    Returns:
        A ``defaultdict`` mapping each quota's ``model`` dimension to a dict
        of ``"tokens/<refresh_interval>"`` and
        ``"requests/<refresh_interval>"`` limits.
    """
    logger.info("Fetching Gemini limits...")
    client = cloudquotas_v1.CloudQuotasClient()
    # Single quotes inside the f-string: reusing the enclosing double quotes is
    # a SyntaxError on Python versions before 3.12.
    request = cloudquotas_v1.ListQuotaInfosRequest(
        parent=f"projects/{os.environ['GCP_PROJECT_ID']}/locations/global/services/generativelanguage.googleapis.com"
    )
    pager = client.list_quota_infos(request=request)

    # Quota metric -> prefix of the limit key recorded for it.
    limit_kind_by_metric = {
        "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count": "tokens",
        "generativelanguage.googleapis.com/generate_content_free_tier_requests": "requests",
    }

    models = defaultdict(dict)
    for quota in pager:
        limit_kind = limit_kind_by_metric.get(quota.metric)
        if limit_kind is None:
            continue
        for dimension in quota.dimensions_infos:
            if dimension.details.value == -1:
                # -1 means unlimited
                continue
            models[dimension.dimensions.get("model")][
                f"{limit_kind}/{quota.refresh_interval}"
            ] = dimension.details.value
    logger.debug(json.dumps(models, indent=4))
    return models


def fetch_lambda_models(logger):
    """List Lambda Labs' models, minus those in LAMBDA_IGNORED_MODELS.

    Reads LAMBDA_API_KEY from the environment.

    Returns:
        A list of ``{"id", "name"}`` dicts sorted by display name.

    Raises:
        requests.HTTPError: If the listing request returns an error status.
    """
    logger.info("Fetching Lambda Labs models...")
    auth_headers = {"Authorization": f"Bearer {os.environ['LAMBDA_API_KEY']}"}
    listing = requests.get("https://api.lambdalabs.com/v1/models", headers=auth_headers)
    listing.raise_for_status()
    entries = listing.json()["data"]
    logger.info(f"Fetched {len(entries)} models from Lambda Labs")

    kept = []
    for entry in entries:
        model_id = entry["id"]
        if model_id in LAMBDA_IGNORED_MODELS:
            logger.debug(f"Ignoring model {model_id}")
        else:
            kept.append(
                {
                    "id": model_id,
                    "name": get_model_name(model_id),
                }
            )
    kept.sort(key=lambda item: item["name"])
    return kept


def rate_limited_mistral_chat(client, **kwargs):
    """Call ``client.chat.complete`` while keeping calls at least one second apart.

    The time of the previous call is kept in the module-level
    ``last_mistral_request_time``. If less than a second has passed since
    then, the function sleeps for the remainder before making the request.

    Args:
        client: Object exposing ``chat.complete``.
        **kwargs: Passed through unchanged to ``client.chat.complete``.

    Returns:
        Whatever ``client.chat.complete`` returns.
    """
    global last_mistral_request_time

    elapsed = time.time() - last_mistral_request_time
    if elapsed < 1:
        # Wait out the rest of the one-second window.
        time.sleep(1 - elapsed)

    result = client.chat.complete(**kwargs)
    # Stamp the time after the call returns, so the gap is measured from the
    # end of one request to the start of the next.
    last_mistral_request_time = time.time()
    return result


def fetch_samba_models(logger):
    """List the models found in SambaNova's public pricing endpoint.

    Returns:
        A list of ``{"id", "name"}`` dicts sorted by name. The name is the
        entry's ``model_name``, or its ``model_id`` when the name is empty.

    Raises:
        requests.HTTPError: If the pricing request returns an error status.
    """
    logger.info("Fetching SambaNova models...")
    listing = requests.get("https://cloud.sambanova.ai/api/pricing")
    listing.raise_for_status()
    price_entries = listing.json()["prices"]
    logger.info(f"Fetched {len(price_entries)} models from SambaNova")
    found = [
        {
            "id": entry["model_id"],
            "name": entry["model_name"] or entry["model_id"],
        }
        for entry in price_entries
    ]
    found.sort(key=lambda item: item["name"])
    return found


def fetch_scaleway_models(logger):
    """List the models returned by Scaleway's ``/v1/models`` endpoint.

    Reads SCALEWAY_API_KEY from the environment.

    Returns:
        A list of ``{"id", "name"}`` dicts sorted by display name.

    Raises:
        requests.HTTPError: If the listing request returns an error status.
    """
    logger.info("Fetching Scaleway models...")
    auth_headers = {"Authorization": f"Bearer {os.environ['SCALEWAY_API_KEY']}"}
    listing = requests.get("https://api.scaleway.ai/v1/models", headers=auth_headers)
    listing.raise_for_status()
    entries = listing.json()["data"]
    logger.info(f"Fetched {len(entries)} models from Scaleway")
    found = [
        {
            "id": entry["id"],
            "name": get_model_name(entry["id"]),
        }
        for entry in entries
    ]
    found.sort(key=lambda item: item["name"])
    return found


def fetch_cohere_models(logger):
    """List Cohere's non-deprecated chat models.

    Pages through ``/v1/models`` by following ``next_page_token`` until none
    is returned. A model is kept when it has a name, is not flagged
    ``is_deprecated``, and lists ``"chat"`` in either ``endpoints`` or
    ``default_endpoints``.

    Args:
        logger: Logger used for progress and error messages.

    Returns:
        A list of ``{"id", "name"}`` dicts sorted by display name, or an empty
        list if any page request fails or cannot be decoded.
    """
    logger.info("Fetching Cohere models...")
    request_headers = {
        "accept": "application/json",
        "Authorization": f"Bearer {os.environ['COHERE_API_KEY']}",
    }
    query = {}
    collected = []
    page_number = 1

    try:
        while True:
            # The first request carries no query string; later ones carry the page token.
            reply = requests.get(
                "https://api.cohere.com/v1/models",
                headers=request_headers,
                params=query or None,
                timeout=10,
            )
            reply.raise_for_status()
            body = reply.json()
            batch = body.get("models", [])
            logger.info(f"Fetched {len(batch)} models from Cohere (page {page_number})")
            collected.extend(batch)
            continuation = body.get("next_page_token")
            if not continuation:
                break
            query["page_token"] = continuation
            page_number += 1
    except requests.exceptions.RequestException as exc:
        logger.error(f"Error fetching Cohere models: {exc}")
        return []
    except json.JSONDecodeError as exc:
        logger.error(f"Error decoding Cohere API response: {exc}")
        return []

    chat_models = []
    for entry in collected:
        model_id = entry.get("name")
        if not model_id:
            continue
        if entry.get("is_deprecated"):
            logger.debug(f"Skipping deprecated Cohere model {model_id}")
            continue
        supported = {
            *(entry.get("endpoints") or []),
            *(entry.get("default_endpoints") or []),
        }
        if "chat" in supported:
            chat_models.append(
                {
                    "id": model_id,
                    "name": get_model_name(model_id),
                }
            )
        else:
            logger.debug(f"Skipping non-chat Cohere model {model_id}")

    logger.info(f"Found {len(chat_models)} Cohere chat models")
    chat_models.sort(key=lambda item: item["name"])
    return chat_models


def fetch_chutes_models(logger):
    """List the public Chutes models whose per-million-token USD price is zero.

    Entries with no price information are treated as paid and skipped.

    Returns:
        A list of ``{"id", "name", "description"}`` dicts sorted by display
        name. The ID is the entry's ``name`` (or ``"Unknown model"``) and the
        description is its ``tagline``.

    Raises:
        requests.HTTPError: If the listing request returns an error status.
    """
    logger.info("Fetching Chutes models...")
    listing = requests.get(
        "https://api.chutes.ai/chutes/?include_public=true&limit=1000",
        headers={"Content-Type": "application/json"},
    )
    listing.raise_for_status()
    entries = listing.json()["items"]
    logger.info(f"Fetched {len(entries)} models from Chutes")

    zero_priced = []
    for entry in entries:
        token_pricing = entry.get("current_estimated_price", {}).get(
            "per_million_tokens", {}
        )
        # A missing USD price defaults to 1, i.e. "not free".
        if token_pricing.get("usd", 1) != 0:
            continue
        chute_name = entry.get("name", "Unknown model")
        zero_priced.append(
            {
                "id": chute_name,
                "name": get_model_name(chute_name),
                "description": entry.get("tagline", ""),
            }
        )

    logger.info(f"Found {len(zero_priced)} free models from Chutes")
    zero_priced.sort(key=lambda item: item["name"])
    return zero_priced


def get_human_limits(model, seperator="<br>"):
    """Render a model's limits as human-readable text.

    Each limit becomes ``"<value with thousands separators> <key>"``, and the
    pieces are joined with ``seperator`` in the dict's own order.

    Args:
        model: Model dict; its optional ``"limits"`` entry maps a limit name
            to a numeric value.
        seperator: String placed between the rendered limits.

    Returns:
        The joined text, or ``""`` when the model has no ``"limits"`` key.
    """
    if "limits" in model:
        rendered = []
        for unit, amount in model["limits"].items():
            rendered.append(f"{amount:,} {unit}")
        return seperator.join(rendered)
    return ""


def generate_toc(markdown):
    """Build a Markdown bullet-list table of contents from ``##`` and ``###`` headings.

    Link markup in a heading is reduced to its display text. The anchor is
    that text lowercased, stripped of everything except ``a-z``, ``0-9``,
    spaces, hyphens and underscores, with spaces turned into hyphens, one
    pass of ``--`` to ``-`` replacement, and leading or trailing hyphens
    removed. ``###`` headings are indented by two spaces.

    Args:
        markdown: Markdown source to scan.

    Returns:
        The table of contents, one entry per line; ``""`` if no heading matches.
    """

    def _toc_entry(hashes, title):
        # Keep only the display text of any [text](url) links.
        display = re.sub(r"\[(.*?)\]\([^)]*\)", r"\1", title)
        slug = re.sub(r"[^a-z0-9 \-_]", "", display.lower())
        slug = slug.replace(" ", "-").replace("--", "-").strip("-")
        indent = "" if len(hashes) != 3 else "  "
        return f"{indent}- [{display}](#{slug})"

    # Only level-2 and level-3 headings are listed; the main title (# ...) is skipped.
    return "\n".join(
        _toc_entry(hashes, title)
        for hashes, title in re.findall(r"^(#{2,3}) +(.+)", markdown, re.MULTILINE)
    )


def main():
    """Fetch provider model lists and regenerate README.md from the template.

    Steps:
      1. Create one logger per provider and fetch each provider's models,
         either sequentially or via a thread pool when the
         FETCH_CONCURRENTLY environment variable is "true" (case-insensitive).
      2. Build the markdown for the free-provider section and for the
         trial-credit provider section.
      3. Log any entries collected in MISSING_MODELS.
      4. Substitute {{MODEL_LIST}}, {{TRIAL_LIST_MARKDOWN}} and {{TOC}} in
         README_template.md and write the result to README.md one directory
         above the script.

    Exceptions raised by the fetch functions or by file I/O are not caught
    here and propagate to the caller.
    """
    logger = create_logger("Main")
    groq_logger = create_logger("Groq")
    openrouter_logger = create_logger("OpenRouter")
    google_ai_studio_logger = create_logger("Google AI Studio")
    cloudflare_logger = create_logger("Cloudflare")
    github_logger = create_logger("GitHub")
    hyperbolic_logger = create_logger("Hyperbolic")
    samba_logger = create_logger("SambaNova")
    scaleway_logger = create_logger("Scaleway")
    cohere_logger = create_logger("Cohere")

    fetch_concurrently = os.getenv("FETCH_CONCURRENTLY", "false").lower() == "true"

    if fetch_concurrently:
        with ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(fetch_gemini_limits, google_ai_studio_logger),
                executor.submit(fetch_openrouter_models, openrouter_logger),
                executor.submit(fetch_hyperbolic_models, hyperbolic_logger),
                executor.submit(fetch_cloudflare_models, cloudflare_logger),
                executor.submit(fetch_github_models, github_logger),
                executor.submit(fetch_samba_models, samba_logger),
                executor.submit(fetch_scaleway_models, scaleway_logger),
                executor.submit(fetch_cohere_models, cohere_logger),
            ]
            # Results are unpacked in submission order; result() re-raises any
            # exception raised inside the corresponding fetch.
            (
                gemini_models,
                openrouter_models,
                hyperbolic_models,
                cloudflare_models,
                github_models,
                samba_models,
                scaleway_models,
                cohere_models,
            ) = [f.result() for f in futures]

            # Fetch groq models after others complete
            groq_models = fetch_groq_models(groq_logger)
    else:
        gemini_models = fetch_gemini_limits(google_ai_studio_logger)
        openrouter_models = fetch_openrouter_models(openrouter_logger)
        hyperbolic_models = fetch_hyperbolic_models(hyperbolic_logger)
        cloudflare_models = fetch_cloudflare_models(cloudflare_logger)
        github_models = fetch_github_models(github_logger)
        samba_models = fetch_samba_models(samba_logger)
        scaleway_models = fetch_scaleway_models(scaleway_logger)
        cohere_models = fetch_cohere_models(cohere_logger)
        groq_models = fetch_groq_models(groq_logger)

    # Initialize markdown string for free providers
    model_list_markdown = ""

    # --- OpenRouter ---
    model_list_markdown += "### [OpenRouter](https://openrouter.ai)\n\n"
    if openrouter_models:
        # The first model's limits are shown as the provider-wide limits.
        provider_limits = get_human_limits(openrouter_models[0])
        model_list_markdown += "**Limits:**\n\n"
        model_list_markdown += f"[{provider_limits}<br>Up to 1000 requests/day with $10 lifetime topup](https://openrouter.ai/docs/api/reference/limits)\n\n"
        model_list_markdown += "Models share a common quota.\n\n"
        for model in openrouter_models:
            model_list_markdown += (
                f"- [{model['name']}](https://openrouter.ai/{model['id']})\n"
            )
    model_list_markdown += "\n"

    # --- Google AI Studio ---
    model_list_markdown += "### [Google AI Studio](https://aistudio.google.com)\n\n"
    model_list_markdown += (
        "Data is used for training when used outside of the UK/CH/EEA/EU.\n\n"
    )
    model_list_markdown += "<table><thead><tr><th>Model Name</th><th>Model Limits</th></tr></thead><tbody>\n"

    # Hand-maintained list of models to display; limits are looked up in the
    # fetched data and default to an empty mapping when a key is absent.
    gemini_text_models = [
        {
            "id": "gemini-3-flash-preview",
            "name": "Gemini 3 Flash",
            "limits": gemini_models.get("gemini-3-flash", {}),
        },
        {
            "id": "gemini-3.1-flash-lite-preview",
            "name": "Gemini 3.1 Flash-Lite",
            "limits": gemini_models.get("gemini-3.1-flash-lite", {}),
        },
        {
            "id": "gemini-2.5-flash",
            "name": "Gemini 2.5 Flash",
            "limits": gemini_models.get("gemini-2.5-flash", {}),
        },
        {
            "id": "gemini-2.5-flash-lite",
            "name": "Gemini 2.5 Flash-Lite",
            "limits": gemini_models.get("gemini-2.5-flash-lite", {}),
        },
        {
            "id": "gemma-3-27b-it",
            "name": "Gemma 3 27B Instruct",
            "limits": gemini_models.get("gemma-3-27b", {}),
        },
        {
            "id": "gemma-3-12b-it",
            "name": "Gemma 3 12B Instruct",
            "limits": gemini_models.get("gemma-3-12b", {}),
        },
        {
            "id": "gemma-3-4b-it",
            "name": "Gemma 3 4B Instruct",
            "limits": gemini_models.get("gemma-3-4b", {}),
        },
        {
            "id": "gemma-3-1b-it",
            "name": "Gemma 3 1B Instruct",
            "limits": gemini_models.get("gemma-3-1b", {}),
        },
    ]

    # Write text models to table
    for model in gemini_text_models:
        limits_str = get_human_limits(model)
        model_list_markdown += (
            f"<tr><td>{model['name']}</td><td>{limits_str}</td></tr>\n"
        )

    model_list_markdown += "</tbody></table>\n\n"

    # --- NVIDIA NIM ---
    model_list_markdown += (
        "### [NVIDIA NIM](https://build.nvidia.com/explore/discover)\n\n"
    )
    model_list_markdown += "Phone number verification required.\n"
    model_list_markdown += "Models tend to be context window limited.\n\n"
    model_list_markdown += "**Limits:** 40 requests/minute\n\n"
    model_list_markdown += "- [Various open models](https://build.nvidia.com/models)\n"
    model_list_markdown += "\n"

    # --- Mistral (La Plateforme) ---
    model_list_markdown += (
        "### [Mistral (La Plateforme)](https://console.mistral.ai/)\n\n"
    )
    model_list_markdown += (
        "* Free tier (Experiment plan) requires opting into data training\n"
    )
    model_list_markdown += "* Requires phone number verification.\n\n"
    model_list_markdown += "**Limits (per-model):** 1 request/second, 500,000 tokens/minute, 1,000,000,000 tokens/month\n\n"
    model_list_markdown += "- [Open and Proprietary Mistral models](https://docs.mistral.ai/getting-started/models/models_overview/)\n"
    model_list_markdown += "\n"

    # --- Mistral (Codestral) ---
    model_list_markdown += (
        "### [Mistral (Codestral)](https://codestral.mistral.ai/)\n\n"
    )
    model_list_markdown += "* Currently free to use\n"
    model_list_markdown += "* Monthly subscription based\n"
    model_list_markdown += "* Requires phone number verification\n\n"
    model_list_markdown += "**Limits:** 30 requests/minute, 2,000 requests/day\n\n"
    model_list_markdown += "- Codestral\n"
    model_list_markdown += "\n"

    # --- HuggingFace Serverless Inference ---
    model_list_markdown += "### [HuggingFace Inference Providers](https://huggingface.co/docs/inference-providers/en/index)\n\n"
    model_list_markdown += "HuggingFace Serverless Inference limited to models smaller than 10GB. Some popular models are supported even if they exceed 10GB.\n\n"
    model_list_markdown += "**Limits:** [$0.10/month in credits](https://huggingface.co/docs/inference-providers/en/pricing)\n\n"
    model_list_markdown += "- Various open models across supported providers\n"
    model_list_markdown += "\n"

    # --- Vercel AI Gateway ---
    model_list_markdown += "### [Vercel AI Gateway](https://vercel.com/docs/ai-gateway)\n\n"
    model_list_markdown += "Routes to various supported providers.\n\n"
    model_list_markdown += "**Limits:** [$5/month](https://vercel.com/docs/ai-gateway/pricing)\n\n"
    model_list_markdown += "\n"

    # --- OpenCode Zen ---
    model_list_markdown += "### [OpenCode Zen](https://opencode.ai/docs/zen/)\n\n"
    model_list_markdown += "AI gateway with curated models.\n\n"
    model_list_markdown += "Free models may use data for improvement.\n\n"
    model_list_markdown += "- Big Pickle Stealth\n"
    model_list_markdown += "- MiniMax M2.5 Free\n"
    model_list_markdown += "- Arcee Large Preview Free\n"
    model_list_markdown += "\n"

    # --- Cerebras ---
    model_list_markdown += "### [Cerebras](https://cloud.cerebras.ai/)\n\n"
    model_list_markdown += "<table><thead><tr><th>Model Name</th><th>Model Limits</th></tr></thead><tbody>\n"
    cerebras_models = [
        {
            "name": "gpt-oss-120b",
            "limits_text": "30 requests/minute<br>60,000 tokens/minute<br>900 requests/hour<br>1,000,000 tokens/hour<br>14,400 requests/day<br>1,000,000 tokens/day"
        },
        {
            "name": "Llama 3.1 8B",
            "limits_text": "30 requests/minute<br>60,000 tokens/minute<br>900 requests/hour<br>1,000,000 tokens/hour<br>14,400 requests/day<br>1,000,000 tokens/day"
        },
    ]
    for model in cerebras_models:
        model_list_markdown += (
            f"<tr><td>{model['name']}</td><td>{model['limits_text']}</td></tr>\n"
        )
    model_list_markdown += "</tbody></table>\n\n"

    # --- Groq ---
    model_list_markdown += "### [Groq](https://console.groq.com)\n\n"
    if groq_models:
        model_list_markdown += "<table><thead><tr><th>Model Name</th><th>Model Limits</th></tr></thead><tbody>\n"
        for model in groq_models:
            limits_str = get_human_limits(model)
            model_list_markdown += (
                f"<tr><td>{model['name']}</td><td>{limits_str}</td></tr>\n"
            )
        model_list_markdown += "</tbody></table>\n"
    model_list_markdown += "\n"

    # --- Cohere ---
    model_list_markdown += "### [Cohere](https://cohere.com)\n\n"
    model_list_markdown += "**Limits:**\n\n"
    model_list_markdown += "[20 requests/minute<br>1,000 requests/month](https://docs.cohere.com/docs/rate-limits)\n\n"
    model_list_markdown += "Models share a common monthly quota.\n\n"
    if cohere_models:
        for model in cohere_models:
            model_list_markdown += f"- {model['name']}\n"
    else:
        model_list_markdown += "- No chat models available right now.\n"
    model_list_markdown += "\n"

    # --- GitHub Models ---
    model_list_markdown += (
        "### [GitHub Models](https://github.com/marketplace/models)\n\n"
    )
    model_list_markdown += "Extremely restrictive input/output token limits.\n\n"
    model_list_markdown += "**Limits:** [Dependent on Copilot subscription tier (Free/Pro/Pro+/Business/Enterprise)](https://docs.github.com/en/github-models/prototyping-with-ai-models#rate-limits)\n\n"
    if github_models:
        for model in github_models:
            model_list_markdown += f"- {model['name']}\n"
    model_list_markdown += "\n"

    # --- Cloudflare Workers AI ---
    model_list_markdown += (
        "### [Cloudflare Workers AI](https://developers.cloudflare.com/workers-ai)\n\n"
    )
    model_list_markdown += "**Limits:** [10,000 neurons/day](https://developers.cloudflare.com/workers-ai/platform/pricing/#free-allocation)\n\n"
    if cloudflare_models:
        for model in cloudflare_models:
            model_list_markdown += f"- {model['name']}\n"
    model_list_markdown += "\n"

    # --- Google Cloud Vertex AI ---
    # These lists are currently hard-coded empty, so the whole section is
    # skipped. The table rows and the closing tags live inside the same guard
    # as the table header so an unmatched </tbody></table> is never emitted.
    vertex_llama_models = []
    vertex_gemini_models = []
    vertex_deepseek_models = []
    if vertex_llama_models or vertex_gemini_models or vertex_deepseek_models:
        model_list_markdown += "### [Google Cloud Vertex AI](https://console.cloud.google.com/vertex-ai/model-garden)\n\n"
        model_list_markdown += "Very stringent payment verification for Google Cloud.\n\n"
        model_list_markdown += "<table><thead><tr><th>Model Name</th><th>Model Limits</th></tr></thead><tbody>\n"

        # Write Gemini models to table. Only the first row carries the limits
        # cell, which spans all Gemini rows via rowspan.
        # Replacement fields use double quotes inside the single-quoted
        # f-strings so the code also parses on Python versions before 3.12.
        first_gemini = True
        for model in vertex_gemini_models:
            limits_str = get_human_limits(model)
            model_list_markdown += f'<tr><td><a href="https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/gemini-experimental" target="_blank">{model["name"]}</a></td>'
            if first_gemini:
                model_list_markdown += f'<td rowspan="{len(vertex_gemini_models)}">{limits_str}<br>Shared Quota</td>'
                first_gemini = False
            model_list_markdown += "</tr>\n"

        # Write Llama models to table
        for model in vertex_llama_models:
            limits_str = get_human_limits(model)
            model_list_markdown += f'<tr><td><a href="https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/{model["urlId"]}" target="_blank">{model["name"]}</a></td><td>{limits_str}<br>Free during preview</td></tr>\n'

        # Write DeepSeek models to table
        for model in vertex_deepseek_models:
            limits_str = get_human_limits(model)
            model_list_markdown += f'<tr><td><a href="https://console.cloud.google.com/vertex-ai/publishers/deepseek-ai/model-garden/{model["urlId"]}" target="_blank">{model["name"]}</a></td><td>{limits_str}<br>Free during preview</td></tr>\n'

        model_list_markdown += "</tbody></table>\n\n"

    # --- Trial Providers Section Generation ---
    trial_list_markdown = ""

    # --- Static Trial Providers (Markdown List/Simple Entry) ---
    trial_providers_static = [
        {
            "name": "Fireworks",
            "url": "https://fireworks.ai/",
            "credits": "$1",
            "requirements": "",
            "models_desc": "[Various open models](https://fireworks.ai/models)",
        },
        {
            "name": "Baseten",
            "url": "https://app.baseten.co/",
            "credits": "$30",
            "requirements": "",
            "models_desc": "[Any supported model - pay by compute time](https://www.baseten.co/library/)",
        },
        {
            "name": "Nebius",
            "url": "https://tokenfactory.nebius.com/",
            "credits": "$1",
            "requirements": "",
            "models_desc": "[Various open models](https://tokenfactory.nebius.com/models)",
        },
        {
            "name": "Novita",
            "url": "https://novita.ai/?ref=ytblmjc&utm_source=affiliate",
            "credits": "$0.5 for 1 year",
            "requirements": "",
            "models_desc": "[Various open models](https://novita.ai/models)",
        },
        {
            "name": "AI21",
            "url": "https://studio.ai21.com/",
            "credits": "$10 for 3 months",
            "requirements": "",
            "models_desc": "Jamba family of models",
        },
        {
            "name": "Upstage",
            "url": "https://console.upstage.ai/",
            "credits": "$10 for 3 months",
            "requirements": "",
            "models_desc": "Solar Pro/Mini",
        },
        {
            "name": "NLP Cloud",
            "url": "https://nlpcloud.com/home",
            "credits": "$15",
            "requirements": "Phone number verification",
            "models_desc": "Various open models",
        },
        {
            "name": "Alibaba Cloud (International) Model Studio",
            "url": "https://bailian.console.alibabacloud.com/",
            "credits": "1 million tokens/model",
            "requirements": "",
            "models_desc": "[Various open and proprietary Qwen models](https://www.alibabacloud.com/en/product/modelstudio)",
        },
        {
            "name": "Modal",
            "url": "https://modal.com",
            "credits": "$5/month upon sign up, $30/month with payment method added",
            "requirements": "",
            "models_desc": "Any supported model - pay by compute time",
        },
        {
            "name": "Inference.net",
            "url": "https://inference.net",
            "credits": "$1, $25 on responding to email survey",
            "requirements": "",
            "models_desc": "Various open models",
        },
    ]

    for provider in trial_providers_static:
        trial_list_markdown += f"### [{provider['name']}]({provider['url']})\n\n"
        trial_list_markdown += f"**Credits:** {provider['credits']}\n\n"
        # The requirements line is omitted when the entry has none.
        if provider["requirements"]:
            trial_list_markdown += f"**Requirements:** {provider['requirements']}\n\n"
        trial_list_markdown += f"**Models:** {provider['models_desc']}\n\n"

    # --- Hyperbolic (Trial - List) ---
    if hyperbolic_models:
        trial_list_markdown += "### [Hyperbolic](https://app.hyperbolic.ai/)\n\n"
        trial_list_markdown += "**Credits:** $1\n\n"
        trial_list_markdown += "**Models:**\n"
        for model in hyperbolic_models:
            trial_list_markdown += f"- {model['name']}\n"
        trial_list_markdown += "\n"

    # --- SambaNova Cloud (Trial - List) ---
    if samba_models:
        trial_list_markdown += "### [SambaNova Cloud](https://cloud.sambanova.ai/)\n\n"
        trial_list_markdown += "**Credits:** $5 for 3 months\n\n"
        trial_list_markdown += "**Models:**\n"
        for model in samba_models:
            trial_list_markdown += f"- {model['name']}\n"
        trial_list_markdown += "\n"

    # --- Scaleway Generative APIs (Trial - List) ---
    if scaleway_models:
        trial_list_markdown += "### [Scaleway Generative APIs](https://console.scaleway.com/generative-api/models)\n\n"
        trial_list_markdown += "**Credits:** 1,000,000 free tokens\n\n"
        trial_list_markdown += "**Models:**\n"
        for model in scaleway_models:
            trial_list_markdown += f"- {model['name']}\n"
        trial_list_markdown += "\n"

    # Log missing model ids as ready-to-paste '"id": "id",' lines.
    if MISSING_MODELS:
        logger.warning("Missing models:")
        logger.warning(
            "\n" + "\n".join([f'"{model}": "{model}",' for model in MISSING_MODELS])
        )

    # Read and write as UTF-8 explicitly so the output does not depend on the
    # platform's default locale encoding.
    with open(
        os.path.join(script_dir, "README_template.md"), "r", encoding="utf-8"
    ) as f:
        readme = f.read()
    warning = """<!---
WARNING: DO NOT EDIT THIS FILE DIRECTLY. IT IS GENERATED BY src/pull_available_models.py
--->
"""
    initial_templated = (
        (warning + readme)
        .replace("{{MODEL_LIST}}", model_list_markdown)
        .replace("{{TRIAL_LIST_MARKDOWN}}", trial_list_markdown)
    )
    # The TOC is generated from the templated text so that it also covers the
    # provider headings inserted above; it is substituted last.
    toc_markdown = generate_toc(initial_templated)
    with open(
        os.path.join(script_dir, "..", "README.md"), "w", encoding="utf-8"
    ) as f:
        f.write(initial_templated.replace("{{TOC}}", toc_markdown))
    logger.info("Wrote models to README.md")


# Regenerate the README only when this file is run as a script, not on import.
if __name__ == "__main__":
    main()


================================================
FILE: src/requirements.txt
================================================
requests
python-dotenv
google-cloud-quotas
mistralai
beautifulsoup4