Repository: cheahjs/free-llm-api-resources Branch: main Commit: c3ac91133bf8 Files: 9 Total size: 77.0 KB Directory structure: gitextract_rnr7ukix/ ├── .github/ │ ├── pull_request_template.md │ └── workflows/ │ ├── readme-change-validator.yml │ └── update-readme.yml ├── .gitignore ├── README.md └── src/ ├── README_template.md ├── data.py ├── pull_available_models.py └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/pull_request_template.md ================================================ ================================================ FILE: .github/workflows/readme-change-validator.yml ================================================ name: README Change Validator on: pull_request: paths: - 'README.md' - 'src/README_template.md' - 'src/pull_available_models.py' permissions: issues: write jobs: check-readme-changes: if: github.actor != 'github-actions[bot]' runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v6 with: fetch-depth: 0 # Fetch all history to compare changes - name: Get changed files id: changed-files run: | README_CHANGED=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -c "README.md" || true) TEMPLATE_CHANGED=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -c "src/README_template.md" || true) SCRIPT_CHANGED=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | grep -c "src/pull_available_models.py" || true) echo "readme_changed=$README_CHANGED" >> $GITHUB_OUTPUT echo "template_changed=$TEMPLATE_CHANGED" >> $GITHUB_OUTPUT echo "script_changed=$SCRIPT_CHANGED" >> $GITHUB_OUTPUT - name: Validate README changes run: | if [ "${{ steps.changed-files.outputs.readme_changed }}" -gt 0 ] && \ [ "${{ steps.changed-files.outputs.template_changed }}" -eq 0 ] && \ [ "${{ steps.changed-files.outputs.script_changed }}" -eq 0 ]; then echo "Error: README.md was modified without corresponding changes in src/README_template.md or src/pull_available_models.py" echo "Please update the template or script instead of modifying README.md directly." exit 1 fi - name: Add PR comment if check fails if: failure() uses: actions/github-script@v8 with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | github.rest.issues.createComment({ owner: context.repo.owner, repo: context.repo.repo, issue_number: context.issue.number, body: '❌ README.md was modified without corresponding changes in `src/README_template.md` or `src/pull_available_models.py`\n\nPlease update the template or script instead of modifying README.md directly.' }) ================================================ FILE: .github/workflows/update-readme.yml ================================================ name: Update README on: schedule: - cron: "0 0 * * *" workflow_dispatch: jobs: update-readme: runs-on: ubuntu-latest permissions: contents: write pull-requests: write id-token: write steps: - name: Checkout repository uses: actions/checkout@v6 - name: Setup Python uses: actions/setup-python@v6 with: python-version: "3.12" - name: Install dependencies run: | pip install -r src/requirements.txt - id: "auth" uses: "google-github-actions/auth@v3" with: workload_identity_provider: "projects/576328904266/locations/global/workloadIdentityPools/github/providers/cheahjs-org" project_id: ${{ secrets.GCP_PROJECT }} - name: Run script env: GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} CLOUDFLARE_API_KEY: ${{ secrets.CLOUDFLARE_API_KEY }} HYPERBOLIC_API_KEY: ${{ secrets.HYPERBOLIC_API_KEY }} GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }} GOOGLE_APPLICATION_CREDENTIALS: ${{ steps.auth.outputs.credentials_file_path }} LAMBDA_API_KEY: ${{ secrets.LAMBDA_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} SCALEWAY_API_KEY: ${{ secrets.SCALEWAY_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} run: | python -u src/pull_available_models.py - name: Remove credentials run: | rm ${{ steps.auth.outputs.credentials_file_path }} - name: Create Pull Request uses: peter-evans/create-pull-request@v8 with: title: "Update README with latest models" body: "This PR updates the README with the latest available models." branch: update-readme base: main commit-message: "Update README with latest models" ================================================ FILE: .gitignore ================================================ venv .venv .env .aider* *.json *.pyc ================================================ FILE: README.md ================================================ # Free LLM API resources This lists various services that provide free access or credits towards API-based LLM usage. > [!NOTE] > Please don't abuse these services, else we might lose them. > [!WARNING] > This list explicitly excludes any services that are not legitimate (eg reverse engineers an existing chatbot) - [Free Providers](#free-providers) - [OpenRouter](#openrouter) - [Google AI Studio](#google-ai-studio) - [NVIDIA NIM](#nvidia-nim) - [Mistral (La Plateforme)](#mistral-la-plateforme) - [Mistral (Codestral)](#mistral-codestral) - [HuggingFace Inference Providers](#huggingface-inference-providers) - [Vercel AI Gateway](#vercel-ai-gateway) - [OpenCode Zen](#opencode-zen) - [Cerebras](#cerebras) - [Groq](#groq) - [Cohere](#cohere) - [GitHub Models](#github-models) - [Cloudflare Workers AI](#cloudflare-workers-ai) - [Providers with trial credits](#providers-with-trial-credits) - [Fireworks](#fireworks) - [Baseten](#baseten) - [Nebius](#nebius) - [Novita](#novita) - [AI21](#ai21) - [Upstage](#upstage) - [NLP Cloud](#nlp-cloud) - [Alibaba Cloud (International) Model Studio](#alibaba-cloud-international-model-studio) - [Modal](#modal) - [Inference.net](#inferencenet) - [Hyperbolic](#hyperbolic) - [SambaNova Cloud](#sambanova-cloud) - [Scaleway Generative APIs](#scaleway-generative-apis) ## Free Providers ### [OpenRouter](https://openrouter.ai) **Limits:** [20 requests/minute
50 requests/day
Up to 1000 requests/day with $10 lifetime topup](https://openrouter.ai/docs/api/reference/limits) Models share a common quota. - [Gemma 3 12B Instruct](https://openrouter.ai/google/gemma-3-12b-it:free) - [Gemma 3 27B Instruct](https://openrouter.ai/google/gemma-3-27b-it:free) - [Gemma 3 4B Instruct](https://openrouter.ai/google/gemma-3-4b-it:free) - [Hermes 3 Llama 3.1 405B](https://openrouter.ai/nousresearch/hermes-3-llama-3.1-405b:free) - [Llama 3.2 3B Instruct](https://openrouter.ai/meta-llama/llama-3.2-3b-instruct:free) - [Llama 3.3 70B Instruct](https://openrouter.ai/meta-llama/llama-3.3-70b-instruct:free) - [Mistral Small 3.1 24B Instruct](https://openrouter.ai/mistralai/mistral-small-3.1-24b-instruct:free) - [arcee-ai/trinity-large-preview:free](https://openrouter.ai/arcee-ai/trinity-large-preview:free) - [arcee-ai/trinity-mini:free](https://openrouter.ai/arcee-ai/trinity-mini:free) - [cognitivecomputations/dolphin-mistral-24b-venice-edition:free](https://openrouter.ai/cognitivecomputations/dolphin-mistral-24b-venice-edition:free) - [google/gemma-3n-e2b-it:free](https://openrouter.ai/google/gemma-3n-e2b-it:free) - [google/gemma-3n-e4b-it:free](https://openrouter.ai/google/gemma-3n-e4b-it:free) - [liquid/lfm-2.5-1.2b-instruct:free](https://openrouter.ai/liquid/lfm-2.5-1.2b-instruct:free) - [liquid/lfm-2.5-1.2b-thinking:free](https://openrouter.ai/liquid/lfm-2.5-1.2b-thinking:free) - [nvidia/nemotron-3-nano-30b-a3b:free](https://openrouter.ai/nvidia/nemotron-3-nano-30b-a3b:free) - [nvidia/nemotron-nano-12b-v2-vl:free](https://openrouter.ai/nvidia/nemotron-nano-12b-v2-vl:free) - [nvidia/nemotron-nano-9b-v2:free](https://openrouter.ai/nvidia/nemotron-nano-9b-v2:free) - [openai/gpt-oss-120b:free](https://openrouter.ai/openai/gpt-oss-120b:free) - [openai/gpt-oss-20b:free](https://openrouter.ai/openai/gpt-oss-20b:free) - [qwen/qwen3-4b:free](https://openrouter.ai/qwen/qwen3-4b:free) - [qwen/qwen3-coder:free](https://openrouter.ai/qwen/qwen3-coder:free) - [qwen/qwen3-next-80b-a3b-instruct:free](https://openrouter.ai/qwen/qwen3-next-80b-a3b-instruct:free) - [stepfun/step-3.5-flash:free](https://openrouter.ai/stepfun/step-3.5-flash:free) - [z-ai/glm-4.5-air:free](https://openrouter.ai/z-ai/glm-4.5-air:free) ### [Google AI Studio](https://aistudio.google.com) Data is used for training when used outside of the UK/CH/EEA/EU.
Model NameModel Limits
Gemini 3 Flash250,000 tokens/minute
20 requests/day
5 requests/minute
Gemini 3.1 Flash-Lite250,000 tokens/minute
500 requests/day
15 requests/minute
Gemini 2.5 Flash250,000 tokens/minute
20 requests/day
5 requests/minute
Gemini 2.5 Flash-Lite250,000 tokens/minute
20 requests/day
10 requests/minute
Gemma 3 27B Instruct15,000 tokens/minute
14,400 requests/day
30 requests/minute
Gemma 3 12B Instruct15,000 tokens/minute
14,400 requests/day
30 requests/minute
Gemma 3 4B Instruct15,000 tokens/minute
14,400 requests/day
30 requests/minute
Gemma 3 1B Instruct15,000 tokens/minute
14,400 requests/day
30 requests/minute
### [NVIDIA NIM](https://build.nvidia.com/explore/discover) Phone number verification required. Models tend to be context window limited. **Limits:** 40 requests/minute - [Various open models](https://build.nvidia.com/models) ### [Mistral (La Plateforme)](https://console.mistral.ai/) * Free tier (Experiment plan) requires opting into data training * Requires phone number verification. **Limits (per-model):** 1 request/second, 500,000 tokens/minute, 1,000,000,000 tokens/month - [Open and Proprietary Mistral models](https://docs.mistral.ai/getting-started/models/models_overview/) ### [Mistral (Codestral)](https://codestral.mistral.ai/) * Currently free to use * Monthly subscription based * Requires phone number verification **Limits:** 30 requests/minute, 2,000 requests/day - Codestral ### [HuggingFace Inference Providers](https://huggingface.co/docs/inference-providers/en/index) HuggingFace Serverless Inference limited to models smaller than 10GB. Some popular models are supported even if they exceed 10GB. **Limits:** [$0.10/month in credits](https://huggingface.co/docs/inference-providers/en/pricing) - Various open models across supported providers ### [Vercel AI Gateway](https://vercel.com/docs/ai-gateway) Routes to various supported providers. **Limits:** [$5/month](https://vercel.com/docs/ai-gateway/pricing) ### [OpenCode Zen](https://opencode.ai/docs/zen/) AI gateway with curated models. Free models may use data for improvement. - Big Pickle Stealth - MiniMax M2.5 Free - Arcee Large Preview Free ### [Cerebras](https://cloud.cerebras.ai/)
Model NameModel Limits
gpt-oss-120b30 requests/minute
60,000 tokens/minute
900 requests/hour
1,000,000 tokens/hour
14,400 requests/day
1,000,000 tokens/day
Llama 3.1 8B30 requests/minute
60,000 tokens/minute
900 requests/hour
1,000,000 tokens/hour
14,400 requests/day
1,000,000 tokens/day
### [Groq](https://console.groq.com)
Model NameModel Limits
Allam 2 7B7,000 requests/day
6,000 tokens/minute
Llama 3.1 8B14,400 requests/day
6,000 tokens/minute
Llama 3.3 70B1,000 requests/day
12,000 tokens/minute
Llama 4 Maverick 17B 128E Instruct1,000 requests/day
6,000 tokens/minute
Llama 4 Scout Instruct1,000 requests/day
30,000 tokens/minute
Whisper Large v37,200 audio-seconds/minute
2,000 requests/day
Whisper Large v3 Turbo7,200 audio-seconds/minute
2,000 requests/day
canopylabs/orpheus-arabic-saudi
canopylabs/orpheus-v1-english
groq/compound250 requests/day
70,000 tokens/minute
groq/compound-mini250 requests/day
70,000 tokens/minute
meta-llama/llama-guard-4-12b14,400 requests/day
15,000 tokens/minute
meta-llama/llama-prompt-guard-2-22m
meta-llama/llama-prompt-guard-2-86m
moonshotai/kimi-k2-instruct1,000 requests/day
10,000 tokens/minute
moonshotai/kimi-k2-instruct-09051,000 requests/day
10,000 tokens/minute
openai/gpt-oss-120b1,000 requests/day
8,000 tokens/minute
openai/gpt-oss-20b1,000 requests/day
8,000 tokens/minute
openai/gpt-oss-safeguard-20b1,000 requests/day
8,000 tokens/minute
qwen/qwen3-32b1,000 requests/day
6,000 tokens/minute
### [Cohere](https://cohere.com) **Limits:** [20 requests/minute
1,000 requests/month](https://docs.cohere.com/docs/rate-limits) Models share a common monthly quota. - c4ai-aya-expanse-32b - c4ai-aya-vision-32b - command-a-03-2025 - command-a-reasoning-08-2025 - command-a-translate-08-2025 - command-a-vision-07-2025 - command-r-08-2024 - command-r-plus-08-2024 - command-r7b-12-2024 - command-r7b-arabic-02-2025 - tiny-aya-earth - tiny-aya-fire - tiny-aya-global - tiny-aya-water ### [GitHub Models](https://github.com/marketplace/models) Extremely restrictive input/output token limits. **Limits:** [Dependent on Copilot subscription tier (Free/Pro/Pro+/Business/Enterprise)](https://docs.github.com/en/github-models/prototyping-with-ai-models#rate-limits) - AI21 Jamba 1.5 Large - Codestral 25.01 - Cohere Command A - Cohere Command R 08-2024 - Cohere Command R+ 08-2024 - DeepSeek-R1 - DeepSeek-R1-0528 - DeepSeek-V3-0324 - Grok 3 - Grok 3 Mini - Llama 4 Maverick 17B 128E Instruct FP8 - Llama 4 Scout 17B 16E Instruct - Llama-3.2-11B-Vision-Instruct - Llama-3.2-90B-Vision-Instruct - Llama-3.3-70B-Instruct - MAI-DS-R1 - Meta-Llama-3.1-405B-Instruct - Meta-Llama-3.1-8B-Instruct - Ministral 3B - Mistral Medium 3 (25.05) - Mistral Small 3.1 - OpenAI GPT-4.1 - OpenAI GPT-4.1-mini - OpenAI GPT-4.1-nano - OpenAI GPT-4o - OpenAI GPT-4o mini - OpenAI Text Embedding 3 (large) - OpenAI Text Embedding 3 (small) - OpenAI gpt-5 - OpenAI gpt-5-chat (preview) - OpenAI gpt-5-mini - OpenAI gpt-5-nano - OpenAI o1 - OpenAI o1-mini - OpenAI o1-preview - OpenAI o3 - OpenAI o3-mini - OpenAI o4-mini - Phi-4 - Phi-4-mini-instruct - Phi-4-mini-reasoning - Phi-4-multimodal-instruct - Phi-4-reasoning ### [Cloudflare Workers AI](https://developers.cloudflare.com/workers-ai) **Limits:** [10,000 neurons/day](https://developers.cloudflare.com/workers-ai/platform/pricing/#free-allocation) - @cf/aisingapore/gemma-sea-lion-v4-27b-it - @cf/ibm-granite/granite-4.0-h-micro - @cf/openai/gpt-oss-120b - @cf/openai/gpt-oss-20b - @cf/qwen/qwen3-30b-a3b-fp8 - @cf/zai-org/glm-4.7-flash - DeepSeek R1 Distill Qwen 32B - Deepseek Coder 6.7B Base (AWQ) - Deepseek Coder 6.7B Instruct (AWQ) - Deepseek Math 7B Instruct - Discolm German 7B v1 (AWQ) - Falcom 7B Instruct - Gemma 2B Instruct (LoRA) - Gemma 3 12B Instruct - Gemma 7B Instruct - Gemma 7B Instruct (LoRA) - Hermes 2 Pro Mistral 7B - Llama 2 13B Chat (AWQ) - Llama 2 7B Chat (FP16) - Llama 2 7B Chat (INT8) - Llama 2 7B Chat (LoRA) - Llama 3 8B Instruct - Llama 3 8B Instruct (AWQ) - Llama 3.1 8B Instruct (AWQ) - Llama 3.1 8B Instruct (FP8) - Llama 3.2 11B Vision Instruct - Llama 3.2 1B Instruct - Llama 3.2 3B Instruct - Llama 3.3 70B Instruct (FP8) - Llama 4 Scout Instruct - Llama Guard 3 8B - Mistral 7B Instruct v0.1 - Mistral 7B Instruct v0.1 (AWQ) - Mistral 7B Instruct v0.2 - Mistral 7B Instruct v0.2 (LoRA) - Mistral Small 3.1 24B Instruct - Neural Chat 7B v3.1 (AWQ) - OpenChat 3.5 0106 - OpenHermes 2.5 Mistral 7B (AWQ) - Phi-2 - Qwen 1.5 0.5B Chat - Qwen 1.5 1.8B Chat - Qwen 1.5 14B Chat (AWQ) - Qwen 1.5 7B Chat (AWQ) - Qwen 2.5 Coder 32B Instruct - Qwen QwQ 32B - SQLCoder 7B 2 - Starling LM 7B Beta - TinyLlama 1.1B Chat v1.0 - Una Cybertron 7B v2 (BF16) - Zephyr 7B Beta (AWQ) ## Providers with trial credits ### [Fireworks](https://fireworks.ai/) **Credits:** $1 **Models:** [Various open models](https://fireworks.ai/models) ### [Baseten](https://app.baseten.co/) **Credits:** $30 **Models:** [Any supported model - pay by compute time](https://www.baseten.co/library/) ### [Nebius](https://tokenfactory.nebius.com/) **Credits:** $1 **Models:** [Various open models](https://tokenfactory.nebius.com/models) ### [Novita](https://novita.ai/?ref=ytblmjc&utm_source=affiliate) **Credits:** $0.5 for 1 year **Models:** [Various open models](https://novita.ai/models) ### [AI21](https://studio.ai21.com/) **Credits:** $10 for 3 months **Models:** Jamba family of models ### [Upstage](https://console.upstage.ai/) **Credits:** $10 for 3 months **Models:** Solar Pro/Mini ### [NLP Cloud](https://nlpcloud.com/home) **Credits:** $15 **Requirements:** Phone number verification **Models:** Various open models ### [Alibaba Cloud (International) Model Studio](https://bailian.console.alibabacloud.com/) **Credits:** 1 million tokens/model **Models:** [Various open and proprietary Qwen models](https://www.alibabacloud.com/en/product/modelstudio) ### [Modal](https://modal.com) **Credits:** $5/month upon sign up, $30/month with payment method added **Models:** Any supported model - pay by compute time ### [Inference.net](https://inference.net) **Credits:** $1, $25 on responding to email survey **Models:** Various open models ### [Hyperbolic](https://app.hyperbolic.ai/) **Credits:** $1 **Models:** - DeepSeek V3 - DeepSeek V3 0324 - Llama 3.1 405B Base - Llama 3.1 405B Instruct - Llama 3.1 8B Instruct - Llama 3.2 3B Instruct - Llama 3.3 70B Instruct - Pixtral 12B (2409) - Qwen QwQ 32B - Qwen2.5 72B Instruct - Qwen2.5 Coder 32B Instruct - Qwen2.5 VL 72B Instruct - Qwen2.5 VL 7B Instruct - deepseek-ai/deepseek-r1-0528 - openai/gpt-oss-120b - openai/gpt-oss-120b-turbo - openai/gpt-oss-20b - qwen/qwen3-235b-a22b - qwen/qwen3-235b-a22b-instruct-2507 - qwen/qwen3-coder-480b-a35b-instruct - qwen/qwen3-next-80b-a3b-instruct - qwen/qwen3-next-80b-a3b-thinking ### [SambaNova Cloud](https://cloud.sambanova.ai/) **Credits:** $5 for 3 months **Models:** - E5-Mistral-7B-Instruct - Llama 3.1 8B - Llama 3.3 70B - Llama 3.3 70B - Llama-4-Maverick-17B-128E-Instruct - Qwen/Qwen3-235B - Qwen/Qwen3-32B - Whisper-Large-v3 - deepseek-ai/DeepSeek-R1-0528 - deepseek-ai/DeepSeek-R1-Distill-Llama-70B - deepseek-ai/DeepSeek-V3-0324 - deepseek-ai/DeepSeek-V3.1 - deepseek-ai/DeepSeek-V3.1-Terminus - deepseek-ai/DeepSeek-V3.2 - minimaxai/minimax-m2.5 - openai/gpt-oss-120b - tbd ### [Scaleway Generative APIs](https://console.scaleway.com/generative-api/models) **Credits:** 1,000,000 free tokens **Models:** - BGE-Multilingual-Gemma2 - DeepSeek R1 Distill Llama 70B - Gemma 3 27B Instruct - Llama 3.1 8B Instruct - Llama 3.3 70B Instruct - Mistral Nemo 2407 - Pixtral 12B (2409) - Whisper Large v3 - devstral-2-123b-instruct-2512 - gpt-oss-120b - holo2-30b-a3b - mistral-small-3.2-24b-instruct-2506 - qwen3-235b-a22b-instruct-2507 - qwen3-coder-30b-a3b-instruct - qwen3-embedding-8b - voxtral-small-24b-2507 ================================================ FILE: src/README_template.md ================================================ # Free LLM API resources This lists various services that provide free access or credits towards API-based LLM usage. > [!NOTE] > Please don't abuse these services, else we might lose them. > [!WARNING] > This list explicitly excludes any services that are not legitimate (eg reverse engineers an existing chatbot) {{TOC}} ## Free Providers {{MODEL_LIST}} ## Providers with trial credits {{TRIAL_LIST_MARKDOWN}} ================================================ FILE: src/data.py ================================================ MODEL_TO_NAME_MAPPING = { "@cf/deepseek-ai/deepseek-math-7b-instruct": "Deepseek Math 7B Instruct", "@cf/defog/sqlcoder-7b-2": "SQLCoder 7B 2", "@cf/fblgit/una-cybertron-7b-v2-bf16": "Una Cybertron 7B v2 (BF16)", "@cf/google/gemma-2b-it-lora": "Gemma 2B Instruct (LoRA)", "@cf/google/gemma-7b-it-lora": "Gemma 7B Instruct (LoRA)", "@cf/meta-llama/llama-2-7b-chat-hf-lora": "Llama 2 7B Chat (LoRA)", "@cf/meta/llama-2-7b-chat-fp16": "Llama 2 7B Chat (FP16)", "@cf/meta/llama-2-7b-chat-int8": "Llama 2 7B Chat (INT8)", "@cf/meta/llama-3-8b-instruct-awq": "Llama 3 8B Instruct (AWQ)", "@cf/meta/llama-3-8b-instruct": "Llama 3 8B Instruct", "@cf/meta/llama-3.1-8b-instruct-awq": "Llama 3.1 8B Instruct (AWQ)", "@cf/meta/llama-3.1-8b-instruct-fp8": "Llama 3.1 8B Instruct (FP8)", "@cf/meta/llama-3.1-8b-instruct": "Llama 3.1 8B Instruct", "@cf/microsoft/phi-2": "Phi-2", "@cf/mistral/mistral-7b-instruct-v0.1-vllm": "Mistral 7B Instruct v0.1", "@cf/mistral/mistral-7b-instruct-v0.1": "Mistral 7B Instruct v0.1", "@cf/mistral/mistral-7b-instruct-v0.2-lora": "Mistral 7B Instruct v0.2 (LoRA)", "@cf/openchat/openchat-3.5-0106": "OpenChat 3.5 0106", "@cf/qwen/qwen1.5-0.5b-chat": "Qwen 1.5 0.5B Chat", "@cf/qwen/qwen1.5-1.8b-chat": "Qwen 1.5 1.8B Chat", "@cf/qwen/qwen1.5-14b-chat-awq": "Qwen 1.5 14B Chat (AWQ)", "@cf/qwen/qwen1.5-7b-chat-awq": "Qwen 1.5 7B Chat (AWQ)", "@cf/thebloke/discolm-german-7b-v1-awq": "Discolm German 7B v1 (AWQ)", "@cf/tiiuae/falcon-7b-instruct": "Falcom 7B Instruct", "@cf/tinyllama/tinyllama-1.1b-chat-v1.0": "TinyLlama 1.1B Chat v1.0", "@hf/google/gemma-7b-it": "Gemma 7B Instruct", "@hf/meta-llama/meta-llama-3-8b-instruct": "Llama 3 8B Instruct", "@hf/mistral/mistral-7b-instruct-v0.2": "Mistral 7B Instruct v0.2", "@hf/nexusflow/starling-lm-7b-beta": "Starling LM 7B Beta", "@hf/nousresearch/hermes-2-pro-mistral-7b": "Hermes 2 Pro Mistral 7B", "@hf/thebloke/deepseek-coder-6.7b-base-awq": "Deepseek Coder 6.7B Base (AWQ)", "@hf/thebloke/deepseek-coder-6.7b-instruct-awq": "Deepseek Coder 6.7B Instruct (AWQ)", "@hf/thebloke/llama-2-13b-chat-awq": "Llama 2 13B Chat (AWQ)", "@hf/thebloke/llamaguard-7b-awq": "LlamaGuard 7B (AWQ)", "@hf/thebloke/mistral-7b-instruct-v0.1-awq": "Mistral 7B Instruct v0.1 (AWQ)", "@hf/thebloke/neural-chat-7b-v3-1-awq": "Neural Chat 7B v3.1 (AWQ)", "@hf/thebloke/openhermes-2.5-mistral-7b-awq": "OpenHermes 2.5 Mistral 7B (AWQ)", "@hf/thebloke/zephyr-7b-beta-awq": "Zephyr 7B Beta (AWQ)", "codellama-13b-instruct-hf": "CodeLlama 13B Instruct", "distil-whisper-large-v3-en": "Distil Whisper Large v3", "gemma-7b-it": "Gemma 7B Instruct (Deprecated)", "gemma2-9b-it": "Gemma 2 9B Instruct", "google/gemma-2-9b-it:free": "Gemma 2 9B Instruct", "google/gemma-7b-it:free": "Gemma 7B Instruct", "gryphe/mythomist-7b:free": "Mythomist 7B", "huggingfaceh4/zephyr-7b-beta:free": "Zephyr 7B Beta", "llama-2-13b-chat-hf": "Llama 2 13B Chat", "llama-3-70b-instruct": "Llama 3 70B Instruct", "llama-3-8b-instruct": "Llama 3 8B Instruct", "llama-3.1-405b-reasoning": "Llama 3.1 405B", "llama-3.1-70b-versatile": "Llama 3.1 70B", "llama-3.1-8b-instant": "Llama 3.1 8B", "llama-guard-3-8b": "Llama Guard 3 8B", "llama3-70b-8192": "Llama 3 70B", "llama3-8b-8192": "Llama 3 8B", "llama3-groq-70b-8192-tool-use-preview": "Llama 3 70B - Groq Tool Use Preview", "llama3-groq-8b-8192-tool-use-preview": "Llama 3 8B - Groq Tool Use Preview", "meta-llama/llama-3-8b-instruct:free": "Llama 3 8B Instruct", "meta-llama/llama-3.1-8b-instruct:free": "Llama 3.1 8B Instruct", "meta-llama/meta-llama-3-70b-instruct": "Llama 3 70B Instruct", "meta-llama/meta-llama-3.1-405b": "Llama 3.1 405B Base", "meta-llama/meta-llama-3.1-405b-fp8": "Llama 3.1 405B Base (FP8)", "meta-llama/meta-llama-3.1-405b-instruct": "Llama 3.1 405B Instruct", "meta-llama/meta-llama-3.1-70b-instruct": "Llama 3.1 70B Instruct", "meta-llama/meta-llama-3.1-8b-instruct": "Llama 3.1 8B Instruct", "microsoft/phi-3-medium-128k-instruct:free": "Phi-3 Medium 128k Instruct", "microsoft/phi-3-mini-128k-instruct:free": "Phi-3 Mini 128k Instruct", "mistral-7b-instruct": "Mistral 7B Instruct", "mistralai/mistral-7b-instruct:free": "Mistral 7B Instruct", "mixtral-8x22b-instruct": "Mixtral 8x22B Instruct", "mixtral-8x7b-32768": "Mixtral 8x7B", "mixtral-8x7b-instruct": "Mixtral 8x7B Instruct", "nousresearch/hermes-3-llama-3.1-70b": "Hermes 3 Llama 3.1 70B", "nousresearch/nous-capybara-7b:free": "Nous Capybara 7B", "openchat/openchat-7b:free": "OpenChat 7B", "qwen/qwen-2-7b-instruct:free": "Qwen 2 7B Instruct", "qwen/qwen2-72b-instruct": "Qwen 2 72B Instruct", "undi95/toppy-m-7b:free": "Toppy M 7B", "whisper-large-v3": "Whisper Large v3", "whisper-large-v3-turbo": "Whisper Large v3 Turbo", "01-ai/yi-34b-chat": "Yi 34B Chat", "01-ai/yi-1.5-34b-chat": "Yi 1.5 34B Chat", "nousresearch/hermes-3-llama-3.1-70b-fp8": "Hermes 3 Llama 3.1 70B (FP8)", "nousresearch/hermes-3-llama-3.1-405b:free": "Hermes 3 Llama 3.1 405B", "llava-v1.5-7b-4096-preview": "LLaVA 1.5 7B", "mattshumer/reflection-llama-3.1-70b": "Reflection Llama 3.1 70B", "mattshumer/reflection-70b:free": "Reflection Llama 3.1 70B", "mattshumer/reflection-llama-3.1-70b-completions": "Reflection Llama 3.1 70B Completions", "deepseek-ai/deepseek-v2.5": "DeepSeek V2.5", "mistralai/pixtral-12b-2409": "Pixtral 12B (2409)", "qwen/qwen2-vl-7b-instruct": "Qwen2-VL 7B Instruct", "mistralai/pixtral-12b:free": "Pixtral 12B", "qwen/qwen-2-vl-7b-instruct:free": "Qwen2-VL 7B Instruct", "qwen/qwen2-vl-72b-instruct": "Qwen2-VL 72B Instruct", "qwen/qwen2.5-72b-instruct": "Qwen2.5 72B Instruct", "llama-3.2-90b-text-preview": "Llama 3.2 90B (Text Only)", "llama-3.2-3b-preview": "Llama 3.2 3B", "llama-3.2-11b-text-preview": "Llama 3.2 11B (Text Only)", "llama-3.2-1b-preview": "Llama 3.2 1B", "@cf/meta/llama-3.2-1b-instruct": "Llama 3.2 1B Instruct", "meta-llama/llama-3.2-11b-vision-instruct:free": "Llama 3.2 11B Vision Instruct", "@cf/meta/llama-3.2-11b-vision-instruct": "Llama 3.2 11B Vision Instruct", "@cf/meta/llama-3.2-3b-instruct": "Llama 3.2 3B Instruct", "meta-llama/llama-3.2-90b-vision-instruct": "Llama 3.2 90B Vision Instruct", "meta-llama/llama-3.2-3b-instruct": "Llama 3.2 3B Instruct", "llama-3.2-11b-vision-preview": "Llama 3.2 11B Vision", "llama-3.2-90b-vision-preview": "Llama 3.2 90B Vision", "meta-llama/llama-3.2-90b-vision": "Llama 3.2 90B Vision", "meta-llama/llama-3.1-70b-instruct:free": "Llama 3.1 70B Instruct", "meta-llama/llama-3.2-1b-instruct:free": "Llama 3.2 1B Instruct", "liquid/lfm-40b:free": "Liquid LFM 40B", "meta-llama/llama-3.2-3b-instruct:free": "Llama 3.2 3B Instruct", "meta-llama/llama-3.1-405b-instruct:free": "Llama 3.1 405B Instruct", "mathstral-7b-v0.1": "Mathstral 7B v0.1", "llama-3.1-70b-instruct": "Llama 3.1 70B Instruct", "gryphe/mythomax-l2-13b:free": "Mythomax L2 13B", "meta-llama/llama-3.2-90b-vision-instruct:free": "Llama 3.2 90B Vision Instruct", "mamba-codestral-7b-v0-1": "Codestral Mamba 7B v0.1", "hermes3-70b": "Hermes 3 70B", "llama3.1-nemotron-70b-instruct": "Llama 3.1 Nemotron 70B Instruct", "llama3.2-3b-instruct": "Llama 3.2 3B Instruct", "llama3.1-8b-instruct": "Llama 3.1 8B Instruct", "llama3.1-70b-instruct-fp8": "Llama 3.1 70B Instruct (FP8)", "llama3.1-405b-instruct-fp8": "Llama 3.1 405B Instruct (FP8)", "hermes3-405b": "Hermes 3 405B", "deepseek-coder-v2-lite-instruct": "DeepSeek Coder v2 Lite Instruct", "hermes3-8b": "Hermes 3 8B", "dracarys2-72b-instruct": "Dracarys 2 72B Instruct", "lfm-40b": "Liquid LFM 40B", "qwen/qwen2.5-coder-32b-instruct": "Qwen2.5 Coder 32B Instruct", "thedrummer/unslopnemo-12b:free": "UnslopNemo 12B", "mistral-nemo-instruct-2407": "Mistral Nemo 2407", "google/gemini-exp-1121:free": "Gemini Experimental 1121", "meta-llama/llama-3.1-70b-instruct-fp8": "Llama 3.1 70B Instruct (FP8)", "google/learnlm-1.5-pro-experimental:free": "LearnLM 1.5 Pro Experimental", "google/gemini-exp-1114:free": "Gemini Experimental 1114", "qwen25-coder-32b-instruct": "Qwen2.5 Coder 32B Instruct", "qwen/qwq-32b-preview": "Qwen QwQ 32B Preview", "meta-llama/llama-3.3-70b-instruct": "Llama 3.3 70B Instruct", "llama-3.3-70b-versatile": "Llama 3.3 70B", "google/gemini-exp-1206:free": "Gemini Experimental 1206", "llama3.1-nemotron-70b-instruct-fp8": "Llama 3.1 Nemotron 70B Instruct (FP8)", "llama-3.3-70b-specdec": "Llama 3.3 70B (Speculative Decoding)", "@cf/meta/llama-3.3-70b-instruct-fp8-fast": "Llama 3.3 70B Instruct (FP8)", "google/gemini-2.0-flash-exp:free": "Gemini 2.0 Flash Experimental", "qwen2.5-coder-32b-instruct": "Qwen2.5 Coder 32B Instruct", "bge-multilingual-gemma2": "BGE-Multilingual-Gemma2", "pixtral-12b-2409": "Pixtral 12B (2409)", "google/gemini-2.0-flash-thinking-exp:free": "Gemini 2.0 Flash Thinking Experimental", "sentence-t5-xxl": "sentence-t5-xxl", "meta-llama/meta-llama-3.1-405b-instruct-virtuals": "Llama 3.1 405B Instruct Virtuals", "llama-3.1-8b-instruct": "Llama 3.1 8B Instruct", "deepseek-ai/deepseek-v3": "DeepSeek V3", "llava-next-mistral-7b": "Llava Next Mistral 7B", "llama-3.3-70b-instruct": "Llama 3.3 70B Instruct", "google/gemini-2.0-flash-thinking-exp-1219:free": "Gemini 2.0 Flash Thinking Experimental 1219", "sophosympatheia/rogue-rose-103b-v0.2:free": "Rogue Rose 103B v0.2", "deepseek-ai/deepseek-r1": "DeepSeek R1", "deepseek-ai/deepseek-r1-zero": "DeepSeek R1-Zero", "deepseek/deepseek-r1:free": "DeepSeek R1", "deepseek-r1-distill-llama-70b": "DeepSeek R1 Distill Llama 70B", "@cf/deepseek-ai/deepseek-r1-distill-qwen-32b": "DeepSeek R1 Distill Qwen 32B", "deepseek-ai/janus-pro-7b": "DeepSeek Janus Pro 7B", "deepseek-r1-distill-llama-8b": "DeepSeek R1 Distill Llama 8B", "nvidia/llama-3.1-nemotron-70b-instruct:free": "Llama 3.1 Nemotron 70B Instruct", "deepseek/deepseek-r1-distill-llama-70b:free": "DeepSeek R1 Distill Llama 70B", "qwen/qwen2.5-vl-72b-instruct:free": "Qwen2.5 VL 72B Instruct", "google/gemini-2.0-flash-lite-preview-02-05:free": "Gemini 2.0 Flash Lite Preview 02-05", "qwen/qwen-vl-plus:free": "Qwen VL Plus", "google/gemini-2.0-pro-exp-02-05:free": "Gemini 2.0 Pro Experimental 02-05", "deepseek-r1": "DeepSeek R1", "meta-llama/llama-3.3-70b-instruct:free": "Llama 3.3 70B Instruct", "deepseek/deepseek-chat:free": "DeepSeek V3", "deepseek-r1-distill-qwen-32b": "DeepSeek R1 Distill Qwen 32B", "mistralai/mistral-nemo:free": "Mistral Nemo", "allam-2-7b": "Allam 2 7B", "mistralai/mistral-small-24b-instruct-2501:free": "Mistral Small 24B Instruct 2501", "qwen-2.5-32b": "Qwen 2.5 32B", "cognitivecomputations/dolphin3.0-r1-mistral-24b:free": "Dolphin 3.0 R1 Mistral 24B", "qwen-2.5-coder-32b": "Qwen 2.5 Coder 32B", "cognitivecomputations/dolphin3.0-mistral-24b:free": "Dolphin 3.0 Mistral 24B", "deepseek-r1-671b": "DeepSeek R1", "@cf/meta/llama-guard-3-8b": "Llama Guard 3 8B", "mistral-saba-24b": "Mistral Saba 24B", "deepseek/deepseek-r1-zero:free": "DeepSeek R1 Zero", "nousresearch/deephermes-3-llama-3-8b-preview:free": "DeepHermes 3 Llama 3 8B Preview", "qwen-qwq-32b": "Qwen QwQ 32B", "qwen/qwq-32b": "Qwen QwQ 32B", "qwen/qwq-32b:free": "Qwen QwQ 32B", "qwen/qwen2.5-vl-7b-instruct": "Qwen2.5 VL 7B Instruct", "qwen/qwen-2.5-coder-32b-instruct:free": "Qwen2.5 Coder 32B Instruct", "mistral-7b-instruct-v0.3": "Mistral 7B Instruct v0.3", "moonshotai/moonlight-16b-a3b-instruct:free": "Moonlight-16B-A3B-Instruct", "google/gemma-3-27b-it:free": "Gemma 3 27B Instruct", "qwen/qwen-2.5-72b-instruct:free": "Qwen 2.5 72B Instruct", "rekaai/reka-flash-3:free": "Reka Flash 3", "deepseek/deepseek-r1-distill-qwen-32b:free": "DeepSeek R1 Distill Qwen 32B", "deepseek/deepseek-r1-distill-qwen-14b:free": "DeepSeek R1 Distill Qwen 14B", "qwen/qwen2.5-vl-72b-instruct": "Qwen2.5 VL 72B Instruct", "qwen/qwq-32b-preview:free": "Qwen QwQ 32B Preview", "google/gemma-3-12b-it:free": "Gemma 3 12B Instruct", "google/gemma-3-1b-it:free": "Gemma 3 1B Instruct", "google/gemma-3-4b-it:free": "Gemma 3 4B Instruct", "open-r1/olympiccoder-32b:free": "OlympicCoder 32B", "open-r1/olympiccoder-7b:free": "OlympicCoder 7B", "featherless/qwerky-72b:free": "Featherless Qwerky 72B", "qwen/qwen2.5-vl-32b-instruct:free": "Qwen 2.5 VL 32B Instruct", "deepseek/deepseek-chat-v3-0324:free": "DeepSeek V3 0324", "qwen/qwen-2.5-vl-7b-instruct:free": "Qwen 2.5 VL 7B Instruct", "deepseek-ai/deepseek-v3-0324": "DeepSeek V3 0324", "allenai/molmo-7b-d:free": "Molmo 7B D", "qwen/qwen2.5-vl-3b-instruct:free": "Qwen 2.5 VL 3B Instruct", "google/gemini-2.5-pro-exp-03-25:free": "Gemini 2.5 Pro Experimental 03-25", "mistralai/mistral-small-3.1-24b-instruct:free": "Mistral Small 3.1 24B Instruct", "bytedance-research/ui-tars-72b:free": "Bytedance UI Tars 72B", "meta-llama-3_3-70b-instruct": "Llama 3.3 70B Instruct", "mixtral-8x7b-instruct-v0.1": "Mixtral 8x7B Instruct v0.1", "deepseek/deepseek-v3-base:free": "DeepSeek V3 Base", "qwen2.5-vl-72b-instruct": "Qwen 2.5 VL 72B Instruct", "meta-llama-3_1-70b-instruct": "Llama 3.1 70B Instruct", "qwen/qwen-2.5-7b-instruct:free": "Qwen 2.5 7B Instruct", "mamba-codestral-7b-v0.1": "Mamba Codestral 7B v0.1", "meta-llama/llama-4-scout-17b-16e-instruct": "Llama 4 Scout Instruct", "@cf/meta/llama-4-scout-17b-16e-instruct": "Llama 4 Scout Instruct", "meta-llama/llama-4-scout:free": "Llama 4 Scout", "meta-llama/llama-4-maverick:free": "Llama 4 Maverick", "rekaai/reka-flash-3": "Reka Flash 3", "cognitivecomputations/dolphin3.0-mistral-24b": "Dolphin 3.0 Mistral 24B", "unsloth/gemma-3-12b-it": "Gemma 3 12B Instruct", "chutesai/llama-4-maverick-17b-128e-instruct-fp8": "Llama 4 Maverick 17B 128E Instruct FP8", "unsloth/gemma-3-1b-it": "Gemma 3 1B Instruct", "deepseek-ai/deepseek-v3-base": "DeepSeek V3 Base", "unsloth/gemma-3-4b-it": "Gemma 3 4B Instruct", "open-r1/olympiccoder-32b": "OlympicCoder 32B", "chutesai/llama-4-scout-17b-16e-instruct": "Llama 4 Scout 17B 16E Instruct", "cognitivecomputations/dolphin3.0-r1-mistral-24b": "Dolphin 3.0 R1 Mistral 24B", "open-r1/olympiccoder-7b": "OlympicCoder 7B", "nousresearch/deephermes-3-llama-3-8b-preview": "DeepHermes 3 Llama 3 8B Preview", "chutesai/mistral-small-3.1-24b-instruct-2503": "Mistral Small 3.1 24B Instruct 2503", "qwen/qwen2.5-vl-32b-instruct": "Qwen 2.5 VL 32B Instruct", "nvidia/llama-3_1-nemotron-ultra-253b-v1": "Llama 3.1 Nemotron Ultra 253B v1", "nvidia/llama-3.1-nemotron-ultra-253b-v1:free": "Llama 3.1 Nemotron Ultra 253B v1", "nvidia/llama-3.1-nemotron-nano-8b-v1": "Llama 3.1 Nemotron Nano 8B v1", "mistral-small-3.1-24b-instruct-2503": "Mistral Small 3.1 24B Instruct 2503", "nvidia/llama-3_3-nemotron-super-49b-v1": "Llama 3.3 Nemotron Super 49B v1", "gemma-3-27b-it": "Gemma 3 27B Instruct", "nvidia/llama-3.3-nemotron-super-49b-v1:free": "Llama 3.3 Nemotron Super 49B v1", "nvidia/llama-3.1-nemotron-nano-8b-v1:free": "Llama 3.1 Nemotron Nano 8B v1", "meta-llama/llama-4-maverick-17b-128e-instruct": "Llama 4 Maverick 17B 128E Instruct", "moonshotai/kimi-vl-a3b-thinking:free": "Kimi VL A3B Thinking", "moonshotai/kimi-vl-a3b-thinking": "Kimi VL A3B Thinking", "@cf/mistralai/mistral-small-3.1-24b-instruct": "Mistral Small 3.1 24B Instruct", "@cf/google/gemma-3-12b-it": "Gemma 3 12B Instruct", "@cf/qwen/qwq-32b": "Qwen QwQ 32B", "@cf/qwen/qwen2.5-coder-32b-instruct": "Qwen 2.5 Coder 32B Instruct", "arliai/qwq-32b-arliai-rpr-v1:free": "QwQ 32B ArliAI RpR v1", "agentica-org/deepcoder-14b-preview:free": "DeepCoder 14B Preview", "agentica-org/deepcoder-14b-preview": "DeepCoder 14B Preview", "arliai/qwq-32b-arliai-rpr-v1": "QwQ 32B ArliAI RpR v1", "shisa-ai/shisa-v2-llama3.3-70b:free": "Shisa V2 Llama 3.3 70B", "compound-beta-mini": "Groq compound-beta-mini", "compound-beta": "Groq compound-beta", "shisa-ai/shisa-v2-llama3.3-70b": "Shisa V2 Llama 3.3 70B", } HYPERBOLIC_IGNORED_MODELS = { "Wifhat", "FLUX.1-dev", "StableDiffusion", "Monad", "TTS", "deepseek-ai/Janus-Pro-7B", "test", "SDXL1.0-base", # Ignore DeepSeek R1 and R1-Zero because they are not available in the free tier. "deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-R1-Zero", } LAMBDA_IGNORED_MODELS = {"lfm-40b-vllm", "hermes3-405b-fp8-128k"} OPENROUTER_IGNORED_MODELS = { # Ignore gemini experimental free models because rate limits mean they are unusable. "google/gemini-exp-1121:free", "google/learnlm-1.5-pro-experimental:free", "google/gemini-exp-1114:free", "google/gemini-exp-1206:free", "google/gemini-2.0-flash-exp:free", "google/gemini-2.0-flash-thinking-exp:free", "google/gemini-2.0-flash-thinking-exp-1219:free", "google/gemini-flash-1.5-exp:free", "google/gemini-2.0-pro-exp-02-05:free", } ================================================ FILE: src/pull_available_models.py ================================================ #!/usr/bin/env python3 from collections import defaultdict import logging import json import requests import os from dotenv import load_dotenv from google.cloud import cloudquotas_v1 from mistralai import Mistral from concurrent.futures import ThreadPoolExecutor import time import re from data import ( MODEL_TO_NAME_MAPPING, HYPERBOLIC_IGNORED_MODELS, LAMBDA_IGNORED_MODELS, OPENROUTER_IGNORED_MODELS, ) load_dotenv() script_dir = os.path.dirname(os.path.abspath(__file__)) # Global clients mistral_client = Mistral(api_key=os.environ["MISTRAL_API_KEY"]) last_mistral_request_time = 0 def create_logger(provider_name): logger = logging.getLogger(provider_name) logger.setLevel(logging.DEBUG) handler = logging.StreamHandler() formatter = logging.Formatter(f"{provider_name}: %(message)s") handler.setFormatter(formatter) logger.addHandler(handler) return logger MISSING_MODELS = set() def get_model_name(id): id = id.lower() if id in MODEL_TO_NAME_MAPPING: return MODEL_TO_NAME_MAPPING[id] MISSING_MODELS.add(id) return id def get_groq_limits_for_stt_model(model_id, logger): logger.info(f"Getting limits for STT model {model_id}...") try: r = requests.post( "https://api.groq.com/openai/v1/audio/transcriptions", headers={ "Authorization": f'Bearer {os.environ["GROQ_API_KEY"]}', }, data={ "model": model_id, }, files={ "file": open(os.path.join(script_dir, "1-second-of-silence.mp3"), "rb"), }, ) except Exception as e: logger.error(f"Failed to get limits for model {model_id}: {e}") return {} try: r.raise_for_status() except Exception as e: logger.error(f"Failed to get limits for model {model_id}: {e}") logger.error(r.text) return {} audio_seconds_per_minute = int(r.headers["x-ratelimit-limit-audio-seconds"]) rpd = int(r.headers["x-ratelimit-limit-requests"]) return { "audio-seconds/minute": audio_seconds_per_minute, "requests/day": rpd, } def get_groq_limits_for_model(model_id, script_dir, logger): if "whisper" in model_id: return get_groq_limits_for_stt_model(model_id, logger) if "tts" in model_id: return None logger.info(f"Getting limits for chat model {model_id}...") try: r = requests.post( "https://api.groq.com/openai/v1/chat/completions", headers={ "Authorization": f'Bearer {os.environ["GROQ_API_KEY"]}', "Content-Type": "application/json", }, json={ "model": model_id, "messages": [{"role": "user", "content": "Hi!"}], "max_tokens": 1, "stream": True, }, stream=True, ) except Exception as e: logger.error(f"Failed to get limits for model {model_id}: {e}") return {} try: r.raise_for_status() rpd = int(r.headers["x-ratelimit-limit-requests"]) tpm = int(r.headers["x-ratelimit-limit-tokens"]) return {"requests/day": rpd, "tokens/minute": tpm} except Exception as e: logger.error(f"Failed to get limits for model {model_id}: {e}") logger.error(r.text) return {} def fetch_groq_models(logger): logger.info("Fetching Groq models...") r = requests.get( "https://api.groq.com/openai/v1/models", headers={ "Authorization": f'Bearer {os.environ["GROQ_API_KEY"]}', "Content-Type": "application/json", }, ) r.raise_for_status() models = r.json()["data"] logger.debug(json.dumps(models, indent=4)) ret_models = [] with ThreadPoolExecutor() as executor: futures = [] for model in models: future = executor.submit( get_groq_limits_for_model, model["id"], script_dir, logger ) futures.append((model, future)) for model, future in futures: limits = future.result() if limits is None: continue ret_models.append( { "id": model["id"], "name": get_model_name(model["id"]), "limits": limits, } ) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def fetch_kluster_models(logger): logger.info("Fetching Kluster models...") try: r = requests.get( "https://api.kluster.ai/v1/models", headers={ "Content-Type": "application/json", }, timeout=10, ) r.raise_for_status() # Parse the JSON response response = r.json() # Based on the paste-2.txt example, the structure should be: # {"object":"list","data":[{model1}, {model2}, ...]} if isinstance(response, dict) and "data" in response: models = response["data"] else: models = response logger.info(f"Fetched {len(models)} models from Kluster") ret_models = [] for model in models: # Extract fields from the model object model_id = model.get("id") model_name = model.get("name", model_id) # Skip models without an ID if not model_id: continue ret_models.append( { "id": model_id, "name": model_name, # Use actual name rather than lookup, as these are official names } ) logger.debug(json.dumps(ret_models, indent=4)) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models except requests.exceptions.RequestException as e: logger.error(f"Error fetching Kluster models: {e}") return [] except json.JSONDecodeError as e: logger.error(f"Error decoding JSON from Kluster API: {e}") logger.error(f"Response text: {r.text}") return [] def fetch_openrouter_models(logger): logger.info("Fetching OpenRouter models...") r = requests.get( "https://openrouter.ai/api/v1/models", headers={ "Content-Type": "application/json", }, ) r.raise_for_status() models = r.json()["data"] logger.info(f"Fetched {len(models)} models from OpenRouter") ret_models = [] for model in models: pricing = float(model.get("pricing", {}).get("completion", "1")) + float( model.get("pricing", {}).get("prompt", "1") ) if pricing != 0: continue if ":free" not in model["id"]: continue if model["id"].lower() in OPENROUTER_IGNORED_MODELS: logger.debug(f"Ignoring model {model['id']}") continue ret_models.append( { "id": model["id"], "name": get_model_name(model["id"]), "limits": { "requests/minute": 20, "requests/day": 50, }, } ) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def fetch_cloudflare_models(logger): logger.info("Fetching Cloudflare models...") r = requests.get( f"https://api.cloudflare.com/client/v4/accounts/{os.environ['CLOUDFLARE_ACCOUNT_ID']}/ai/models/search?search=Text+Generation", headers={ "Authorization": f'Bearer {os.environ["CLOUDFLARE_API_KEY"]}', "Content-Type": "application/json", }, ) r.raise_for_status() models = r.json()["result"] logger.info(f"Fetched {len(models)} models from Cloudflare") ret_models = [] for model in models: ret_models.append( { "id": model["name"], "name": get_model_name(model["name"]), } ) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def fetch_ovh_models(logger): logger.info("Fetching OVH models...") r = requests.get( "https://endpoints-backend.ai.cloud.ovh.net/rest/v1/models_v2", params={"select": "*", "order": "id.desc", "offset": "0", "limit": "100"}, headers={ "accept": "*/*", "accept-language": "en-GB,en-US;q=0.9,en;q=0.8", "accept-profile": "public", "apikey": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ewogICJyb2xlIjogImFub24iLAogICJpc3MiOiAic3VwYWJhc2UiLAogICJpYXQiOiAxNzEwNzE2NDAwLAogICJleHAiOiAxODY4NDgyODAwCn0.Jty_eO4oWqLm4Lx_LfbpRW5WESXYXtT2humbBq2Pal8", "authorization": "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.ewogICJyb2xlIjogImFub24iLAogICJpc3MiOiAic3VwYWJhc2UiLAogICJpYXQiOiAxNzEwNzE2NDAwLAogICJleHAiOiAxODY4NDgyODAwCn0.Jty_eO4oWqLm4Lx_LfbpRW5WESXYXtT2humbBq2Pal8", "priority": "u=1, i", "sec-ch-ua": '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "sec-fetch-dest": "empty", "sec-fetch-mode": "cors", "sec-fetch-site": "same-site", "x-client-info": "supabase-js-web/2.39.7", }, ) r.raise_for_status() models = list(filter(lambda x: x["available"] and "LLM" in x["category"], r.json())) logger.info(f"Fetched {len(models)} models from OVH") ret_models = [] for model in models: ret_models.append( { "id": model["name"], "name": get_model_name(model["name"]), "limits": { "requests/minute": 12, }, } ) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def fetch_hyperbolic_models(logger): logger.info("Fetching Hyperbolic models from API...") r = requests.get( "https://api.hyperbolic.xyz/v1/models", headers={ "accept": "application/json", "authorization": f"Bearer {os.environ['HYPERBOLIC_API_KEY']}", }, ) r.raise_for_status() models = r.json()["data"] logger.info(f"Fetched {len(models)} models from Hyperbolic's API") ret_models = [] for model in models: if model["id"] in HYPERBOLIC_IGNORED_MODELS: logger.debug(f"Ignoring model {model['id']}") continue ret_models.append( { "id": model["id"], "name": get_model_name(model["id"]), "limits": { "requests/minute": 60, }, } ) logger.debug(json.dumps(ret_models, indent=4)) return sorted(ret_models, key=lambda x: x["name"]) def fetch_github_models(logger): logger.info("Fetching GitHub models...") all_models_data = [] page = 1 total_pages = 1 # Initialize with 1 to start the loop while page <= total_pages: try: url = f"https://github.com/marketplace?type=models&page={page}" logger.info(f"Fetching from {url}") r = requests.get( url, headers={ "Accept": "application/json", "Content-Type": "application/json", "x-requested-with": "XMLHttpRequest", }, ) r.raise_for_status() data = r.json() current_page_models = data.get("results", []) if not current_page_models: logger.info(f"No models found on page {page}. Stopping.") break all_models_data.extend(current_page_models) total_pages = data.get("totalPages", 0) logger.info( f"Fetched page {page}/{total_pages}. Found {len(current_page_models)} models on this page." ) if page >= total_pages: break page += 1 time.sleep(0.5) # Be respectful to the API except requests.exceptions.RequestException as e: logger.error(f"Error fetching GitHub models on page {page}: {e}") if ( r.status_code == 404 and page == 1 ): # If first page is 404, likely endpoint changed or no models logger.error( "Initial request failed (404), assuming no models or endpoint issue." ) return [] elif ( r.status_code == 404 ): # If a subsequent page is 404, means we've gone past the last page logger.info(f"Reached end of pages (404 on page {page}).") break # For other errors, break or implement retry logic if desired break except json.JSONDecodeError as e: logger.error( f"Error decoding JSON from GitHub models API on page {page}: {e}" ) logger.error(f"Response text: {r.text}") break logger.info( f"Fetched a total of {len(all_models_data)} models from GitHub over {page-1 if page > 1 else 1} page(s)." ) ret_models = [] for model_data in all_models_data: # Ensure model_data is a dictionary and has the required keys if ( isinstance(model_data, dict) and "name" in model_data and "friendly_name" in model_data ): ret_models.append( { "id": model_data[ "name" ], # Using 'name' as id, can be changed if another field is more suitable "name": model_data["friendly_name"], } ) else: logger.warning(f"Skipping malformed model data: {model_data}") ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def fetch_gemini_limits(logger): logger.info("Fetching Gemini limits...") client = cloudquotas_v1.CloudQuotasClient() request = cloudquotas_v1.ListQuotaInfosRequest( parent=f"projects/{os.environ["GCP_PROJECT_ID"]}/locations/global/services/generativelanguage.googleapis.com" ) pager = client.list_quota_infos(request=request) models = defaultdict(dict) for quota in pager: if ( quota.metric == "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count" ): for dimension in quota.dimensions_infos: if dimension.details.value == -1: # -1 means unlimited continue models[dimension.dimensions.get("model")][ f"tokens/{quota.refresh_interval}" ] = dimension.details.value elif ( quota.metric == "generativelanguage.googleapis.com/generate_content_free_tier_requests" ): for dimension in quota.dimensions_infos: if dimension.details.value == -1: # -1 means unlimited continue models[dimension.dimensions.get("model")][ f"requests/{quota.refresh_interval}" ] = dimension.details.value logger.debug(json.dumps(models, indent=4)) return models def fetch_lambda_models(logger): logger.info("Fetching Lambda Labs models...") r = requests.get( "https://api.lambdalabs.com/v1/models", headers={ "Authorization": f"Bearer {os.environ['LAMBDA_API_KEY']}", }, ) r.raise_for_status() models = r.json()["data"] logger.info(f"Fetched {len(models)} models from Lambda Labs") ret_models = [] for model in models: if model["id"] in LAMBDA_IGNORED_MODELS: logger.debug(f"Ignoring model {model['id']}") continue ret_models.append( { "id": model["id"], "name": get_model_name(model["id"]), } ) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def rate_limited_mistral_chat(client, **kwargs): global last_mistral_request_time # Ensure at least 1 second between requests current_time = time.time() time_since_last = current_time - last_mistral_request_time if time_since_last < 1: time.sleep(1 - time_since_last) response = client.chat.complete(**kwargs) last_mistral_request_time = time.time() return response def fetch_samba_models(logger): logger.info("Fetching SambaNova models...") r = requests.get("https://cloud.sambanova.ai/api/pricing") r.raise_for_status() models = r.json()["prices"] logger.info(f"Fetched {len(models)} models from SambaNova") ret_models = [] for model in models: ret_models.append( { "id": model["model_id"], "name": model["model_name"] or model["model_id"], } ) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def fetch_scaleway_models(logger): logger.info("Fetching Scaleway models...") r = requests.get( "https://api.scaleway.ai/v1/models", headers={"Authorization": f"Bearer {os.environ['SCALEWAY_API_KEY']}"}, ) r.raise_for_status() models = r.json()["data"] logger.info(f"Fetched {len(models)} models from Scaleway") ret_models = [] for model in models: ret_models.append( { "id": model["id"], "name": get_model_name(model["id"]), } ) ret_models = sorted(ret_models, key=lambda x: x["name"]) return ret_models def fetch_cohere_models(logger): logger.info("Fetching Cohere models...") headers = { "accept": "application/json", "Authorization": f"Bearer {os.environ['COHERE_API_KEY']}", } params = {} all_models = [] page = 1 try: while True: response = requests.get( "https://api.cohere.com/v1/models", headers=headers, params=params or None, timeout=10, ) response.raise_for_status() payload = response.json() models = payload.get("models", []) logger.info(f"Fetched {len(models)} models from Cohere (page {page})") all_models.extend(models) next_token = payload.get("next_page_token") if not next_token: break params["page_token"] = next_token page += 1 except requests.exceptions.RequestException as exc: logger.error(f"Error fetching Cohere models: {exc}") return [] except json.JSONDecodeError as exc: logger.error(f"Error decoding Cohere API response: {exc}") return [] ret_models = [] for model in all_models: model_id = model.get("name") if not model_id: continue if model.get("is_deprecated"): logger.debug(f"Skipping deprecated Cohere model {model_id}") continue endpoints = set(model.get("endpoints") or []) | set( model.get("default_endpoints") or [] ) if "chat" not in endpoints: logger.debug(f"Skipping non-chat Cohere model {model_id}") continue ret_models.append( { "id": model_id, "name": get_model_name(model_id), } ) logger.info(f"Found {len(ret_models)} Cohere chat models") return sorted(ret_models, key=lambda x: x["name"]) def fetch_chutes_models(logger): logger.info("Fetching Chutes models...") r = requests.get( "https://api.chutes.ai/chutes/?include_public=true&limit=1000", headers={ "Content-Type": "application/json", }, ) r.raise_for_status() models = r.json()["items"] logger.info(f"Fetched {len(models)} models from Chutes") # Filter for free models based on per_million_token price free_models = [] for model in models: price_info = model.get("current_estimated_price", {}) # Check if per_million_tokens field exists and is set to 0 for USD if price_info.get("per_million_tokens", {}).get("usd", 1) == 0: model_name = model.get("name", "Unknown model") free_models.append( { "id": model_name, "name": get_model_name(model_name), "description": model.get("tagline", ""), } ) logger.info(f"Found {len(free_models)} free models from Chutes") return sorted(free_models, key=lambda x: x["name"]) def get_human_limits(model, seperator="
"): if "limits" not in model: return "" limits = model["limits"] return seperator.join([f"{value:,} {key}" for key, value in limits.items()]) def generate_toc(markdown): toc_lines = [] # Find all ## and ### headings, but skip the main title (# ...) headings = re.findall(r"^(#{2,3}) +(.+)", markdown, re.MULTILINE) for hashes, title in headings: # Remove markdown links for anchor text, keep display text display = re.sub(r"\[(.*?)\]\([^)]*\)", r"\1", title) # Build anchor (GitHub style) anchor = display.lower() anchor = re.sub(r"[^a-z0-9 \-_]", "", anchor) anchor = anchor.replace(" ", "-") anchor = anchor.replace("--", "-") anchor = anchor.strip("-") indent = " " if len(hashes) == 3 else "" toc_lines.append(f"{indent}- [{display}](#{anchor})") return "\n".join(toc_lines) def main(): logger = create_logger("Main") groq_logger = create_logger("Groq") openrouter_logger = create_logger("OpenRouter") google_ai_studio_logger = create_logger("Google AI Studio") cloudflare_logger = create_logger("Cloudflare") github_logger = create_logger("GitHub") hyperbolic_logger = create_logger("Hyperbolic") samba_logger = create_logger("SambaNova") scaleway_logger = create_logger("Scaleway") cohere_logger = create_logger("Cohere") fetch_concurrently = os.getenv("FETCH_CONCURRENTLY", "false").lower() == "true" if fetch_concurrently: with ThreadPoolExecutor() as executor: futures = [ executor.submit(fetch_gemini_limits, google_ai_studio_logger), executor.submit(fetch_openrouter_models, openrouter_logger), executor.submit(fetch_hyperbolic_models, hyperbolic_logger), executor.submit(fetch_cloudflare_models, cloudflare_logger), executor.submit(fetch_github_models, github_logger), executor.submit(fetch_samba_models, samba_logger), executor.submit(fetch_scaleway_models, scaleway_logger), executor.submit(fetch_cohere_models, cohere_logger), ] ( gemini_models, openrouter_models, hyperbolic_models, cloudflare_models, github_models, samba_models, scaleway_models, cohere_models, ) = [f.result() for f in futures] # Fetch groq models after others complete groq_models = fetch_groq_models(groq_logger) else: gemini_models = fetch_gemini_limits(google_ai_studio_logger) openrouter_models = fetch_openrouter_models(openrouter_logger) hyperbolic_models = fetch_hyperbolic_models(hyperbolic_logger) cloudflare_models = fetch_cloudflare_models(cloudflare_logger) github_models = fetch_github_models(github_logger) samba_models = fetch_samba_models(samba_logger) scaleway_models = fetch_scaleway_models(scaleway_logger) cohere_models = fetch_cohere_models(cohere_logger) groq_models = fetch_groq_models(groq_logger) # Initialize markdown string for free providers model_list_markdown = "" # --- OpenRouter --- model_list_markdown += "### [OpenRouter](https://openrouter.ai)\n\n" if openrouter_models: provider_limits = get_human_limits(openrouter_models[0]) model_list_markdown += "**Limits:**\n\n" model_list_markdown += f"[{provider_limits}
Up to 1000 requests/day with $10 lifetime topup](https://openrouter.ai/docs/api/reference/limits)\n\n" model_list_markdown += "Models share a common quota.\n\n" for model in openrouter_models: model_list_markdown += ( f"- [{model['name']}](https://openrouter.ai/{model['id']})\n" ) model_list_markdown += "\n" # --- Google AI Studio --- model_list_markdown += "### [Google AI Studio](https://aistudio.google.com)\n\n" model_list_markdown += ( "Data is used for training when used outside of the UK/CH/EEA/EU.\n\n" ) model_list_markdown += "\n" gemini_text_models = [ { "id": "gemini-3-flash-preview", "name": "Gemini 3 Flash", "limits": gemini_models.get("gemini-3-flash", {}), }, { "id": "gemini-3.1-flash-lite-preview", "name": "Gemini 3.1 Flash-Lite", "limits": gemini_models.get("gemini-3.1-flash-lite", {}), }, { "id": "gemini-2.5-flash", "name": "Gemini 2.5 Flash", "limits": gemini_models.get("gemini-2.5-flash", {}), }, { "id": "gemini-2.5-flash-lite", "name": "Gemini 2.5 Flash-Lite", "limits": gemini_models.get("gemini-2.5-flash-lite", {}), }, { "id": "gemma-3-27b-it", "name": "Gemma 3 27B Instruct", "limits": gemini_models.get("gemma-3-27b", {}), }, { "id": "gemma-3-12b-it", "name": "Gemma 3 12B Instruct", "limits": gemini_models.get("gemma-3-12b", {}), }, { "id": "gemma-3-4b-it", "name": "Gemma 3 4B Instruct", "limits": gemini_models.get("gemma-3-4b", {}), }, { "id": "gemma-3-1b-it", "name": "Gemma 3 1B Instruct", "limits": gemini_models.get("gemma-3-1b", {}), }, ] # Write text models to table for model in gemini_text_models: limits_str = get_human_limits(model) model_list_markdown += ( f"\n" ) model_list_markdown += "
Model NameModel Limits
{model['name']}{limits_str}
\n\n" # --- NVIDIA NIM --- model_list_markdown += ( "### [NVIDIA NIM](https://build.nvidia.com/explore/discover)\n\n" ) model_list_markdown += "Phone number verification required.\n" model_list_markdown += "Models tend to be context window limited.\n\n" model_list_markdown += "**Limits:** 40 requests/minute\n\n" model_list_markdown += "- [Various open models](https://build.nvidia.com/models)\n" model_list_markdown += "\n" # --- Mistral (La Plateforme) --- model_list_markdown += ( "### [Mistral (La Plateforme)](https://console.mistral.ai/)\n\n" ) model_list_markdown += ( "* Free tier (Experiment plan) requires opting into data training\n" ) model_list_markdown += "* Requires phone number verification.\n\n" model_list_markdown += "**Limits (per-model):** 1 request/second, 500,000 tokens/minute, 1,000,000,000 tokens/month\n\n" model_list_markdown += "- [Open and Proprietary Mistral models](https://docs.mistral.ai/getting-started/models/models_overview/)\n" model_list_markdown += "\n" # --- Mistral (Codestral) --- model_list_markdown += ( "### [Mistral (Codestral)](https://codestral.mistral.ai/)\n\n" ) model_list_markdown += "* Currently free to use\n" model_list_markdown += "* Monthly subscription based\n" model_list_markdown += "* Requires phone number verification\n\n" model_list_markdown += "**Limits:** 30 requests/minute, 2,000 requests/day\n\n" model_list_markdown += "- Codestral\n" model_list_markdown += "\n" # --- HuggingFace Serverless Inference --- model_list_markdown += "### [HuggingFace Inference Providers](https://huggingface.co/docs/inference-providers/en/index)\n\n" model_list_markdown += "HuggingFace Serverless Inference limited to models smaller than 10GB. Some popular models are supported even if they exceed 10GB.\n\n" model_list_markdown += "**Limits:** [$0.10/month in credits](https://huggingface.co/docs/inference-providers/en/pricing)\n\n" model_list_markdown += "- Various open models across supported providers\n" model_list_markdown += "\n" # --- Vercel AI Gateway --- model_list_markdown += "### [Vercel AI Gateway](https://vercel.com/docs/ai-gateway)\n\n" model_list_markdown += "Routes to various supported providers.\n\n" model_list_markdown += "**Limits:** [$5/month](https://vercel.com/docs/ai-gateway/pricing)\n\n" model_list_markdown += "\n" # --- OpenCode Zen --- model_list_markdown += "### [OpenCode Zen](https://opencode.ai/docs/zen/)\n\n" model_list_markdown += "AI gateway with curated models.\n\n" model_list_markdown += "Free models may use data for improvement.\n\n" model_list_markdown += "- Big Pickle Stealth\n" model_list_markdown += "- MiniMax M2.5 Free\n" model_list_markdown += "- Arcee Large Preview Free\n" model_list_markdown += "\n" # --- Cerebras --- model_list_markdown += "### [Cerebras](https://cloud.cerebras.ai/)\n\n" model_list_markdown += "\n" cerebras_models = [ { "name": "gpt-oss-120b", "limits_text": "30 requests/minute
60,000 tokens/minute
900 requests/hour
1,000,000 tokens/hour
14,400 requests/day
1,000,000 tokens/day" }, { "name": "Llama 3.1 8B", "limits_text": "30 requests/minute
60,000 tokens/minute
900 requests/hour
1,000,000 tokens/hour
14,400 requests/day
1,000,000 tokens/day" }, ] for model in cerebras_models: model_list_markdown += ( f"\n" ) model_list_markdown += "
Model NameModel Limits
{model['name']}{model['limits_text']}
\n\n" # --- Groq --- model_list_markdown += "### [Groq](https://console.groq.com)\n\n" if groq_models: model_list_markdown += "\n" for model in groq_models: limits_str = get_human_limits(model) model_list_markdown += ( f"\n" ) model_list_markdown += "
Model NameModel Limits
{model['name']}{limits_str}
\n" model_list_markdown += "\n" # --- Cohere --- model_list_markdown += "### [Cohere](https://cohere.com)\n\n" model_list_markdown += "**Limits:**\n\n" model_list_markdown += "[20 requests/minute
1,000 requests/month](https://docs.cohere.com/docs/rate-limits)\n\n" model_list_markdown += "Models share a common monthly quota.\n\n" if cohere_models: for model in cohere_models: model_list_markdown += f"- {model['name']}\n" else: model_list_markdown += "- No chat models available right now.\n" model_list_markdown += "\n" # --- GitHub Models --- model_list_markdown += ( "### [GitHub Models](https://github.com/marketplace/models)\n\n" ) model_list_markdown += "Extremely restrictive input/output token limits.\n\n" model_list_markdown += "**Limits:** [Dependent on Copilot subscription tier (Free/Pro/Pro+/Business/Enterprise)](https://docs.github.com/en/github-models/prototyping-with-ai-models#rate-limits)\n\n" if github_models: for model in github_models: model_list_markdown += f"- {model['name']}\n" model_list_markdown += "\n" # --- Cloudflare Workers AI --- model_list_markdown += ( "### [Cloudflare Workers AI](https://developers.cloudflare.com/workers-ai)\n\n" ) model_list_markdown += "**Limits:** [10,000 neurons/day](https://developers.cloudflare.com/workers-ai/platform/pricing/#free-allocation)\n\n" if cloudflare_models: for model in cloudflare_models: model_list_markdown += f"- {model['name']}\n" model_list_markdown += "\n" # --- Google Cloud Vertex AI --- vertex_llama_models = [] vertex_gemini_models = [] vertex_deepseek_models = [] if vertex_llama_models or vertex_gemini_models or vertex_deepseek_models: model_list_markdown += "### [Google Cloud Vertex AI](https://console.cloud.google.com/vertex-ai/model-garden)\n\n" model_list_markdown += "Very stringent payment verification for Google Cloud.\n\n" model_list_markdown += "\n" # Write Gemini models to table first_gemini = True if vertex_gemini_models: for model in vertex_gemini_models: limits_str = get_human_limits(model) model_list_markdown += f'' if first_gemini: model_list_markdown += f'' first_gemini = False model_list_markdown += "\n" # Write Llama models to table if vertex_llama_models: for model in vertex_llama_models: limits_str = get_human_limits(model) model_list_markdown += f'\n' # Write DeepSeek models to table if vertex_deepseek_models: for model in vertex_deepseek_models: limits_str = get_human_limits(model) model_list_markdown += f'\n' model_list_markdown += "
Model NameModel Limits
{model['name']}{limits_str}
Shared Quota
{model['name']}{limits_str}
Free during preview
{model['name']}{limits_str}
Free during preview
\n\n" # --- Trial Providers Section Generation --- trial_list_markdown = "" # --- Static Trial Providers (Markdown List/Simple Entry) --- trial_providers_static = [ { "name": "Fireworks", "url": "https://fireworks.ai/", "credits": "$1", "requirements": "", "models_desc": "[Various open models](https://fireworks.ai/models)", }, { "name": "Baseten", "url": "https://app.baseten.co/", "credits": "$30", "requirements": "", "models_desc": "[Any supported model - pay by compute time](https://www.baseten.co/library/)", }, { "name": "Nebius", "url": "https://tokenfactory.nebius.com/", "credits": "$1", "requirements": "", "models_desc": "[Various open models](https://tokenfactory.nebius.com/models)", }, { "name": "Novita", "url": "https://novita.ai/?ref=ytblmjc&utm_source=affiliate", "credits": "$0.5 for 1 year", "requirements": "", "models_desc": "[Various open models](https://novita.ai/models)", }, { "name": "AI21", "url": "https://studio.ai21.com/", "credits": "$10 for 3 months", "requirements": "", "models_desc": "Jamba family of models", }, { "name": "Upstage", "url": "https://console.upstage.ai/", "credits": "$10 for 3 months", "requirements": "", "models_desc": "Solar Pro/Mini", }, { "name": "NLP Cloud", "url": "https://nlpcloud.com/home", "credits": "$15", "requirements": "Phone number verification", "models_desc": "Various open models", }, { "name": "Alibaba Cloud (International) Model Studio", "url": "https://bailian.console.alibabacloud.com/", "credits": "1 million tokens/model", "requirements": "", "models_desc": "[Various open and proprietary Qwen models](https://www.alibabacloud.com/en/product/modelstudio)", }, { "name": "Modal", "url": "https://modal.com", "credits": "$5/month upon sign up, $30/month with payment method added", "requirements": "", "models_desc": "Any supported model - pay by compute time", }, { "name": "Inference.net", "url": "https://inference.net", "credits": "$1, $25 on responding to email survey", "requirements": "", "models_desc": "Various open models", }, ] for provider in trial_providers_static: trial_list_markdown += f"### [{provider['name']}]({provider['url']})\n\n" trial_list_markdown += f"**Credits:** {provider['credits']}\n\n" if provider["requirements"]: trial_list_markdown += f"**Requirements:** {provider['requirements']}\n\n" trial_list_markdown += f"**Models:** {provider['models_desc']}\n\n" # --- Hyperbolic (Trial - Table) --- if hyperbolic_models: trial_list_markdown += "### [Hyperbolic](https://app.hyperbolic.ai/)\n\n" trial_list_markdown += "**Credits:** $1\n\n" trial_list_markdown += "**Models:**\n" for model in hyperbolic_models: trial_list_markdown += f"- {model['name']}\n" trial_list_markdown += "\n" # --- SambaNova Cloud (Trial - Table) --- if samba_models: trial_list_markdown += "### [SambaNova Cloud](https://cloud.sambanova.ai/)\n\n" trial_list_markdown += "**Credits:** $5 for 3 months\n\n" trial_list_markdown += "**Models:**\n" for model in samba_models: trial_list_markdown += f"- {model['name']}\n" trial_list_markdown += "\n" # --- Scaleway Generative APIs (Trial - Table) --- if scaleway_models: trial_list_markdown += "### [Scaleway Generative APIs](https://console.scaleway.com/generative-api/models)\n\n" trial_list_markdown += "**Credits:** 1,000,000 free tokens\n\n" trial_list_markdown += "**Models:**\n" for model in scaleway_models: trial_list_markdown += f"- {model['name']}\n" trial_list_markdown += "\n" if MISSING_MODELS: logger.warning("Missing models:") logger.warning( "\n" + "\n".join([f'"{model}": "{model}",' for model in MISSING_MODELS]) ) with open(os.path.join(script_dir, "README_template.md"), "r") as f: readme = f.read() warning = """ """ initial_templated = ( (warning + readme) .replace("{{MODEL_LIST}}", model_list_markdown) .replace("{{TRIAL_LIST_MARKDOWN}}", trial_list_markdown) ) toc_markdown = generate_toc(initial_templated) with open(os.path.join(script_dir, "..", "README.md"), "w") as f: f.write(initial_templated.replace("{{TOC}}", toc_markdown)) logger.info("Wrote models to README.md") if __name__ == "__main__": main() ================================================ FILE: src/requirements.txt ================================================ requests python-dotenv google-cloud-quotas mistralai beautifulsoup4