{"kind":"llmreference.router.index","schema_version":1,"resource":"routers","source":"data/seed/router.json","routes":{"api_index":"/api/routers","api_detail":"/api/router/{slug}","human_directory":"/routers","human_detail":"/router/{slug}"},"count":18,"active_count":17,"best_pages":[{"slug":"llm-gateways","href":"/best/llm-gateways","title":"Best LLM gateways","filter":{"types":["gateway"]},"count":4},{"slug":"llm-routers","href":"/best/llm-routers","title":"Best LLM routers","filter":{"types":["router"]},"count":10},{"slug":"openrouter-alternatives","href":"/best/openrouter-alternatives","title":"OpenRouter alternatives","filter":{"types":["gateway","hybrid"],"excludedSlugs":["openrouter"]},"count":6},{"slug":"self-hosted-router","href":"/best/self-hosted-router","title":"Self-hosted LLM routers","filter":{"hosting":"self_hosted"},"count":4},{"slug":"open-source-llm-router","href":"/best/open-source-llm-router","title":"Open-source LLM routers","filter":{"openness":"open_source"},"count":5},{"slug":"cheapest-llm-gateway","href":"/best/cheapest-llm-gateway","title":"Cheapest LLM gateway options","filter":{"objective":"cost"},"count":17},{"slug":"llm-cost-optimization","href":"/best/llm-cost-optimization","title":"LLM cost optimization routers","filter":{"objective":"cost"},"count":17}],"routers":[{"slug":"airouter","name":"AIRouter","vendor":"Heureka Labs UG","type":"router","status":"active","summary":"Commercial LLM router that analyzes incoming requests and routes to the optimal model for cost/quality/latency via a drop-in OpenAI-compatible API, with a privacy-preserving embedding mode that avoids sending prompt content.","editor_take":null,"best_for":["cost optimization","OpenAI drop-in replacement","privacy-aware routing"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api"],"models_count":null,"api_compatibility":["openai"],"pricing_model":"passthrough_plus_fee","data_retention":"unknown","self_host_available":false,"href":"/router/airouter","api_href":"/api/router/airouter","last_checked":"2026-06-08"},{"slug":"bedrock-intelligent-prompt-routing","name":"Amazon Bedrock Intelligent Prompt Routing","vendor":"Amazon Web Services","type":"router","status":"active","summary":"AWS Bedrock's native intelligent prompt router that routes prompts between Anthropic Claude model tiers (Haiku/Sonnet) based on predicted task complexity, with no extra per-routing charge.","editor_take":null,"best_for":["Bedrock users","Claude cost optimization","AWS ecosystem"],"target_providers":["aws-bedrock"],"target_provider_hrefs":["/provider/aws-bedrock"],"models_count":null,"api_compatibility":["native"],"pricing_model":"passthrough","data_retention":"retains","self_host_available":false,"href":"/router/bedrock-intelligent-prompt-routing","api_href":"/api/router/bedrock-intelligent-prompt-routing","last_checked":"2026-06-08"},{"slug":"azure-foundry-model-router","name":"Azure AI Foundry Model Router","vendor":"Microsoft","type":"router","status":"active","summary":"Microsoft Azure AI Foundry's native model router that uses a trained ML model to route each prompt in real time to the optimal Azure-hosted model, with Balanced/Cost/Quality mode selection and automatic failover.","editor_take":null,"best_for":["Azure AI Foundry users","cost optimization","Microsoft ecosystem","zero-configuration routing"],"target_providers":["microsoft-foundry","azure-openai"],"target_provider_hrefs":["/provider/microsoft-foundry","/provider/azure-openai"],"models_count":null,"api_compatibility":["openai"],"pricing_model":"passthrough","data_retention":"retains","self_host_available":false,"href":"/router/azure-foundry-model-router","api_href":"/api/router/azure-foundry-model-router","last_checked":"2026-06-08"},{"slug":"helicone","name":"Helicone","vendor":"Helicone","type":"gateway","status":"active","summary":"Observability-first AI gateway with routing, caching, rate limiting, and request tracing; Apache 2.0 open-source core with a managed hosted tier for logging and analytics.","editor_take":null,"best_for":["observability","LLM tracing","cost tracking","self-hosting"],"target_providers":["openai-api","anthropic-api","microsoft-foundry","azure-openai","google-ai-studio","gcp-vertex-ai"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/microsoft-foundry","/provider/azure-openai","/provider/google-ai-studio","/provider/gcp-vertex-ai"],"models_count":null,"api_compatibility":["openai","anthropic"],"pricing_model":"subscription","data_retention":"opt_in_logging","self_host_available":true,"href":"/router/helicone","api_href":"/api/router/helicone","last_checked":"2026-06-08"},{"slug":"kong-ai-gateway","name":"Kong AI Gateway","vendor":"Kong Inc.","type":"gateway","status":"active","summary":"Multi-LLM AI gateway built on Kong Gateway 3.x, adding semantic routing, load balancing, guardrails, and MCP traffic analytics as plugins over Kong's existing API management platform.","editor_take":null,"best_for":["enterprise API management","multi-LLM routing","Kong users","self-hosting"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","microsoft-foundry","azure-openai","cohere-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/microsoft-foundry","/provider/azure-openai","/provider/cohere-api"],"models_count":null,"api_compatibility":["openai","native"],"pricing_model":"subscription","data_retention":"unknown","self_host_available":true,"href":"/router/kong-ai-gateway","api_href":"/api/router/kong-ai-gateway","last_checked":"2026-06-08"},{"slug":"litellm","name":"LiteLLM","vendor":"BerriAI","type":"gateway","status":"active","summary":"Open-source Python SDK and proxy server that unifies 100+ LLM APIs behind a single OpenAI-compatible interface, with load balancing, cost tracking, and configurable failover.","editor_take":null,"best_for":["open source","self-hosting","OpenAI-compatible proxy","developer tooling","cost tracking"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","microsoft-foundry","azure-openai","cohere-api","mistral-ai-api","deepseek-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/microsoft-foundry","/provider/azure-openai","/provider/cohere-api","/provider/mistral-ai-api","/provider/deepseek-api"],"models_count":100,"api_compatibility":["openai"],"pricing_model":"free_oss","data_retention":"zero_retention","self_host_available":true,"href":"/router/litellm","api_href":"/api/router/litellm","last_checked":"2026-06-08"},{"slug":"martian","name":"Martian","vendor":"Martian, Inc.","type":"router","status":"active","summary":"AI-powered LLM router that analyzes each prompt in real-time to select the optimal model, targeting 20–97% cost reduction while maintaining quality; San Francisco startup reportedly nearing $1.3B valuation.","editor_take":null,"best_for":["cost reduction","quality optimization","enterprise routing"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api"],"models_count":null,"api_compatibility":["openai","native"],"pricing_model":"passthrough_plus_fee","data_retention":"unknown","self_host_available":false,"href":"/router/martian","api_href":"/api/router/martian","last_checked":"2026-06-08"},{"slug":"neutrino","name":"Neutrino AI","vendor":"Neutrino AI","type":"router","status":"active","summary":"Commercial LLM router that dynamically routes each query to the best-suited model with load balancing and fallback handling, charging 3% of underlying AI spend.","editor_take":null,"best_for":["cost reduction","dynamic model selection","load balancing","fallback handling"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api"],"models_count":null,"api_compatibility":["native"],"pricing_model":"passthrough_plus_fee","data_retention":"opt_in_logging","self_host_available":false,"href":"/router/neutrino","api_href":"/api/router/neutrino","last_checked":"2026-06-08"},{"slug":"not-diamond","name":"Not Diamond","vendor":"Not Diamond","type":"router","status":"active","summary":"Predictive model router that determines the best LLM for each query; claims up to 25% accuracy gains and 10x cost reduction; powers OpenRouter's auto mode and is positioned specifically for coding agents.","editor_take":null,"best_for":["coding agents","accuracy optimization","cost reduction"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api"],"models_count":null,"api_compatibility":["native"],"pricing_model":"enterprise_quote","data_retention":"opt_in_logging","self_host_available":false,"href":"/router/not-diamond","api_href":"/api/router/not-diamond","last_checked":"2026-06-08"},{"slug":"nvidia-llm-router","name":"NVIDIA LLM Router Blueprint","vendor":"NVIDIA","type":"router","status":"deprecated","summary":"NVIDIA's open-source AI blueprint for LLM routing that selects the optimal model per prompt via intent classification or neural auto-routing; being deprecated 2026-06-20.","editor_take":null,"best_for":["open source","reference implementation","NVIDIA ecosystem","self-hosting"],"target_providers":["openai-api","anthropic-api","nvidia-nim"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/nvidia-nim"],"models_count":null,"api_compatibility":["openai","native"],"pricing_model":"free_oss","data_retention":"zero_retention","self_host_available":true,"href":"/router/nvidia-llm-router","api_href":"/api/router/nvidia-llm-router","last_checked":"2026-06-08"},{"slug":"openai-auto","name":"OpenAI Auto Routing (GPT-5 Auto)","vendor":"OpenAI","type":"router","status":"active","summary":"OpenAI's native auto-routing mode (GPT-5 Auto) that dynamically routes each API request between GPT-5 and GPT-5 Instant based on prompt complexity, with no extra charge beyond model token costs.","editor_take":null,"best_for":["OpenAI users","zero-configuration routing","automatic cost optimization"],"target_providers":["openai-api"],"target_provider_hrefs":["/provider/openai-api"],"models_count":null,"api_compatibility":["openai","native"],"pricing_model":"passthrough","data_retention":"retains","self_host_available":false,"href":"/router/openai-auto","api_href":"/api/router/openai-auto","last_checked":"2026-06-08"},{"slug":"openrouter","name":"OpenRouter","vendor":"OpenRouter, Inc.","type":"hybrid","status":"active","summary":"Unified hybrid gateway to 400+ models from 60+ providers via a single OpenAI-compatible API, with optional auto-routing that selects the best model per prompt.","editor_take":null,"best_for":["multi-provider access","cost optimization","auto model selection","provider fallback"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api","xai-console","deepseek-api","cohere-api","together-ai","fireworks-ai"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api","/provider/xai-console","/provider/deepseek-api","/provider/cohere-api","/provider/together-ai","/provider/fireworks-ai"],"models_count":400,"api_compatibility":["openai"],"pricing_model":"passthrough","data_retention":"opt_in_logging","self_host_available":false,"href":"/router/openrouter","api_href":"/api/router/openrouter","last_checked":"2026-06-08"},{"slug":"portkey","name":"Portkey","vendor":"Portkey AI","type":"gateway","status":"active","summary":"Production AI gateway routing to 1,600+ LLMs with failover, load balancing, semantic caching, and guardrails; Apache 2.0 core is fully self-hostable with the complete feature set.","editor_take":null,"best_for":["production reliability","governance","observability","self-hosting","enterprise"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","microsoft-foundry","azure-openai","cohere-api","mistral-ai-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/microsoft-foundry","/provider/azure-openai","/provider/cohere-api","/provider/mistral-ai-api"],"models_count":1600,"api_compatibility":["openai"],"pricing_model":"subscription","data_retention":"opt_in_logging","self_host_available":true,"href":"/router/portkey","api_href":"/api/router/portkey","last_checked":"2026-06-08"},{"slug":"requesty","name":"Requesty","vendor":"Requesty","type":"hybrid","status":"active","summary":"AI gateway to 400+ LLM providers with intelligent routing, caching, guardrails, and governance; flat 5% markup on model costs with no subscription fee.","editor_take":null,"best_for":["cost optimization","observability","governance","EU data residency"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api","deepseek-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api","/provider/deepseek-api"],"models_count":400,"api_compatibility":["openai"],"pricing_model":"passthrough_plus_fee","data_retention":"opt_in_logging","self_host_available":false,"href":"/router/requesty","api_href":"/api/router/requesty","last_checked":"2026-06-08"},{"slug":"respan","name":"Respan","vendor":"Respan (formerly Keywords AI)","type":"hybrid","status":"active","summary":"Unified LLM engineering platform (gateway + observability + evals + prompt management) routing across 250+ models; previously Keywords AI, rebranded February 2026.","editor_take":null,"best_for":["observability","evals","prompt management","multi-model gateway","agent tracing"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api","deepseek-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api","/provider/deepseek-api"],"models_count":250,"api_compatibility":["openai"],"pricing_model":"subscription","data_retention":"opt_in_logging","self_host_available":false,"href":"/router/respan","api_href":"/api/router/respan","last_checked":"2026-06-08"},{"slug":"routellm","name":"RouteLLM","vendor":"LMSYS (lm-sys)","type":"router","status":"active","summary":"Open-source LLM routing framework from LMSYS that routes simpler queries to a cheaper weak model and harder ones to a stronger frontier model, achieving 35–85% cost reduction on benchmarks.","editor_take":null,"best_for":["open source","binary cost optimization","research","self-hosting"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai"],"models_count":null,"api_compatibility":["openai"],"pricing_model":"free_oss","data_retention":"zero_retention","self_host_available":true,"href":"/router/routellm","api_href":"/api/router/routellm","last_checked":"2026-06-08"},{"slug":"unify","name":"Unify","vendor":"Unify AI","type":"router","status":"active","summary":"Benchmark-driven LLM router using a neural scorer and live runtime benchmarks refreshed every 10 minutes to route each request to the optimal endpoint across 100+ providers.","editor_take":null,"best_for":["benchmark-driven routing","latency optimization","cost vs quality tradeoffs"],"target_providers":["openai-api","anthropic-api","google-ai-studio","gcp-vertex-ai","mistral-ai-api"],"target_provider_hrefs":["/provider/openai-api","/provider/anthropic-api","/provider/google-ai-studio","/provider/gcp-vertex-ai","/provider/mistral-ai-api"],"models_count":null,"api_compatibility":["openai"],"pricing_model":"subscription","data_retention":"unknown","self_host_available":false,"href":"/router/unify","api_href":"/api/router/unify","last_checked":"2026-06-08"},{"slug":"vllm-semantic-router","name":"vLLM Semantic Router","vendor":"Red Hat / vLLM Project","type":"router","status":"active","summary":"Open-source Mixture-of-Models router that semantically classifies each request and routes it to the best backend (local, private, or frontier) by cost, latency, privacy, or safety, deployed as an Envoy External Processor.","editor_take":null,"best_for":["open source","Kubernetes/OpenShift","privacy-aware routing","on-prem","edge/hybrid cloud"],"target_providers":[],"target_provider_hrefs":[],"models_count":null,"api_compatibility":["openai"],"pricing_model":"free_oss","data_retention":"zero_retention","self_host_available":true,"href":"/router/vllm-semantic-router","api_href":"/api/router/vllm-semantic-router","last_checked":"2026-06-08"}]}