🪣 Token Bucket: Most Popular
Token bucket allows bursts, smooths to average rate. Most widely used algorithm.
Without rate limiting:
With rate limiting:
Bucket holds tokens. Tokens refill at fixed rate. Request consumes token.
Characteristics:
Algorithm:
capacity tokensrefill_rate per secondimport timefrom threading import Lockfrom typing import Optional
class TokenBucket: """Token bucket rate limiter"""
def __init__(self, capacity: int, refill_rate: float): """ Args: capacity: Maximum tokens in bucket refill_rate: Tokens added per second """ self.capacity = capacity self.refill_rate = refill_rate self.tokens = capacity self.last_refill = time.time() self.lock = Lock()
def acquire(self, tokens: int = 1) -> bool: """Try to acquire tokens. Returns True if successful.""" with self.lock: self._refill()
if self.tokens >= tokens: self.tokens -= tokens return True else: return False
def _refill(self): """Refill tokens based on elapsed time""" now = time.time() elapsed = now - self.last_refill
# Calculate tokens to add tokens_to_add = elapsed * self.refill_rate
# Add tokens (don't exceed capacity) self.tokens = min(self.capacity, self.tokens + tokens_to_add) self.last_refill = now
def get_available_tokens(self) -> int: """Get current number of available tokens""" with self.lock: self._refill() return int(self.tokens)
# Usagerate_limiter = TokenBucket(capacity=10, refill_rate=2.0) # 10 tokens, refill 2/sec
def handle_request(): if rate_limiter.acquire(): # Process request return "Request processed" else: return "Rate limit exceeded", 429import java.util.concurrent.locks.ReentrantLock;
public class TokenBucket { private final int capacity; private final double refillRate; private double tokens; private long lastRefill; private final ReentrantLock lock = new ReentrantLock();
public TokenBucket(int capacity, double refillRate) { this.capacity = capacity; this.refillRate = refillRate; this.tokens = capacity; this.lastRefill = System.currentTimeMillis(); }
public boolean acquire(int tokensToConsume) { lock.lock(); try { refill();
if (tokens >= tokensToConsume) { tokens -= tokensToConsume; return true; } else { return false; } } finally { lock.unlock(); } }
private void refill() { long now = System.currentTimeMillis(); double elapsed = (now - lastRefill) / 1000.0; // Convert to seconds
// Calculate tokens to add double tokensToAdd = elapsed * refillRate;
// Add tokens (don't exceed capacity) tokens = Math.min(capacity, tokens + tokensToAdd); lastRefill = now; }
public int getAvailableTokens() { lock.lock(); try { refill(); return (int) tokens; } finally { lock.unlock(); } }}
// UsageTokenBucket rateLimiter = new TokenBucket(10, 2.0); // 10 tokens, refill 2/sec
public String handleRequest() { if (rateLimiter.acquire(1)) { // Process request return "Request processed"; } else { return "Rate limit exceeded"; // Return 429 }}Bucket holds requests. Requests leak out at fixed rate. If full, reject.
Characteristics:
Algorithm:
capacity (queue size)leak_rate per secondimport timeimport queuefrom threading import Lock, Threadfrom typing import Optional
class LeakyBucket: """Leaky bucket rate limiter"""
def __init__(self, capacity: int, leak_rate: float): """ Args: capacity: Maximum requests in bucket leak_rate: Requests processed per second """ self.capacity = capacity self.leak_rate = leak_rate self.bucket = queue.Queue(maxsize=capacity) self.lock = Lock() self.processing = False
def start_processing(self): """Start processing requests""" if not self.processing: self.processing = True Thread(target=self._process_requests, daemon=True).start()
def add_request(self, request) -> bool: """Try to add request to bucket. Returns True if successful.""" try: self.bucket.put_nowait(request) return True except queue.Full: return False
def _process_requests(self): """Process requests at leak rate""" interval = 1.0 / self.leak_rate # Time between requests
while self.processing: try: request = self.bucket.get(timeout=interval) # Process request self._handle_request(request) except queue.Empty: continue
def _handle_request(self, request): """Handle processed request""" # Override in subclass or pass handler passimport java.util.concurrent.BlockingQueue;import java.util.concurrent.LinkedBlockingQueue;import java.util.concurrent.Executors;import java.util.concurrent.ScheduledExecutorService;import java.util.concurrent.TimeUnit;
public class LeakyBucket { private final int capacity; private final double leakRate; private final BlockingQueue<Request> bucket; private final ScheduledExecutorService scheduler;
public LeakyBucket(int capacity, double leakRate) { this.capacity = capacity; this.leakRate = leakRate; this.bucket = new LinkedBlockingQueue<>(capacity); this.scheduler = Executors.newScheduledThreadPool(1);
// Start processing startProcessing(); }
public boolean addRequest(Request request) { // Try to add request to bucket return bucket.offer(request); }
private void startProcessing() { long interval = (long) (1000 / leakRate); // Milliseconds between requests
scheduler.scheduleAtFixedRate(() -> { Request request = bucket.poll(); if (request != null) { handleRequest(request); } }, 0, interval, TimeUnit.MILLISECONDS); }
private void handleRequest(Request request) { // Process request }}Tracks requests in sliding time window. More accurate than fixed window.
Characteristics:
import timefrom collections import dequefrom threading import Lock
class SlidingWindowRateLimiter: """Sliding window rate limiter"""
def __init__(self, limit: int, window_seconds: int): """ Args: limit: Maximum requests allowed window_seconds: Time window in seconds """ self.limit = limit self.window_seconds = window_seconds self.requests = deque() # Store request timestamps self.lock = Lock()
def is_allowed(self) -> bool: """Check if request is allowed""" with self.lock: now = time.time()
# Remove old requests outside window while self.requests and self.requests[0] < now - self.window_seconds: self.requests.popleft()
# Check if under limit if len(self.requests) < self.limit: self.requests.append(now) return True else: return False
def get_remaining_requests(self) -> int: """Get remaining requests in current window""" with self.lock: now = time.time()
# Remove old requests while self.requests and self.requests[0] < now - self.window_seconds: self.requests.popleft()
return max(0, self.limit - len(self.requests))import java.util.concurrent.ConcurrentLinkedDeque;import java.util.concurrent.locks.ReentrantLock;
public class SlidingWindowRateLimiter { private final int limit; private final long windowMillis; private final ConcurrentLinkedDeque<Long> requests; private final ReentrantLock lock = new ReentrantLock();
public SlidingWindowRateLimiter(int limit, int windowSeconds) { this.limit = limit; this.windowMillis = windowSeconds * 1000L; this.requests = new ConcurrentLinkedDeque<>(); }
public boolean isAllowed() { lock.lock(); try { long now = System.currentTimeMillis();
// Remove old requests outside window while (!requests.isEmpty() && requests.peekFirst() < now - windowMillis) { requests.pollFirst(); }
// Check if under limit if (requests.size() < limit) { requests.addLast(now); return true; } else { return false; } } finally { lock.unlock(); } }
public int getRemainingRequests() { lock.lock(); try { long now = System.currentTimeMillis();
// Remove old requests while (!requests.isEmpty() && requests.peekFirst() < now - windowMillis) { requests.pollFirst(); }
return Math.max(0, limit - requests.size()); } finally { lock.unlock(); } }}Divides time into fixed windows. Simple but allows bursts.
Characteristics:
import timefrom threading import Lock
class FixedWindowRateLimiter: """Fixed window rate limiter"""
def __init__(self, limit: int, window_seconds: int): """ Args: limit: Maximum requests per window window_seconds: Window size in seconds """ self.limit = limit self.window_seconds = window_seconds self.count = 0 self.window_start = time.time() self.lock = Lock()
def is_allowed(self) -> bool: """Check if request is allowed""" with self.lock: now = time.time()
# Check if window expired if now - self.window_start >= self.window_seconds: # Reset window self.count = 0 self.window_start = now
# Check limit if self.count < self.limit: self.count += 1 return True else: return Falseimport java.util.concurrent.atomic.AtomicInteger;import java.util.concurrent.atomic.AtomicLong;import java.util.concurrent.locks.ReentrantLock;
public class FixedWindowRateLimiter { private final int limit; private final long windowMillis; private final AtomicInteger count = new AtomicInteger(0); private final AtomicLong windowStart = new AtomicLong(System.currentTimeMillis()); private final ReentrantLock lock = new ReentrantLock();
public FixedWindowRateLimiter(int limit, int windowSeconds) { this.limit = limit; this.windowMillis = windowSeconds * 1000L; }
public boolean isAllowed() { lock.lock(); try { long now = System.currentTimeMillis();
// Check if window expired if (now - windowStart.get() >= windowMillis) { // Reset window count.set(0); windowStart.set(now); }
// Check limit if (count.get() < limit) { count.incrementAndGet(); return true; } else { return false; } } finally { lock.unlock(); } }}For multiple servers, use Redis:
import redisimport time
class DistributedTokenBucket: """Distributed token bucket using Redis"""
def __init__(self, redis_client: redis.Redis, key_prefix: str, capacity: int, refill_rate: float): self.redis = redis_client self.key_prefix = key_prefix self.capacity = capacity self.refill_rate = refill_rate
def is_allowed(self, identifier: str) -> bool: """Check if request is allowed for identifier""" key = f"{self.key_prefix}:{identifier}" now = time.time()
# Use Lua script for atomic operations lua_script = """ local key = KEYS[1] local capacity = tonumber(ARGV[1]) local refill_rate = tonumber(ARGV[2]) local now = tonumber(ARGV[3])
local bucket = redis.call('HMGET', key, 'tokens', 'last_refill') local tokens = tonumber(bucket[1]) or capacity local last_refill = tonumber(bucket[2]) or now
-- Refill tokens local elapsed = now - last_refill local tokens_to_add = elapsed * refill_rate tokens = math.min(capacity, tokens + tokens_to_add)
-- Check if can consume token if tokens >= 1 then tokens = tokens - 1 redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now) redis.call('EXPIRE', key, 3600) -- Expire after 1 hour return 1 else redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now) redis.call('EXPIRE', key, 3600) return 0 end """
result = self.redis.eval(lua_script, 1, key, self.capacity, self.refill_rate, now) return bool(result)import redis.clients.jedis.Jedis;import redis.clients.jedis.JedisPool;
public class DistributedTokenBucket { private final JedisPool jedisPool; private final String keyPrefix; private final int capacity; private final double refillRate;
private static final String LUA_SCRIPT = "local key = KEYS[1]\n" + "local capacity = tonumber(ARGV[1])\n" + "local refill_rate = tonumber(ARGV[2])\n" + "local now = tonumber(ARGV[3])\n" + "local bucket = redis.call('HMGET', key, 'tokens', 'last_refill')\n" + "local tokens = tonumber(bucket[1]) or capacity\n" + "local last_refill = tonumber(bucket[2]) or now\n" + "local elapsed = now - last_refill\n" + "local tokens_to_add = elapsed * refill_rate\n" + "tokens = math.min(capacity, tokens + tokens_to_add)\n" + "if tokens >= 1 then\n" + " tokens = tokens - 1\n" + " redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)\n" + " redis.call('EXPIRE', key, 3600)\n" + " return 1\n" + "else\n" + " redis.call('HMSET', key, 'tokens', tokens, 'last_refill', now)\n" + " redis.call('EXPIRE', key, 3600)\n" + " return 0\n" + "end";
public boolean isAllowed(String identifier) { try (Jedis jedis = jedisPool.getResource()) { String key = keyPrefix + ":" + identifier; long now = System.currentTimeMillis() / 1000;
Object result = jedis.eval(LUA_SCRIPT, Collections.singletonList(key), Arrays.asList( String.valueOf(capacity), String.valueOf(refillRate), String.valueOf(now) ));
return ((Long) result) == 1L; } }}rate_limiter = TokenBucket(capacity=100, refill_rate=10)client_ip = request.remote_addr
if not rate_limiter.is_allowed(client_ip): return "Rate limit exceeded", 429user_id = get_user_id(request)if not rate_limiter.is_allowed(f"user:{user_id}"): return "Rate limit exceeded", 429api_key = request.headers.get('X-API-Key')if not rate_limiter.is_allowed(f"key:{api_key}"): return "Rate limit exceeded", 429# Different limits for different tierslimits = { 'free': TokenBucket(100, 10), 'premium': TokenBucket(1000, 100), 'enterprise': TokenBucket(10000, 1000),}
tier = get_user_tier(user_id)if not limits[tier].is_allowed(user_id): return "Rate limit exceeded", 429Inform clients about rate limits:
HTTP/1.1 200 OKX-RateLimit-Limit: 100X-RateLimit-Remaining: 95X-RateLimit-Reset: 1640995200When limit exceeded:
HTTP/1.1 429 Too Many RequestsX-RateLimit-Limit: 100X-RateLimit-Remaining: 0X-RateLimit-Reset: 1640995200Retry-After: 60| Algorithm | Bursts | Accuracy | Memory | Complexity |
|---|---|---|---|---|
| Token Bucket | ✅ Yes | High | Low | Medium |
| Leaky Bucket | ❌ No | High | Medium | Medium |
| Sliding Window | ❌ No | Very High | High | High |
| Fixed Window | ✅ Yes | Medium | Low | Low |
Recommendation: Use Token Bucket for most cases. It’s simple, accurate, and allows bursts.
🪣 Token Bucket: Most Popular
Token bucket allows bursts, smooths to average rate. Most widely used algorithm.
📊 Sliding Window: Most Accurate
Sliding window is most accurate but uses more memory. Use when accuracy is critical.
🌐 Distributed: Use Redis
For multiple servers, use Redis with Lua scripts for atomic operations.
🔢 Return 429
When rate limit exceeded, return HTTP 429 with Retry-After header. Inform clients about limits.