🔥 Hardest Problem
Cache invalidation is famously difficult. Choose strategy based on consistency needs.
“There are only two hard things in Computer Science: cache invalidation and naming things.”
— Phil Karlton
Cache invalidation is deciding when and how to remove or update cached data when the underlying data changes.
Why is it hard?
Stale data = cached data that doesn’t match the database anymore.
Impact: users see outdated data, downstream systems make decisions on stale state, and bugs appear that are hard to reproduce.
Write-Through: update the cache at the same time data is written, so cache and database stay in sync.
How it works:
When to use:
Trade-offs:
class WriteThroughCache: def __init__(self, cache: CacheClient, db: Database): self.cache = cache self.db = db
def update_user(self, user_id: int, data: dict): # Write to cache cache_key = f"user:{user_id}" self.cache.set(cache_key, data, ttl=3600)
# Write to database self.db.update_user(user_id, data)
# Both complete - return success return Trueclass WriteThroughCache { private final CacheClient cache; private final Database db;
public WriteThroughCache(CacheClient cache, Database db) { this.cache = cache; this.db = db; }
public boolean updateUser(int userId, UserData data) { // Write to cache String cacheKey = "user:" + userId; cache.set(cacheKey, serialize(data), 3600);
// Write to database db.updateUser(userId, data);
// Both complete - return success return true; }}Delete cache on write. Next read fetches fresh data.
How it works:
When to use:
Trade-offs:
class WriteInvalidateCache: def __init__(self, cache: CacheClient, db: Database): self.cache = cache self.db = db
def update_user(self, user_id: int, data: dict): # Update database self.db.update_user(user_id, data)
# Invalidate cache cache_key = f"user:{user_id}" self.cache.delete(cache_key)
return True
def get_user(self, user_id: int): # Cache-aside pattern cache_key = f"user:{user_id}" cached = self.cache.get(cache_key)
if cached: return cached
# Cache miss - fetch from DB user = self.db.get_user(user_id)
# Cache it if user: self.cache.set(cache_key, user, ttl=3600)
return userclass WriteInvalidateCache { private final CacheClient cache; private final Database db;
public WriteInvalidateCache(CacheClient cache, Database db) { this.cache = cache; this.db = db; }
public boolean updateUser(int userId, UserData data) { // Update database db.updateUser(userId, data);
// Invalidate cache String cacheKey = "user:" + userId; cache.delete(cacheKey);
return true; }
public Optional<User> getUser(int userId) { // Cache-aside pattern String cacheKey = "user:" + userId; Optional<String> cached = cache.get(cacheKey);
if (cached.isPresent()) { return Optional.of(deserialize(cached.get())); }
// Cache miss - fetch from DB Optional<User> user = db.getUser(userId);
// Cache it user.ifPresent(u -> cache.set(cacheKey, serialize(u), 3600));
return user; }}Time-based expiration. Items expire after fixed time.
How it works:
When to use:
Trade-offs:
Event-Driven: invalidate cache entries in response to data-change events (for example, messages on an event bus). The most precise but most sophisticated approach.
How it works:
When to use:
Trade-offs:
Cache stampede (also called the thundering herd problem) happens when a hot cache entry expires and many requests simultaneously try to fetch the same data from the database.
Impact: a sudden spike of identical database queries that can overload the database and cascade into an outage.
Distributed lock: only one request fetches from the database; the others wait for it to repopulate the cache.
How it works:
import redis
import time
import uuid

class CacheWithLock:
    """Cache-aside reads guarded by a distributed lock to prevent cache
    stampede: on a miss, only one caller fetches from the DB while the
    rest wait briefly and re-check the cache.
    """

    # Atomic compare-and-delete: release the lock only if we still own
    # it. A plain GET + DEL pair would race with the lock expiring and
    # being acquired by another client between the two calls.
    _RELEASE_LOCK_LUA = (
        "if redis.call('get', KEYS[1]) == ARGV[1] then "
        "return redis.call('del', KEYS[1]) else return 0 end"
    )

    def __init__(self, redis_client: redis.Redis):
        self.redis = redis_client
        self.lock_ttl = 10  # seconds; lock auto-expires if the holder dies

    def get_with_lock(self, key: str, fetch_func):
        """Return the cached value for *key*, using *fetch_func* to
        populate the cache on a miss. Only one concurrent caller runs
        *fetch_func*; the others wait and retry the cache.
        """
        # Fast path: serve straight from the cache.
        cached = self.redis.get(key)
        if cached is not None:
            return cached

        # Cache miss - race for the per-key lock.
        lock_key = f"lock:{key}"
        lock_value = str(uuid.uuid4())  # unique token proves ownership

        # SET NX EX: acquire only if nobody holds the lock, with a TTL
        # so a crashed holder cannot deadlock everyone else.
        acquired = self.redis.set(lock_key, lock_value, nx=True, ex=self.lock_ttl)

        if acquired:
            # We won the lock - fetch from the DB and populate the cache.
            try:
                data = fetch_func()
                self.redis.set(key, data, ex=300)
                return data
            finally:
                # Ownership-checked release via Lua. NOTE: redis-py
                # returns bytes by default, so the original
                # `get(lock_key) == lock_value` (bytes vs str) never
                # matched - the lock was only ever freed by TTL expiry.
                self.redis.eval(self._RELEASE_LOCK_LUA, 1, lock_key, lock_value)

        # Someone else holds the lock - wait briefly and re-check the
        # cache a few times; the winner should have populated it.
        for _ in range(5):
            time.sleep(0.1)
            cached = self.redis.get(key)
            if cached is not None:
                return cached

        # Winner is slow or failed - fall back to fetching ourselves.
        return fetch_func()

import redis.clients.jedis.Jedis;
import java.util.UUID;
class CacheWithLock { private final Jedis redis; private final int lockTtl = 10;
public CacheWithLock(Jedis redis) { this.redis = redis; }
public String getWithLock(String key, Supplier<String> fetchFunc) { // Try cache first String cached = redis.get(key); if (cached != null) { return cached; }
// Cache miss - try to acquire lock String lockKey = "lock:" + key; String lockValue = UUID.randomUUID().toString();
// Try to acquire lock (set if not exists) boolean acquired = "OK".equals( redis.set(lockKey, lockValue, "NX", "EX", lockTtl) );
if (acquired) { // We got the lock - fetch from DB try { String data = fetchFunc.get(); redis.setex(key, 300, data); return data; } finally { // Release lock (only if we still own it) if (lockValue.equals(redis.get(lockKey))) { redis.del(lockKey); } } } else { // Someone else has lock - wait and retry try { Thread.sleep(100); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } // Retry cache (winner should have cached it) String retry = redis.get(key); return retry != null ? retry : fetchFunc.get(); } }}Refresh cache randomly before expiration. Spreads load over time.
How it works:
import random
import time

class ProbabilisticRefreshCache:
    """Spread refresh load over time: each cache hit has a small random
    chance of refreshing the entry early, so a popular key rarely
    expires outright and triggers a stampede.
    """

    def __init__(self, cache: CacheClient, db: Database):
        self.cache = cache
        self.db = db

    def get_with_refresh(self, key: str, ttl: int = 300):
        """Return the cached value for *key*, occasionally refreshing it
        early; on a miss, fetch from the DB and cache the result.
        """
        hit = self.cache.get(key)

        if not hit:
            # Miss: fetch from the DB and populate the cache.
            fresh = self.db.fetch(key)
            self.cache.set(key, fresh, ttl=ttl)
            return fresh

        # Hit: with 10% probability, refresh before expiry. (Simplified:
        # a real implementation would store a write timestamp and weight
        # the probability by the entry's remaining TTL.)
        if random.random() < 0.1:
            self._refresh_async(key, ttl)

        return hit

    def _refresh_async(self, key: str, ttl: int):
        # "Async" in name only - this simplified example refreshes
        # inline; production code would hand this off to a worker.
        fresh = self.db.fetch(key)
        self.cache.set(key, fresh, ttl=ttl)

import java.util.Random;
class ProbabilisticRefreshCache { private final CacheClient cache; private final Database db; private final Random random = new Random();
public ProbabilisticRefreshCache(CacheClient cache, Database db) { this.cache = cache; this.db = db; }
public String getWithRefresh(String key, int ttl) { String cached = cache.get(key);
if (cached != null) { // Check if we should refresh (last 10% of TTL) if (random.nextDouble() < 0.1) { // 10% chance // Refresh in background refreshAsync(key, ttl); }
return cached; }
// Cache miss - fetch and cache String data = db.fetch(key); cache.set(key, data, ttl); return data; }
private void refreshAsync(String key, int ttl) { // Background refresh (simplified) String data = db.fetch(key); cache.set(key, data, ttl); }}| Strategy | Consistency | Complexity | Latency | Use Case |
|---|---|---|---|---|
| Write-Through | Strong | Low | High (write) | Critical data |
| Write-Invalidate | Eventual | Low | Low (write) | General purpose |
| TTL | Eventual | Low | Low | Read-heavy |
| Event-Driven | Strong | High | Low | Complex systems |
🔥 Hardest Problem
Cache invalidation is famously difficult. Choose strategy based on consistency needs.
⚡ Write-Invalidate
Most common: delete cache on write. Next read fetches fresh. Simple and effective.
🐘 Prevent Stampede
Use distributed locks or probabilistic refresh to prevent cache stampede.
🔄 Event-Driven
For complex systems, use event-driven invalidation for precise control.