feat: request deduplication / debouncing

Kobo eReaders have a buggy browser that makes 2 requests for
the same HTTP resource when you click a link.

This change ensures that requests from the same IP, for the same path
and query parameters, within a short time window are only executed
once.
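
For reference, a minimal sketch of how such a key can be derived from
those three inputs (the helper name is illustrative; the real
derivation lives in the middleware below):

package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
)

// dedupKey is a hypothetical helper showing the idea: one fixed-length
// key per (client IP, path, raw query) tuple.
func dedupKey(ip, path, rawQuery string) string {
	sum := md5.Sum([]byte(ip + path + rawQuery))
	return hex.EncodeToString(sum[:])
}

func main() {
	fmt.Println(dedupKey("192.0.2.1", "/catalog", "page=2"))
}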

We record the HTTP response and replay it for the duplicate request.
If two identical requests arrive simultaneously, we use the
golang.org/x/sync/singleflight library to ensure only the first is
actually processed; the second waits for the shared result of the first.
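
A minimal, self-contained sketch of how golang.org/x/sync/singleflight
coalesces concurrent callers on the same key (the key and timings here
are illustrative):

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
	"time"

	"golang.org/x/sync/singleflight"
)

func main() {
	var group singleflight.Group
	var calls int32
	var wg sync.WaitGroup

	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			// Every goroutine uses the same key, so singleflight runs the
			// function once and hands the shared result to all callers.
			res, _, shared := group.Do("same-key", func() (interface{}, error) {
				atomic.AddInt32(&calls, 1)
				time.Sleep(50 * time.Millisecond) // slow work so callers overlap
				return "expensive result", nil
			})
			_ = res    // every caller gets "expensive result"
			_ = shared // true when the result was shared between callers
		}()
	}
	wg.Wait()

	fmt.Println("function executions:", atomic.LoadInt32(&calls)) // typically 1
}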

This probably adds a little latency, since requests block while we
check for an existing cache entry, but for a simple service like this
I don't think it matters.
Author: Evan Buss
Date:   2024-08-18 18:37:42 +00:00
Parent: 5d45afd419
Commit: 0f0540549d
6 changed files with 248 additions and 2 deletions

internal/cache/cache.go

@@ -0,0 +1,71 @@
package cache

import (
	"sync"
	"time"
)

// CacheEntry wraps a cached value with the time it was stored.
type CacheEntry[T any] struct {
	timestamp time.Time
	Value     *T
}

// Cache is a TTL-based, mutex-guarded in-memory cache.
type Cache[T any] struct {
	entries map[string]*CacheEntry[T]
	config  CacheConfig
	mutex   sync.Mutex
}

type CacheConfig struct {
	TTL             time.Duration
	CleanupInterval time.Duration // must be > 0; passed to time.NewTicker
}

// NewCache creates a cache and starts a background goroutine that
// periodically removes expired entries.
func NewCache[T any](config CacheConfig) *Cache[T] {
	cache := &Cache[T]{
		entries: make(map[string]*CacheEntry[T]),
		config:  config,
	}
	go cache.cleanupLoop()
	return cache
}

// Set stores a value under key, stamping it with the current time.
func (c *Cache[T]) Set(key string, entry *T) {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	c.entries[key] = &CacheEntry[T]{timestamp: time.Now(), Value: entry}
}

// Get returns the value for key, or false if it is missing or expired.
// Expired entries are deleted eagerly on read.
func (c *Cache[T]) Get(key string) (*T, bool) {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	entry, exists := c.entries[key]
	if !exists {
		return nil, false
	}

	if time.Since(entry.timestamp) > c.config.TTL {
		delete(c.entries, key)
		return nil, false
	}

	return entry.Value, true
}

func (c *Cache[T]) cleanupLoop() {
	ticker := time.NewTicker(c.config.CleanupInterval)
	defer ticker.Stop()

	for range ticker.C {
		c.cleanEntries()
	}
}

func (c *Cache[T]) cleanEntries() {
	c.mutex.Lock()
	defer c.mutex.Unlock()

	for key, entry := range c.entries {
		if time.Since(entry.timestamp) > c.config.TTL {
			delete(c.entries, key)
		}
	}
}
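
A hedged usage sketch of the cache above; the key, value, and
durations are illustrative:

package main

import (
	"fmt"
	"time"

	"github.com/evan-buss/opds-proxy/internal/cache"
)

func main() {
	// Entries expire after 1s; the background sweep runs every 500ms.
	c := cache.NewCache[string](cache.CacheConfig{
		TTL:             time.Second,
		CleanupInterval: 500 * time.Millisecond,
	})

	value := "hello"
	c.Set("greeting", &value)

	if v, ok := c.Get("greeting"); ok {
		fmt.Println(*v) // "hello"
	}

	time.Sleep(1100 * time.Millisecond)

	if _, ok := c.Get("greeting"); !ok {
		fmt.Println("expired") // gone once the TTL has passed
	}
}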

@@ -0,0 +1,53 @@
package debounce

import (
	"crypto/md5"
	"encoding/hex"
	"net"
	"net/http"
	"net/http/httptest"
	"strconv"
	"time"

	"github.com/evan-buss/opds-proxy/internal/cache"
	"golang.org/x/sync/singleflight"
)

// NewDebounceMiddleware returns middleware that deduplicates identical
// requests (same client IP, path, and query) arriving within the
// debounce window. The first response is recorded and replayed for
// duplicates.
func NewDebounceMiddleware(debounce time.Duration) func(next http.HandlerFunc) http.HandlerFunc {
	responseCache := cache.NewCache[httptest.ResponseRecorder](cache.CacheConfig{CleanupInterval: time.Second, TTL: debounce})
	group := singleflight.Group{}

	return func(next http.HandlerFunc) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			// Key requests by client IP + path + query so only identical
			// requests are deduplicated.
			ip, _, _ := net.SplitHostPort(r.RemoteAddr)
			hash := md5.Sum([]byte(ip + r.URL.Path + r.URL.RawQuery))
			key := hex.EncodeToString(hash[:])

			// Replay a recently recorded response for duplicate requests.
			if entry, exists := responseCache.Get(key); exists {
				w.Header().Set("X-Debounce", "true")
				writeResponse(entry, w)
				return
			}

			// Collapse simultaneous identical requests: only the first
			// executes the handler; the rest share its recorded response.
			rw, _, shared := group.Do(key, func() (interface{}, error) {
				rw := httptest.NewRecorder()
				next(rw, r)
				return rw, nil
			})

			recorder := rw.(*httptest.ResponseRecorder)
			responseCache.Set(key, recorder)

			w.Header().Set("X-Shared", strconv.FormatBool(shared))
			writeResponse(recorder, w)
		}
	}
}

// writeResponse copies a recorded response (headers, status, body) to w.
func writeResponse(rec *httptest.ResponseRecorder, w http.ResponseWriter) {
	for k, v := range rec.Header() {
		w.Header()[k] = v
	}
	w.WriteHeader(rec.Code)
	w.Write(rec.Body.Bytes())
}
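
A hedged sketch of wiring the middleware into a server. The import
path assumes the package sits at internal/debounce next to
internal/cache, and the handler and address are illustrative:

package main

import (
	"net/http"
	"time"

	// Assumed path; internal/ packages are only importable from inside this module.
	"github.com/evan-buss/opds-proxy/internal/debounce"
)

func main() {
	// Hypothetical handler; any http.HandlerFunc works.
	feed := func(w http.ResponseWriter, r *http.Request) {
		w.Write([]byte("<feed/>"))
	}

	// Duplicate requests from the same IP for the same path and query
	// within 500ms are served from the recorded response.
	debounced := debounce.NewDebounceMiddleware(500 * time.Millisecond)

	http.HandleFunc("/", debounced(feed))
	http.ListenAndServe(":8080", nil)
}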

@@ -0,0 +1,106 @@
package debounce

import (
	"net/http"
	"net/http/httptest"
	"sync"
	"testing"
	"time"
)

func TestDebounceMiddleware(t *testing.T) {
	setup := func() (http.Handler, *int) {
		// Mock handler that simulates a slow response
		handlerCallCount := 0
		mockHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			handlerCallCount++
			time.Sleep(100 * time.Millisecond) // Simulate some work
			w.WriteHeader(http.StatusOK)
			w.Write([]byte("OK"))
		})

		middleware := NewDebounceMiddleware(500 * time.Millisecond)
		wrappedHandler := middleware(mockHandler)

		return wrappedHandler, &handlerCallCount
	}

	t.Run("Caching Behavior", func(t *testing.T) {
		wrappedHandler, handlerCallCount := setup()

		// First request
		req1 := httptest.NewRequest("GET", "/test", nil)
		rec1 := httptest.NewRecorder()
		wrappedHandler.ServeHTTP(rec1, req1)

		if *handlerCallCount != 1 {
			t.Errorf("Expected handler to be called once, got %d", *handlerCallCount)
		}

		// Second request within debounce period
		req2 := httptest.NewRequest("GET", "/test", nil)
		rec2 := httptest.NewRecorder()
		wrappedHandler.ServeHTTP(rec2, req2)

		if *handlerCallCount != 1 {
			t.Errorf("Expected handler to still be called once, got %d", *handlerCallCount)
		}
		if rec2.Header().Get("X-Debounce") != "true" {
			t.Error("Expected second response to be debounced")
		}

		// Wait for debounce period to expire
		time.Sleep(600 * time.Millisecond)

		// Third request after debounce period
		req3 := httptest.NewRequest("GET", "/test", nil)
		rec3 := httptest.NewRecorder()
		wrappedHandler.ServeHTTP(rec3, req3)

		if *handlerCallCount != 2 {
			t.Errorf("Expected handler to be called twice, got %d", *handlerCallCount)
		}
	})

	t.Run("Singleflight Behavior", func(t *testing.T) {
		wrappedHandler, handlerCallCount := setup()

		var wg sync.WaitGroup
		requestCount := 10

		for i := 0; i < requestCount; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				req := httptest.NewRequest("GET", "/test", nil)
				rec := httptest.NewRecorder()
				wrappedHandler.ServeHTTP(rec, req)
			}()
		}
		wg.Wait()

		if *handlerCallCount != 1 {
			t.Errorf("Expected handler to be called once for concurrent requests, got %d", *handlerCallCount)
		}
	})

	t.Run("Different Paths", func(t *testing.T) {
		wrappedHandler, handlerCallCount := setup()

		// Request to path A
		reqA := httptest.NewRequest("GET", "/testA", nil)
		recA := httptest.NewRecorder()
		wrappedHandler.ServeHTTP(recA, reqA)

		// Request to path B
		reqB := httptest.NewRequest("GET", "/testB", nil)
		recB := httptest.NewRecorder()
		wrappedHandler.ServeHTTP(recB, reqB)

		if *handlerCallCount != 2 {
			t.Errorf("Expected handler to be called twice for different paths, got %d", *handlerCallCount)
		}
	})
}