From f6e7fb524e3e157f04a5fe90066e55bf1dc692ec Mon Sep 17 00:00:00 2001
From: sudoforge <no-reply@sudoforge.com>
Date: Thu, 8 May 2025 01:08:48 -0700
Subject: [PATCH] test: add an internal lib for running flaky tests (#1398)

This change adds an internal utility library for running flaky tests,
with built-in support for retrying with exponential backoff and
optional jitter. Packages within this repository can use it by
importing `internal/test` and invoking it as follows:

    func TestSomething(t *testing.T) {
        f := test.NewFlaky(t, &test.FlakyOptions{
            // define options here
            ...
        })

        f.Run(func(t testing.TB) {
            // original test logic here
            ...
        })
    }

Change-Id: I8c6138c39c381bcee408ea6b7fe9d9b6eeb48fed
---
 internal/test/recorder.go  | 28 ++++++++++++++++
 internal/test/test.go      | 69 ++++++++++++++++++++++++++++++++++++++
 internal/test/test_test.go | 42 +++++++++++++++++++++++
 3 files changed, 139 insertions(+)
 create mode 100644 internal/test/recorder.go
 create mode 100644 internal/test/test.go
 create mode 100644 internal/test/test_test.go

diff --git a/internal/test/recorder.go b/internal/test/recorder.go
new file mode 100644
index 00000000..35ddb0d0
--- /dev/null
+++ b/internal/test/recorder.go
@@ -0,0 +1,28 @@
+package test
+
+import (
+	"fmt"
+	"testing"
+)
+
+type recorder struct {
+	testing.TB
+	fail  func(string)
+	fatal func(string)
+}
+
+func (r *recorder) Errorf(format string, args ...any) {
+	r.fail(fmt.Sprintf(format, args...))
+}
+
+func (r *recorder) Fatalf(format string, args ...any) {
+	r.fatal(fmt.Sprintf(format, args...))
+}
+
+func (r *recorder) Fatal(args ...any) {
+	r.fatal(fmt.Sprint(args...))
+}
+
+func (r *recorder) Error(args ...any) {
+	r.fail(fmt.Sprint(args...))
+}
diff --git a/internal/test/test.go b/internal/test/test.go
new file mode 100644
index 00000000..1dc052bf
--- /dev/null
+++ b/internal/test/test.go
@@ -0,0 +1,69 @@
+package test
+
+import (
+	"errors"
+	"math/rand"
+	"testing"
+	"time"
+)
+
+type flaky struct {
+	t testing.TB
+	o *FlakyOptions
+}
+
+type FlakyOptions struct {
+	InitialBackoff time.Duration
+	MaxAttempts    int
+	Jitter         float64
+}
+
+func NewFlaky(t testing.TB, o *FlakyOptions) *flaky {
+	if o.InitialBackoff <= 0 {
+		o.InitialBackoff = 500 * time.Millisecond
+	}
+
+	if o.MaxAttempts <= 0 {
+		o.MaxAttempts = 3
+	}
+
+	if o.Jitter < 0 {
+		o.Jitter = 0
+	}
+
+	return &flaky{t: t, o: o}
+}
+
+func (f *flaky) Run(fn func(t testing.TB)) {
+	var last error
+
+	for attempt := 1; attempt <= f.o.MaxAttempts; attempt++ {
+		var failed bool
+
+		fn(&recorder{
+			TB:    f.t,
+			fail:  func(e string) { failed = true; last = errors.New(e) },
+			fatal: func(e string) { failed = true; last = errors.New(e) },
+		})
+
+		if !failed {
+			return
+		}
+
+		if attempt < f.o.MaxAttempts {
+			backoff := f.o.InitialBackoff * time.Duration(1<<uint(attempt-1))
+			time.Sleep(applyJitter(backoff, f.o.Jitter))
+		}
+	}
+
+	f.t.Fatalf("[%s] test failed after %d attempts: %s", f.t.Name(), f.o.MaxAttempts, last)
+}
+
+func applyJitter(d time.Duration, jitter float64) time.Duration {
+	if jitter == 0 {
+		return d
+	}
+	maxJitter := float64(d) * jitter
+	delta := maxJitter * (rand.Float64()*2 - 1)
+	return time.Duration(float64(d) + delta)
+}
diff --git a/internal/test/test_test.go b/internal/test/test_test.go
new file mode 100644
index 00000000..264398c1
--- /dev/null
+++ b/internal/test/test_test.go
@@ -0,0 +1,42 @@
+package test
+
+import (
+	"testing"
+	"time"
+)
+
+func Test_SucceedsImmediately(t *testing.T) {
+	var attempts int
+
+	f := NewFlaky(t, &FlakyOptions{
+		MaxAttempts:    3,
+		InitialBackoff: 10 * time.Millisecond,
+	})
+
+	f.Run(func(t testing.TB) {
+		attempts++
+		if attempts > 1 {
+			t.Fatalf("should not retry on success")
+		}
+	})
+}
+
+func Test_EventualSuccess(t *testing.T) {
+	var attempts int
+
+	f := NewFlaky(t, &FlakyOptions{
+		MaxAttempts:    5,
+		InitialBackoff: 10 * time.Millisecond,
+	})
+
+	f.Run(func(t testing.TB) {
+		attempts++
+		if attempts < 3 {
+			t.Fatalf("intentional failure")
+		}
+	})
+
+	if attempts != 3 {
+		t.Fatalf("expected 3 attempts, got %d", attempts)
+	}
+}
-- 
GitLab