1
1
mirror of https://github.com/go-gitea/gitea synced 2025-01-10 09:44:43 +00:00

188 lines
4.8 KiB
Go
Raw Normal View History

package certmagic
import (
"context"
"errors"
"log"
"runtime"
"sync"
"time"
"go.uber.org/zap"
)
var jm = &jobManager{maxConcurrentJobs: 1000}
type jobManager struct {
mu sync.Mutex
maxConcurrentJobs int
activeWorkers int
queue []namedJob
names map[string]struct{}
}
type namedJob struct {
name string
job func() error
logger *zap.Logger
}
// Submit enqueues the given job with the given name. If name is non-empty
// and a job with the same name is already enqueued or running, this is a
// no-op. If name is empty, no duplicate prevention will occur. The job
// manager will then run this job as soon as it is able.
func (jm *jobManager) Submit(logger *zap.Logger, name string, job func() error) {
jm.mu.Lock()
defer jm.mu.Unlock()
if jm.names == nil {
jm.names = make(map[string]struct{})
}
if name != "" {
// prevent duplicate jobs
if _, ok := jm.names[name]; ok {
return
}
jm.names[name] = struct{}{}
}
jm.queue = append(jm.queue, namedJob{name, job, logger})
if jm.activeWorkers < jm.maxConcurrentJobs {
jm.activeWorkers++
go jm.worker()
}
}
func (jm *jobManager) worker() {
defer func() {
if err := recover(); err != nil {
buf := make([]byte, stackTraceBufferSize)
buf = buf[:runtime.Stack(buf, false)]
log.Printf("panic: certificate worker: %v\n%s", err, buf)
}
}()
for {
jm.mu.Lock()
if len(jm.queue) == 0 {
jm.activeWorkers--
jm.mu.Unlock()
return
}
next := jm.queue[0]
jm.queue = jm.queue[1:]
jm.mu.Unlock()
if err := next.job(); err != nil {
if next.logger != nil {
next.logger.Error("job failed", zap.Error(err))
}
}
if next.name != "" {
jm.mu.Lock()
delete(jm.names, next.name)
jm.mu.Unlock()
}
}
}
func doWithRetry(ctx context.Context, log *zap.Logger, f func(context.Context) error) error {
var attempts int
ctx = context.WithValue(ctx, AttemptsCtxKey, &attempts)
// the initial intervalIndex is -1, signaling
// that we should not wait for the first attempt
start, intervalIndex := time.Now(), -1
var err error
for time.Since(start) < maxRetryDuration {
var wait time.Duration
if intervalIndex >= 0 {
wait = retryIntervals[intervalIndex]
}
timer := time.NewTimer(wait)
select {
case <-ctx.Done():
timer.Stop()
return context.Canceled
case <-timer.C:
err = f(ctx)
attempts++
if err == nil || errors.Is(err, context.Canceled) {
return err
}
var errNoRetry ErrNoRetry
if errors.As(err, &errNoRetry) {
return err
}
if intervalIndex < len(retryIntervals)-1 {
intervalIndex++
}
if time.Since(start) < maxRetryDuration {
if log != nil {
log.Error("will retry",
zap.Error(err),
zap.Int("attempt", attempts),
zap.Duration("retrying_in", retryIntervals[intervalIndex]),
zap.Duration("elapsed", time.Since(start)),
zap.Duration("max_duration", maxRetryDuration))
}
} else {
if log != nil {
log.Error("final attempt; giving up",
zap.Error(err),
zap.Int("attempt", attempts),
zap.Duration("elapsed", time.Since(start)),
zap.Duration("max_duration", maxRetryDuration))
}
return nil
}
}
}
return err
}
// ErrNoRetry is an error type which signals
// to stop retries early.
type ErrNoRetry struct{ Err error }
// Unwrap makes it so that e wraps e.Err.
func (e ErrNoRetry) Unwrap() error { return e.Err }
func (e ErrNoRetry) Error() string { return e.Err.Error() }
type retryStateCtxKey struct{}
// AttemptsCtxKey is the context key for the value
// that holds the attempt counter. The value counts
// how many times the operation has been attempted.
// A value of 0 means first attempt.
var AttemptsCtxKey retryStateCtxKey
// retryIntervals are based on the idea of exponential
// backoff, but weighed a little more heavily to the
// front. We figure that intermittent errors would be
// resolved after the first retry, but any errors after
// that would probably require at least a few minutes
// to clear up: either for DNS to propagate, for the
// administrator to fix their DNS or network properties,
// or some other external factor needs to change. We
// chose intervals that we think will be most useful
// without introducing unnecessary delay. The last
// interval in this list will be used until the time
// of maxRetryDuration has elapsed.
var retryIntervals = []time.Duration{
1 * time.Minute,
2 * time.Minute,
2 * time.Minute,
5 * time.Minute, // elapsed: 10 min
10 * time.Minute,
20 * time.Minute,
20 * time.Minute, // elapsed: 1 hr
30 * time.Minute,
30 * time.Minute, // elapsed: 2 hr
1 * time.Hour,
3 * time.Hour, // elapsed: 6 hr
6 * time.Hour, // for up to maxRetryDuration
}
// maxRetryDuration is the maximum duration to try
// doing retries using the above intervals.
const maxRetryDuration = 24 * time.Hour * 30