Skip to content

Commit

Permalink
change Backoff() algorithm (#237)
Browse files Browse the repository at this point in the history
1) Add capabilities to handle Retry-After headers and similar info from
server

Motivation: some servers provide Retry-After header or similar info along with 429
or 503 status code, and it is often important to honor such information
on retries, i.e. simple exponential backoff is not optimal.
https://docs.microsoft.com/en-us/sharepoint/dev/general-development/how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online

2) Add option NOT to retry even if operation returns an error (but retry
by default, if no retry conditions are set)

Motivation: errors are already passed to the condition callback in resty, but
Backoff() still retries the request if the error is not nil. This causes excessive,
stillborn retries for non-retryable errors from the underlying HTTP client
(e.g. with a RoundTripper from oauth2).

3) Remove error return value from condition callback

Motivation: this error is neither passed to caller, nor logged in any
way. It is cleaner to have "needRetry == true" than "needRetry == true
|| conditionErr != nil".

4) Do not use floating-point arithmetic for exponential backoff

Motivation: simplification & performance
  • Loading branch information
neganovalexey authored and jeevatkm committed Mar 28, 2019
1 parent 46fc51a commit e5f377d
Show file tree
Hide file tree
Showing 4 changed files with 376 additions and 71 deletions.
8 changes: 8 additions & 0 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ type Client struct {
RetryWaitTime time.Duration
RetryMaxWaitTime time.Duration
RetryConditions []RetryConditionFunc
RetryAfter RetryAfterFunc
JSONMarshal func(v interface{}) ([]byte, error)
JSONUnmarshal func(data []byte, v interface{}) error

Expand Down Expand Up @@ -515,6 +516,13 @@ func (c *Client) SetRetryMaxWaitTime(maxWaitTime time.Duration) *Client {
return c
}

// SetRetryAfter sets the callback used to calculate the wait time between
// retries. A nil callback (the default) implies exponential backoff with
// jitter. A callback may also return (0, nil) to request that default
// algorithm for a particular response, or a non-nil error to signal that the
// request is not retryable.
//
// It returns the same Client so that calls can be chained.
func (c *Client) SetRetryAfter(callback RetryAfterFunc) *Client {
c.RetryAfter = callback
return c
}

// AddRetryCondition method adds a retry condition function to array of functions
// that are checked to determine if the request is retried. The request will
// retry if any of the functions return true and error is nil.
Expand Down
7 changes: 1 addition & 6 deletions request.go
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ func (r *Request) Execute(method, url string) (*Response, error) {

var resp *Response
attempt := 0
_ = Backoff(
err = Backoff(
func() (*Response, error) {
attempt++

Expand All @@ -575,11 +575,6 @@ func (r *Request) Execute(method, url string) (*Response, error) {
resp, err = r.client.execute(r)
if err != nil {
r.client.log.Errorf("%v, Attempt %v", err, attempt)
if r.ctx != nil && r.ctx.Err() != nil {
// stop Backoff from retrying request if request has been
// canceled by context
return resp, nil
}
}

return resp, err
Expand Down
99 changes: 75 additions & 24 deletions retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,16 @@ type (
Option func(*Options)

// RetryConditionFunc type is for retry condition function
RetryConditionFunc func(*Response, error) (bool, error)
// input: non-nil Response OR request execution error
RetryConditionFunc func(*Response, error) bool

// RetryAfterFunc returns time to wait before retry
// For example, it can parse HTTP Retry-After header
// https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html
// Non-nil error is returned if it is found that request is not retryable
// (0, nil) is a special result means 'use default algorithm'
RetryAfterFunc func(*Client, *Response) (time.Duration, error)

// Options to hold Resty retry values.
Options struct {
maxRetries int
waitTime time.Duration
Expand Down Expand Up @@ -79,40 +86,84 @@ func Backoff(operation func() (*Response, error), options ...Option) error {
resp *Response
err error
)
base := float64(opts.waitTime) // Time to wait between each attempt
capLevel := float64(opts.maxWaitTime) // Maximum amount of wait time for the retry

for attempt := 0; attempt < opts.maxRetries; attempt++ {
resp, err = operation()
if resp != nil && resp.Request.ctx != nil && resp.Request.ctx.Err() != nil {
return err
}

needsRetry := err != nil // retry on operation errors by default

var needsRetry bool
var conditionErr error
for _, condition := range opts.retryConditions {
needsRetry, conditionErr = condition(resp, err)
if needsRetry || conditionErr != nil {
needsRetry = condition(resp, err)
if needsRetry {
break
}
}

// If the operation returned no error, there was no condition satisfied and
// there was no error caused by the conditional functions.
if err == nil && !needsRetry && conditionErr == nil {
return nil
}
// Adding capped exponential backup with jitter
// See the following article...
// http://www.awsarchitectureblog.com/2015/03/backoff.html
temp := math.Min(capLevel, base*math.Exp2(float64(attempt)))
ri := int(temp / 2)
if ri <= 0 {
ri = 1<<31 - 1 // max int for arch 386
if !needsRetry {
return err
}
sleepDuration := time.Duration(math.Abs(float64(ri + rand.Intn(ri))))

if sleepDuration < opts.waitTime {
sleepDuration = opts.waitTime
waitTime, err2 := sleepDuration(resp, opts.waitTime, opts.maxWaitTime, attempt)
if err2 != nil {
if err == nil {
err = err2
}
return err
}
time.Sleep(sleepDuration)
time.Sleep(waitTime)
}

return err
}

// sleepDuration computes how long to wait before the next retry attempt.
//
// resp is the response produced by the attempt that just finished; it may be
// nil, in which case no callback is consulted. min and max bound the wait
// time; a negative max is replaced by a built-in ceiling. attempt is the
// attempt counter used as the exponent of the backoff.
//
// If the client that issued resp has a RetryAfter callback, it is asked first:
//   - a non-nil error is returned unchanged together with a zero duration
//     (for example "API quota exceeded");
//   - a zero duration means "use the default algorithm";
//   - any other duration is clamped: negative values and values above max
//     become max, and the result is never below min.
//
// Otherwise the wait is a capped exponential backoff with jitter, see
// http://www.awsarchitectureblog.com/2015/03/backoff.html. The result is drawn
// from [ceiling/2, ceiling), where ceiling = min(max, min*2^attempt), and is
// never below min.
func sleepDuration(resp *Response, min, max time.Duration, attempt int) (time.Duration, error) {
	const maxInt = 1<<31 - 1 // max int for arch 386

	if max < 0 {
		max = maxInt
	}

	// 1. Check for custom callback.
	if resp != nil {
		if retryAfterFunc := resp.Request.client.RetryAfter; retryAfterFunc != nil {
			result, err := retryAfterFunc(resp.Request.client, resp)
			if err != nil {
				return 0, err // i.e. 'API quota exceeded'
			}
			// A zero result falls through to the default algorithm below.
			if result != 0 {
				if result < 0 || max < result {
					result = max
				}
				if result < min {
					result = min
				}
				return result, nil
			}
		}
	}

	// 2. Return capped exponential backoff with jitter.
	ceiling := math.Min(float64(max), float64(min)*math.Exp2(float64(attempt)))

	// With a ceiling below two nanoseconds there is no interval left to draw
	// jitter from (and rand.Int63n would panic on a non-positive argument).
	// NaN shows up when min is zero and 2^attempt overflows to +Inf. In all of
	// these cases fall back to the lower bound instead of sleeping for an
	// arbitrary multi-second interval that ignores max.
	if math.IsNaN(ceiling) || ceiling < 2 {
		return min, nil
	}

	// Work in 64-bit durations so the sum cannot overflow on 32-bit targets.
	// ceiling never exceeds float64(max), hence half <= 2^62 and
	// half + jitter < 2^63.
	half := time.Duration(ceiling / 2)
	result := half + time.Duration(rand.Int63n(int64(half)))

	if result < min {
		result = min
	}

	return result, nil
}
Loading

0 comments on commit e5f377d

Please sign in to comment.