Skip to content

Commit

Permalink
Merge pull request #18428 from ahrtr/improve_leaseRenew_20240809
Browse files Browse the repository at this point in the history
Skip the leadership check if the etcd instance is actively processing heartbeats
  • Loading branch information
ahrtr authored Aug 14, 2024
2 parents c147425 + b8b0cf8 commit a8b30b1
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 2 deletions.
14 changes: 12 additions & 2 deletions server/etcdserver/raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ type toApply struct {
type raftNode struct {
lg *zap.Logger

tickMu *sync.Mutex
tickMu *sync.RWMutex
// timestamp of the latest tick
latestTickTs time.Time
raftNodeConfig

// a chan to send/receive snapshot
Expand Down Expand Up @@ -132,8 +134,9 @@ func newRaftNode(cfg raftNodeConfig) *raftNode {
raft.SetLogger(lg)
r := &raftNode{
lg: cfg.lg,
tickMu: new(sync.Mutex),
tickMu: new(sync.RWMutex),
raftNodeConfig: cfg,
latestTickTs: time.Now(),
// set up contention detectors for raft heartbeat message.
// expect to send a heartbeat within 2 heartbeat intervals.
td: contention.NewTimeoutDetector(2 * cfg.heartbeat),
Expand All @@ -155,9 +158,16 @@ func newRaftNode(cfg raftNodeConfig) *raftNode {
// tick advances the underlying raft node by one tick and then records the
// wall-clock time of that tick in latestTickTs.
//
// Both steps run while tickMu is held exclusively, so a concurrent
// getLatestTickTs never observes a partially updated timestamp. The unlock is
// deferred so the mutex is released on every exit path, including a panic
// raised from Tick.
func (r *raftNode) tick() {
	r.tickMu.Lock()
	defer r.tickMu.Unlock()

	r.Tick()
	r.latestTickTs = time.Now()
}

// getLatestTickTs returns the timestamp stored in latestTickTs. The value is
// copied out while holding tickMu for reading, so it is safe to call
// concurrently with tick.
func (r *raftNode) getLatestTickTs() time.Time {
	r.tickMu.RLock()
	ts := r.latestTickTs
	r.tickMu.RUnlock()
	return ts
}

// start prepares and starts raftNode in a new goroutine. It is no longer safe
// to modify the fields after it has been started.
func (r *raftNode) start(rh *raftReadyHandler) {
Expand Down
16 changes: 16 additions & 0 deletions server/etcdserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -904,10 +904,26 @@ func (s *EtcdServer) revokeExpiredLeases(leases []*lease.Lease) {
})
}

// isActive reports whether the raft node has ticked recently. It returns true
// when less than 3 * Cfg.TickMs milliseconds have elapsed since the timestamp
// returned by getLatestTickTs, and false otherwise.
func (s *EtcdServer) isActive() bool {
	tickInterval := time.Duration(s.Cfg.TickMs) * time.Millisecond
	// The instance counts as active until three tick intervals have passed
	// since the most recent recorded tick.
	deadline := s.r.getLatestTickTs().Add(3 * tickInterval)
	return time.Now().Before(deadline)
}

// ensureLeadership checks whether current member is still the leader.
func (s *EtcdServer) ensureLeadership() bool {
lg := s.Logger()

if s.isActive() {
lg.Debug("The member is active, skip checking leadership",
zap.Time("latestTickTs", s.r.getLatestTickTs()),
zap.Time("now", time.Now()))
return true
}

ctx, cancel := context.WithTimeout(s.ctx, s.Cfg.ReqTimeout())
defer cancel()
if err := s.linearizableReadNotify(ctx); err != nil {
Expand Down
43 changes: 43 additions & 0 deletions server/etcdserver/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/coreos/go-semver/semver"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/zap"
"go.uber.org/zap/zaptest"

Expand Down Expand Up @@ -1538,3 +1539,45 @@ func TestWaitAppliedIndex(t *testing.T) {
})
}
}

// TestIsActive verifies that EtcdServer.isActive compares the time elapsed
// since the latest recorded tick against a threshold of 3 * TickMs.
//
// Each case runs as a named subtest so that a failure identifies the case
// that produced it.
func TestIsActive(t *testing.T) {
	cases := []struct {
		name                  string
		tickMs                uint
		durationSinceLastTick time.Duration
		expectActive          bool
	}{
		{
			name:                  "1.5*tickMs,active",
			tickMs:                100,
			durationSinceLastTick: 150 * time.Millisecond,
			expectActive:          true,
		},
		{
			name:                  "2*tickMs,active",
			tickMs:                200,
			durationSinceLastTick: 400 * time.Millisecond,
			expectActive:          true,
		},
		{
			name:                  "4*tickMs,not active",
			tickMs:                150,
			durationSinceLastTick: 600 * time.Millisecond,
			expectActive:          false,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Build a minimal server whose latest tick is backdated by the
			// case's duration; only the fields read by isActive are set.
			s := EtcdServer{
				Cfg: config.ServerConfig{
					TickMs: tc.tickMs,
				},
				r: raftNode{
					tickMu:       new(sync.RWMutex),
					latestTickTs: time.Now().Add(-tc.durationSinceLastTick),
				},
			}

			require.Equal(t, tc.expectActive, s.isActive())
		})
	}
}

0 comments on commit a8b30b1

Please sign in to comment.