From 25982375a8ed57f623775951e6cdef21bfbc2b34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felf=C3=B6ldi=20Zsolt?= Date: Tue, 12 Jun 2018 15:58:47 +0200 Subject: les: fix retriever logic (#16776) This PR fixes a retriever logic bug. When a peer had a soft timeout and then a response arrived, it always assumed it was the same peer even though it could have been a later requested one that did not time out at all yet. In this case the logic went to an illegal state and deadlocked, causing a goroutine leak. Fixes #16243 and replaces #16359. Thanks to @riceke for finding the bug in the logic. --- les/retrieve.go | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/les/retrieve.go b/les/retrieve.go index e262a3cb4..a9037a38e 100644 --- a/les/retrieve.go +++ b/les/retrieve.go @@ -69,9 +69,9 @@ type sentReq struct { lock sync.RWMutex // protect access to sentTo map sentTo map[distPeer]sentReqToPeer - reqQueued bool // a request has been queued but not sent - reqSent bool // a request has been sent but not timed out - reqSrtoCount int // number of requests that reached soft (but not hard) timeout + lastReqQueued bool // last request has been queued but not sent + lastReqSentTo distPeer // if not nil then last request has been sent to given peer but not timed out + reqSrtoCount int // number of requests that reached soft (but not hard) timeout } // sentReqToPeer notifies the request-from-peer goroutine (tryRequest) about a response @@ -180,7 +180,7 @@ type reqStateFn func() reqStateFn // retrieveLoop is the retrieval state machine event loop func (r *sentReq) retrieveLoop() { go r.tryRequest() - r.reqQueued = true + r.lastReqQueued = true state := r.stateRequesting for state != nil { @@ -214,7 +214,7 @@ func (r *sentReq) stateRequesting() reqStateFn { case rpSoftTimeout: // last request timed out, try asking a new peer go r.tryRequest() - r.reqQueued = true + r.lastReqQueued = true return r.stateRequesting case rpDeliveredValid: r.stop(nil) @@ -233,7 +233,7 @@ func (r *sentReq) stateNoMorePeers() reqStateFn { select { case <-time.After(retryQueue): go r.tryRequest() - r.reqQueued = true + r.lastReqQueued = true return r.stateRequesting case ev := <-r.eventsCh: r.update(ev) @@ -260,22 +260,26 @@ func (r *sentReq) stateStopped() reqStateFn { func (r *sentReq) update(ev reqPeerEvent) { switch ev.event { case rpSent: - r.reqQueued = false - if ev.peer != nil { - r.reqSent = true - } + r.lastReqQueued = false + r.lastReqSentTo = ev.peer case rpSoftTimeout: - r.reqSent = false + r.lastReqSentTo = nil r.reqSrtoCount++ - case rpHardTimeout, rpDeliveredValid, rpDeliveredInvalid: + case rpHardTimeout: r.reqSrtoCount-- + case rpDeliveredValid, rpDeliveredInvalid: + if ev.peer == r.lastReqSentTo { + r.lastReqSentTo = nil + } else { + r.reqSrtoCount-- + } } } // waiting returns true if the retrieval mechanism is waiting for an answer from // any peer func (r *sentReq) waiting() bool { - return r.reqQueued || r.reqSent || r.reqSrtoCount > 0 + return r.lastReqQueued || r.lastReqSentTo != nil || r.reqSrtoCount > 0 } // tryRequest tries to send the request to a new peer and waits for it to either -- cgit v1.2.3