diff options
author | Felföldi Zsolt <zsfelfoldi@gmail.com> | 2018-06-12 21:58:47 +0800 |
---|---|---|
committer | Felix Lange <fjl@users.noreply.github.com> | 2018-06-12 21:58:47 +0800 |
commit | 25982375a8ed57f623775951e6cdef21bfbc2b34 (patch) | |
tree | c3635a99e0ce46caf68e2d2e47ca276caadf746f | |
parent | 049f5b357200406c01f7bf2d2e2936d1d28a319b (diff) | |
download | dexon-25982375a8ed57f623775951e6cdef21bfbc2b34.tar dexon-25982375a8ed57f623775951e6cdef21bfbc2b34.tar.gz dexon-25982375a8ed57f623775951e6cdef21bfbc2b34.tar.bz2 dexon-25982375a8ed57f623775951e6cdef21bfbc2b34.tar.lz dexon-25982375a8ed57f623775951e6cdef21bfbc2b34.tar.xz dexon-25982375a8ed57f623775951e6cdef21bfbc2b34.tar.zst dexon-25982375a8ed57f623775951e6cdef21bfbc2b34.zip |
les: fix retriever logic (#16776)
This PR fixes a retriever logic bug. When a peer had a soft timeout
and then a response arrived, it always assumed it was the same peer
even though it could have been a later requested one that did not time
out at all yet. In this case the logic went to an illegal state and
deadlocked, causing a goroutine leak.
Fixes #16243 and replaces #16359.
Thanks to @riceke for finding the bug in the logic.
-rw-r--r-- | les/retrieve.go | 30 |
1 files changed, 17 insertions, 13 deletions
diff --git a/les/retrieve.go b/les/retrieve.go index e262a3cb4..a9037a38e 100644 --- a/les/retrieve.go +++ b/les/retrieve.go @@ -69,9 +69,9 @@ type sentReq struct { lock sync.RWMutex // protect access to sentTo map sentTo map[distPeer]sentReqToPeer - reqQueued bool // a request has been queued but not sent - reqSent bool // a request has been sent but not timed out - reqSrtoCount int // number of requests that reached soft (but not hard) timeout + lastReqQueued bool // last request has been queued but not sent + lastReqSentTo distPeer // if not nil then last request has been sent to given peer but not timed out + reqSrtoCount int // number of requests that reached soft (but not hard) timeout } // sentReqToPeer notifies the request-from-peer goroutine (tryRequest) about a response @@ -180,7 +180,7 @@ type reqStateFn func() reqStateFn // retrieveLoop is the retrieval state machine event loop func (r *sentReq) retrieveLoop() { go r.tryRequest() - r.reqQueued = true + r.lastReqQueued = true state := r.stateRequesting for state != nil { @@ -214,7 +214,7 @@ func (r *sentReq) stateRequesting() reqStateFn { case rpSoftTimeout: // last request timed out, try asking a new peer go r.tryRequest() - r.reqQueued = true + r.lastReqQueued = true return r.stateRequesting case rpDeliveredValid: r.stop(nil) @@ -233,7 +233,7 @@ func (r *sentReq) stateNoMorePeers() reqStateFn { select { case <-time.After(retryQueue): go r.tryRequest() - r.reqQueued = true + r.lastReqQueued = true return r.stateRequesting case ev := <-r.eventsCh: r.update(ev) @@ -260,22 +260,26 @@ func (r *sentReq) stateStopped() reqStateFn { func (r *sentReq) update(ev reqPeerEvent) { switch ev.event { case rpSent: - r.reqQueued = false - if ev.peer != nil { - r.reqSent = true - } + r.lastReqQueued = false + r.lastReqSentTo = ev.peer case rpSoftTimeout: - r.reqSent = false + r.lastReqSentTo = nil r.reqSrtoCount++ - case rpHardTimeout, rpDeliveredValid, rpDeliveredInvalid: + case rpHardTimeout: r.reqSrtoCount-- + case rpDeliveredValid, rpDeliveredInvalid: + if ev.peer == r.lastReqSentTo { + r.lastReqSentTo = nil + } else { + r.reqSrtoCount-- + } } } // waiting returns true if the retrieval mechanism is waiting for an answer from // any peer func (r *sentReq) waiting() bool { - return r.reqQueued || r.reqSent || r.reqSrtoCount > 0 + return r.lastReqQueued || r.lastReqSentTo != nil || r.reqSrtoCount > 0 } // tryRequest tries to send the request to a new peer and waits for it to either |