diff options
author | Mission Liao <mission.liao@dexon.org> | 2019-03-18 17:06:38 +0800 |
---|---|---|
committer | Jimmy Hu <jimmy.hu@dexon.org> | 2019-03-18 17:06:38 +0800 |
commit | 9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101 (patch) | |
tree | a9c3c3c9520fca2205c5eef9b1355a5db0fca539 | |
parent | 89dddf787c9360f0fed76738fb515be30c7d73bc (diff) | |
download | dexon-consensus-9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101.tar dexon-consensus-9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101.tar.gz dexon-consensus-9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101.tar.bz2 dexon-consensus-9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101.tar.lz dexon-consensus-9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101.tar.xz dexon-consensus-9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101.tar.zst dexon-consensus-9e97ddc00c67ee13ceb8fc597f4f55cfd6df6101.zip |
test: try to mitigate flaky tests (#498)
* Add assertion to make sure all nodes are synced.
* Retry round event with BA's interval
We are about to modify lambdaDKG with block height, instead of timing.
Therefore, the last interval we could use is BA's interval.
* Check ctx in infinite loop
* Disable this panic temporarily
If a fast sync is triggered by receiving types.AgreementResult from a later
position, this panic would be triggered 100% of the time, e.g.
current-round: 1,
change-notary-height: 120,
agreement-result from round:2 height:121
However, I don't have an easy way to fix it for now.
* Lower down the test size for TCP
* Fix typo
* Fixup: revert the removed panic
-rw-r--r-- | core/agreement-mgr.go | 7 | ||||
-rw-r--r-- | core/consensus.go | 4 | ||||
-rw-r--r-- | core/test/transport_test.go | 2 | ||||
-rw-r--r-- | core/utils/round-event.go | 8 | ||||
-rw-r--r-- | integration_test/consensus_test.go | 30 |
5 files changed, 32 insertions, 19 deletions
diff --git a/core/agreement-mgr.go b/core/agreement-mgr.go index 0e39fa5..14aa385 100644 --- a/core/agreement-mgr.go +++ b/core/agreement-mgr.go @@ -432,6 +432,13 @@ func (mgr *agreementMgr) baRoutineForOneRound( var nextHeight uint64 var nextTime time.Time for { + // Make sure we are stoppable. + select { + case <-mgr.ctx.Done(): + breakLoop = true + return + default: + } nextHeight, nextTime = mgr.bcModule.nextBlock() if isStop(oldPos) && nextHeight == 0 { break diff --git a/core/consensus.go b/core/consensus.go index e0a6753..4d82222 100644 --- a/core/consensus.go +++ b/core/consensus.go @@ -260,8 +260,8 @@ CleanChannelLoop: if block.Position.Height > changeNotaryHeight && block.Position.Round <= currentRound { panic(fmt.Errorf( - "round not switch when confirmig: %s, %d, should switch at %d", - block, currentRound, changeNotaryHeight)) + "round not switch when confirmig: %s, %d, should switch at %d, %s", + block, currentRound, changeNotaryHeight, newPos)) } recv.restartNotary <- newPos } diff --git a/core/test/transport_test.go b/core/test/transport_test.go index 8305ee2..9140649 100644 --- a/core/test/transport_test.go +++ b/core/test/transport_test.go @@ -238,7 +238,7 @@ func (s *TransportTestSuite) TestFake() { func (s *TransportTestSuite) TestTCPLocal() { var ( - peerCount = 25 + peerCount = 13 req = s.Require() peers = make(map[types.NodeID]*testPeer) prvKeys = GenerateRandomPrivateKeys(peerCount) diff --git a/core/utils/round-event.go b/core/utils/round-event.go index 1ce877d..2689840 100644 --- a/core/utils/round-event.go +++ b/core/utils/round-event.go @@ -135,6 +135,7 @@ type RoundEvent struct { roundShift uint64 ctx context.Context ctxCancel context.CancelFunc + retryInterval time.Duration } // NewRoundEvent creates an RoundEvent instance. 
@@ -144,16 +145,17 @@ func NewRoundEvent(parentCtx context.Context, gov governanceAccessor, roundShift uint64) (*RoundEvent, error) { // We need to generate valid ending block height of this round (taken // DKG reset count into consideration). + initConfig := GetConfigWithPanic(gov, initRound, logger) e := &RoundEvent{ gov: gov, logger: logger, lastTriggeredRound: initRound, roundShift: roundShift, + retryInterval: initConfig.LambdaBA, } e.ctx, e.ctxCancel = context.WithCancel(parentCtx) e.config = RoundBasedConfig{} - e.config.SetupRoundBasedFields(initRound, GetConfigWithPanic( - gov, initRound, logger)) + e.config.SetupRoundBasedFields(initRound, initConfig) e.config.SetRoundBeginHeight(initRoundBeginHeight) // Make sure the DKG reset count in current governance can cover the initial // block height. @@ -245,7 +247,7 @@ func (e *RoundEvent) ValidateNextRound(blockHeight uint64) { select { case <-e.ctx.Done(): return - case <-time.After(500 * time.Millisecond): + case <-time.After(e.retryInterval): } } } diff --git a/integration_test/consensus_test.go b/integration_test/consensus_test.go index 70e6c1f..4784df1 100644 --- a/integration_test/consensus_test.go +++ b/integration_test/consensus_test.go @@ -537,21 +537,21 @@ ReachStop: } } - var latestPos types.Position + var latestHeight uint64 var latestNodeID types.NodeID for _, n := range nodes { n.con.Stop() time.Sleep(1 * time.Second) } for nID, n := range nodes { - pos := n.app.GetLatestDeliveredPosition() - if pos.Newer(latestPos) { - fmt.Println("Newe position", nID, pos) + _, height := n.db.GetCompactionChainTipInfo() + if height > latestHeight { + fmt.Println("Newer height", nID, height) latestNodeID = nID - latestPos = pos + latestHeight = height } } - fmt.Println("Latest node", latestNodeID, &latestPos) + fmt.Println("Latest node", latestNodeID, latestHeight) for nID, node := range nodes { if nID == latestNodeID { continue @@ -573,19 +573,15 @@ ReachStop: &common.NullLogger{}, ) } - targetNode := 
nodes[latestNodeID] for nID, node := range nodes { - if nID == latestNodeID { - continue - } syncedHeight := node.app.GetLatestDeliveredPosition().Height + 1 // FinalizationHeight = Height + 1 syncedHeight++ var err error for { fmt.Println("Syncing", nID, syncedHeight) - if syncedHeight >= latestPos.Height { + if syncedHeight >= latestHeight { break } _, syncedHeight, err = s.syncBlocksWithSomeNode( @@ -593,11 +589,19 @@ ReachStop: if err != nil { panic(err) } - fmt.Println("Syncing", nID, syncedHeight) + fmt.Println("Syncing after", nID, syncedHeight) } fmt.Println("Synced", nID, syncedHeight) } - + // Make sure all nodes are synced in db and app. + _, latestHeight = targetNode.db.GetCompactionChainTipInfo() + latestPos := targetNode.app.GetLatestDeliveredPosition() + for _, node := range nodes { + _, height := node.db.GetCompactionChainTipInfo() + s.Require().Equal(height, latestHeight) + pos := node.app.GetLatestDeliveredPosition() + s.Require().Equal(latestPos, pos) + } for _, con := range syncerCon { con.ForceSync(true) } |