diff options
Diffstat (limited to 'trie')
-rw-r--r-- | trie/iterator.go | 141 | ||||
-rw-r--r-- | trie/iterator_test.go | 32 | ||||
-rw-r--r-- | trie/sync_test.go | 95 |
3 files changed, 260 insertions, 8 deletions
diff --git a/trie/iterator.go b/trie/iterator.go index 5f205e081..ceef52ec8 100644 --- a/trie/iterator.go +++ b/trie/iterator.go @@ -18,22 +18,27 @@ package trie import ( "bytes" + "fmt" + "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/logger" "github.com/ethereum/go-ethereum/logger/glog" ) +// Iterator is a key-value trie iterator to traverse the data contents. type Iterator struct { trie *Trie - Key []byte - Value []byte + Key []byte // Current data key on which the iterator is positioned + Value []byte // Current data value on which the iterator is positioned } +// NewIterator creates a new key-value iterator. func NewIterator(trie *Trie) *Iterator { return &Iterator{trie: trie, Key: nil} } +// Next moves the iterator forward by one key-value entry. func (self *Iterator) Next() bool { isIterStart := false if self.Key == nil { @@ -142,6 +147,138 @@ func (self *Iterator) key(node interface{}) []byte { } return self.key(rn) } + return nil +} + +// nodeIteratorState represents the iteration state at one particular node of the +// trie, which can be resumed at a later invocation. +type nodeIteratorState struct { + hash common.Hash // Hash of the node being iterated (nil if not standalone) + node node // Trie node being iterated + parent common.Hash // Hash of the first full ancestor node (nil if current is the root) + child int // Child to be processed next +} + +// NodeIterator is an iterator to traverse the trie post-order. 
+type NodeIterator struct { + trie *Trie // Trie being iterated + stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state + + Hash common.Hash // Hash of the current node being iterated (nil if not standalone) + Node node // Current node being iterated (internal representation) + Parent common.Hash // Hash of the first full ancestor node (nil if current is the root) + Leaf bool // Flag whether the current node is a value (data) node + LeafBlob []byte // Data blob contained within a leaf (otherwise nil) + + Error error // Failure set in case of an internal error in the iterator +} + +// NewNodeIterator creates a post-order trie iterator. +func NewNodeIterator(trie *Trie) *NodeIterator { + if bytes.Compare(trie.Root(), emptyRoot.Bytes()) == 0 { + return new(NodeIterator) + } + return &NodeIterator{trie: trie} +} + +// Next moves the iterator to the next node, returning whether there are any +// further nodes. In case of an internal error this method returns false and +// sets the Error field to the encountered failure. +func (it *NodeIterator) Next() bool { + // If the iterator failed previously, don't do anything + if it.Error != nil { + return false + } + // Otherwise step forward with the iterator and report any errors + if err := it.step(); err != nil { + it.Error = err + return false + } + return it.retrieve() +} + +// step moves the iterator to the next node of the trie. 
+func (it *NodeIterator) step() error { + // Abort if we reached the end of the iteration + if it.trie == nil { + return nil + } + // Initialize the iterator if we've just started, or pop off the old node otherwise + if len(it.stack) == 0 { + it.stack = append(it.stack, &nodeIteratorState{node: it.trie.root, child: -1}) + if it.stack[0].node == nil { + return fmt.Errorf("root node missing: %x", it.trie.Root()) + } + } else { + it.stack = it.stack[:len(it.stack)-1] + if len(it.stack) == 0 { + it.trie = nil + return nil + } + } + // Continue iteration to the next child + for { + parent := it.stack[len(it.stack)-1] + ancestor := parent.hash + if (ancestor == common.Hash{}) { + ancestor = parent.parent + } + if node, ok := parent.node.(fullNode); ok { + // Full node, traverse all children, then the node itself + if parent.child >= len(node) { + break + } + for parent.child++; parent.child < len(node); parent.child++ { + if current := node[parent.child]; current != nil { + it.stack = append(it.stack, &nodeIteratorState{node: current, parent: ancestor, child: -1}) + break + } + } + } else if node, ok := parent.node.(shortNode); ok { + // Short node, traverse the pointer singleton child, then the node itself + if parent.child >= 0 { + break + } + parent.child++ + it.stack = append(it.stack, &nodeIteratorState{node: node.Val, parent: ancestor, child: -1}) + } else if hash, ok := parent.node.(hashNode); ok { + // Hash node, resolve the hash child from the database, then the node itself + if parent.child >= 0 { + break + } + parent.child++ + node, err := it.trie.resolveHash(hash, nil, nil) + if err != nil { + return err + } + it.stack = append(it.stack, &nodeIteratorState{hash: common.BytesToHash(hash), node: node, parent: ancestor, child: -1}) + } else { + break + } + } return nil } + +// retrieve pulls and caches the current trie node the iterator is traversing. 
+// In case of a value node, the additional leaf blob is also populated with the +// data contents for external interpretation. +// +// The method returns whether there are any more data left for inspection. +func (it *NodeIterator) retrieve() bool { + // Clear out any previously set values + it.Hash, it.Node, it.Parent, it.Leaf, it.LeafBlob = common.Hash{}, nil, common.Hash{}, false, nil + + // If the iteration's done, return no available data + if it.trie == nil { + return false + } + // Otherwise retrieve the current node and resolve leaf accessors + state := it.stack[len(it.stack)-1] + + it.Hash, it.Node, it.Parent = state.hash, state.node, state.parent + if value, ok := it.Node.(valueNode); ok { + it.Leaf, it.LeafBlob = true, []byte(value) + } + return true +} diff --git a/trie/iterator_test.go b/trie/iterator_test.go index fdc60b412..dc8276116 100644 --- a/trie/iterator_test.go +++ b/trie/iterator_test.go @@ -16,7 +16,12 @@ package trie -import "testing" +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" +) func TestIterator(t *testing.T) { trie := newEmpty() @@ -47,3 +52,28 @@ func TestIterator(t *testing.T) { } } } + +// Tests that the node iterator indeed walks over the entire database contents. 
+func TestNodeIteratorCoverage(t *testing.T) { + // Create some arbitrary test trie to iterate + db, trie, _ := makeTestTrie() + + // Gather all the node hashes found by the iterator + hashes := make(map[common.Hash]struct{}) + for it := NewNodeIterator(trie); it.Next(); { + if it.Hash != (common.Hash{}) { + hashes[it.Hash] = struct{}{} + } + } + // Cross check the hashes and the database itself + for hash, _ := range hashes { + if _, err := db.Get(hash.Bytes()); err != nil { + t.Errorf("failed to retrieve reported node %x: %v", hash, err) + } + } + for _, key := range db.(*ethdb.MemDatabase).Keys() { + if _, ok := hashes[common.BytesToHash(key)]; !ok { + t.Errorf("state entry not reported %x", key) + } + } +} diff --git a/trie/sync_test.go b/trie/sync_test.go index 9c036a3a9..a81f7650e 100644 --- a/trie/sync_test.go +++ b/trie/sync_test.go @@ -33,6 +33,7 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) { // Fill it with some arbitrary data content := make(map[string][]byte) for i := byte(0); i < 255; i++ { + // Map the same data under multiple keys key, val := common.LeftPadBytes([]byte{1, i}, 32), []byte{i} content[string(key)] = val trie.Update(key, val) @@ -40,9 +41,19 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) { key, val = common.LeftPadBytes([]byte{2, i}, 32), []byte{i} content[string(key)] = val trie.Update(key, val) + + // Add some other data to inflate the trie + for j := byte(3); j < 13; j++ { + key, val = common.LeftPadBytes([]byte{j, i}, 32), []byte{j, i} + content[string(key)] = val + trie.Update(key, val) + } } trie.Commit() + // Remove any potentially cached data from the test trie creation + globalCache.Clear() + // Return the generated trie return db, trie, content } @@ -50,10 +61,17 @@ func makeTestTrie() (ethdb.Database, *Trie, map[string][]byte) { // checkTrieContents cross references a reconstructed trie with an expected data // content map. 
func checkTrieContents(t *testing.T, db Database, root []byte, content map[string][]byte) { + // Remove any potentially cached data from the trie synchronisation + globalCache.Clear() + + // Check root availability and trie contents trie, err := New(common.BytesToHash(root), db) if err != nil { t.Fatalf("failed to create trie at %x: %v", root, err) } + if err := checkTrieConsistency(db, common.BytesToHash(root)); err != nil { + t.Fatalf("inconsistent trie at %x: %v", root, err) + } for key, val := range content { if have := trie.Get([]byte(key)); bytes.Compare(have, val) != 0 { t.Errorf("entry %x: content mismatch: have %x, want %x", key, have, val) @@ -61,6 +79,22 @@ func checkTrieContents(t *testing.T, db Database, root []byte, content map[strin } } +// checkTrieConsistency checks that all nodes in a trie are indeed present. +func checkTrieConsistency(db Database, root common.Hash) error { + // Remove any potentially cached data from the test trie creation or previous checks + globalCache.Clear() + + // Create and iterate a trie rooted in a subnode + trie, err := New(root, db) + if err != nil { + return nil // Consider a non-existent state consistent + } + it := NewNodeIterator(trie) + for it.Next() { + } + return it.Error +} + // Tests that an empty trie is not scheduled for syncing. func TestEmptyTrieSync(t *testing.T) { emptyA, _ := New(common.Hash{}, nil) @@ -102,7 +136,7 @@ func testIterativeTrieSync(t *testing.T, batch int) { } queue = append(queue[:0], sched.Missing(batch)...) } - // Cross check that the two tries re in sync + // Cross check that the two tries are in sync checkTrieContents(t, dstDb, srcTrie.Root(), srcData) } @@ -132,7 +166,7 @@ func TestIterativeDelayedTrieSync(t *testing.T) { } queue = append(queue[len(results):], sched.Missing(10000)...) 
} - // Cross check that the two tries re in sync + // Cross check that the two tries are in sync checkTrieContents(t, dstDb, srcTrie.Root(), srcData) } @@ -173,7 +207,7 @@ func testIterativeRandomTrieSync(t *testing.T, batch int) { queue[hash] = struct{}{} } } - // Cross check that the two tries re in sync + // Cross check that the two tries are in sync checkTrieContents(t, dstDb, srcTrie.Root(), srcData) } @@ -216,7 +250,7 @@ func TestIterativeRandomDelayedTrieSync(t *testing.T) { queue[hash] = struct{}{} } } - // Cross check that the two tries re in sync + // Cross check that the two tries are in sync checkTrieContents(t, dstDb, srcTrie.Root(), srcData) } @@ -252,6 +286,57 @@ func TestDuplicateAvoidanceTrieSync(t *testing.T) { } queue = append(queue[:0], sched.Missing(0)...) } - // Cross check that the two tries re in sync + // Cross check that the two tries are in sync checkTrieContents(t, dstDb, srcTrie.Root(), srcData) } + +// Tests that at any point in time during a sync, only complete sub-tries are in +// the database. +func TestIncompleteTrieSync(t *testing.T) { + // Create a random trie to copy + srcDb, srcTrie, _ := makeTestTrie() + + // Create a destination trie and sync with the scheduler + dstDb, _ := ethdb.NewMemDatabase() + sched := NewTrieSync(common.BytesToHash(srcTrie.Root()), dstDb, nil) + + added := []common.Hash{} + queue := append([]common.Hash{}, sched.Missing(1)...) 
+ for len(queue) > 0 { + // Fetch a batch of trie nodes + results := make([]SyncResult, len(queue)) + for i, hash := range queue { + data, err := srcDb.Get(hash.Bytes()) + if err != nil { + t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + } + results[i] = SyncResult{hash, data} + } + // Process each of the trie nodes + if index, err := sched.Process(results); err != nil { + t.Fatalf("failed to process result #%d: %v", index, err) + } + for _, result := range results { + added = append(added, result.Hash) + } + // Check that all known sub-tries in the synced trie are complete + for _, root := range added { + if err := checkTrieConsistency(dstDb, root); err != nil { + t.Fatalf("trie inconsistent: %v", err) + } + } + // Fetch the next batch to retrieve + queue = append(queue[:0], sched.Missing(1)...) + } + // Sanity check that removing any node from the database is detected + for _, node := range added[1:] { + key := node.Bytes() + value, _ := dstDb.Get(key) + + dstDb.Delete(key) + if err := checkTrieConsistency(dstDb, added[0]); err == nil { + t.Fatalf("trie inconsistency not caught, missing: %x", key) + } + dstDb.Put(key, value) + } +} |