diff options
-rw-r--r-- | cmd/geth/chaincmd.go | 18 | ||||
-rw-r--r-- | core/state/statedb.go | 8 | ||||
-rw-r--r-- | trie/hasher.go | 33 | ||||
-rw-r--r-- | trie/node.go | 26 | ||||
-rw-r--r-- | trie/proof.go | 2 | ||||
-rw-r--r-- | trie/sync.go | 6 | ||||
-rw-r--r-- | trie/trie.go | 35 | ||||
-rw-r--r-- | trie/trie_test.go | 104 |
8 files changed, 158 insertions, 74 deletions
diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 5ef278a64..d7cdf1064 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -31,6 +31,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/logger/glog" + "github.com/ethereum/go-ethereum/trie" "github.com/syndtr/goleveldb/leveldb/util" "gopkg.in/urfave/cli.v1" ) @@ -89,21 +90,30 @@ func importChain(ctx *cli.Context) error { if err := utils.ImportChain(chain, ctx.Args().First()); err != nil { utils.Fatalf("Import error: %v", err) } - fmt.Printf("Import done in %v, compacting...\n", time.Since(start)) + fmt.Printf("Import done in %v.\n", time.Since(start)) - // Compact the entire database to more accurately measure disk io and print the stats if db, ok := chainDb.(*ethdb.LDBDatabase); ok { + // Output pre-compaction stats mostly to see the import trashing + stats, err := db.LDB().GetProperty("leveldb.stats") + if err != nil { + utils.Fatalf("Failed to read database stats: %v", err) + } + fmt.Println(stats) + + // Compact the entire database to more accurately measure disk io and print the stats start = time.Now() - if err := db.LDB().CompactRange(util.Range{}); err != nil { + fmt.Println("Compacting entire database...") + if err = db.LDB().CompactRange(util.Range{}); err != nil { utils.Fatalf("Compaction failed: %v", err) } fmt.Printf("Compaction done in %v.\n", time.Since(start)) - stats, err := db.LDB().GetProperty("leveldb.stats") + stats, err = db.LDB().GetProperty("leveldb.stats") if err != nil { utils.Fatalf("Failed to read database stats: %v", err) } fmt.Println(stats) + fmt.Println("Trie cache misses:", trie.CacheMisses()) } return nil } diff --git a/core/state/statedb.go b/core/state/statedb.go index dcb897628..0e46e5e2c 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -39,12 +39,12 @@ import ( var StartingNonce uint64 const ( - // Number of past tries to keep. The arbitrarily chosen value here - // is max uncle depth + 1. - maxPastTries = 8 + // Number of past tries to keep. This value is chosen such that + // reasonable chain reorg depths will hit an existing trie. + maxPastTries = 12 // Trie cache generation limit. - maxTrieCacheGen = 100 + maxTrieCacheGen = 120 // Number of codehash->size associations to keep. codeSizeCacheSize = 100000 diff --git a/trie/hasher.go b/trie/hasher.go index e395e00d7..b6223bf32 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -58,7 +58,7 @@ func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error) return hash, n, nil } if n.canUnload(h.cachegen, h.cachelimit) { - // Evict the node from cache. All of its subnodes will have a lower or equal + // Unload the node from cache. All of its subnodes will have a lower or equal // cache generation number. return hash, hash, nil } @@ -75,23 +75,20 @@ func (h *hasher) hash(n node, db DatabaseWriter, force bool) (node, node, error) if err != nil { return hashNode{}, n, err } - // Cache the hash of the ndoe for later reuse. - if hash, ok := hashed.(hashNode); ok && !force { - switch cached := cached.(type) { - case *shortNode: - cached = cached.copy() - cached.flags.hash = hash - if db != nil { - cached.flags.dirty = false - } - return hashed, cached, nil - case *fullNode: - cached = cached.copy() - cached.flags.hash = hash - if db != nil { - cached.flags.dirty = false - } - return hashed, cached, nil + // Cache the hash of the ndoe for later reuse and remove + // the dirty flag in commit mode. It's fine to assign these values directly + // without copying the node first because hashChildren copies it. + cachedHash, _ := hashed.(hashNode) + switch cn := cached.(type) { + case *shortNode: + cn.flags.hash = cachedHash + if db != nil { + cn.flags.dirty = false + } + case *fullNode: + cn.flags.hash = cachedHash + if db != nil { + cn.flags.dirty = false } } return hashed, cached, nil diff --git a/trie/node.go b/trie/node.go index de9752c93..4aa0cab65 100644 --- a/trie/node.go +++ b/trie/node.go @@ -104,8 +104,8 @@ func (n valueNode) fstring(ind string) string { return fmt.Sprintf("%x ", []byte(n)) } -func mustDecodeNode(hash, buf []byte) node { - n, err := decodeNode(hash, buf) +func mustDecodeNode(hash, buf []byte, cachegen uint16) node { + n, err := decodeNode(hash, buf, cachegen) if err != nil { panic(fmt.Sprintf("node %x: %v", hash, err)) } @@ -113,7 +113,7 @@ func mustDecodeNode(hash, buf []byte) node { } // decodeNode parses the RLP encoding of a trie node. -func decodeNode(hash, buf []byte) (node, error) { +func decodeNode(hash, buf []byte, cachegen uint16) (node, error) { if len(buf) == 0 { return nil, io.ErrUnexpectedEOF } @@ -123,22 +123,22 @@ func decodeNode(hash, buf []byte) (node, error) { } switch c, _ := rlp.CountValues(elems); c { case 2: - n, err := decodeShort(hash, buf, elems) + n, err := decodeShort(hash, buf, elems, cachegen) return n, wrapError(err, "short") case 17: - n, err := decodeFull(hash, buf, elems) + n, err := decodeFull(hash, buf, elems, cachegen) return n, wrapError(err, "full") default: return nil, fmt.Errorf("invalid number of list elements: %v", c) } } -func decodeShort(hash, buf, elems []byte) (node, error) { +func decodeShort(hash, buf, elems []byte, cachegen uint16) (node, error) { kbuf, rest, err := rlp.SplitString(elems) if err != nil { return nil, err } - flag := nodeFlag{hash: hash} + flag := nodeFlag{hash: hash, gen: cachegen} key := compactDecode(kbuf) if key[len(key)-1] == 16 { // value node @@ -148,17 +148,17 @@ func decodeShort(hash, buf, elems []byte) (node, error) { } return &shortNode{key, append(valueNode{}, val...), flag}, nil } - r, _, err := decodeRef(rest) + r, _, err := decodeRef(rest, cachegen) if err != nil { return nil, wrapError(err, "val") } return &shortNode{key, r, flag}, nil } -func decodeFull(hash, buf, elems []byte) (*fullNode, error) { - n := &fullNode{flags: nodeFlag{hash: hash}} +func decodeFull(hash, buf, elems []byte, cachegen uint16) (*fullNode, error) { + n := &fullNode{flags: nodeFlag{hash: hash, gen: cachegen}} for i := 0; i < 16; i++ { - cld, rest, err := decodeRef(elems) + cld, rest, err := decodeRef(elems, cachegen) if err != nil { return n, wrapError(err, fmt.Sprintf("[%d]", i)) } @@ -176,7 +176,7 @@ func decodeFull(hash, buf, elems []byte) (*fullNode, error) { const hashLen = len(common.Hash{}) -func decodeRef(buf []byte) (node, []byte, error) { +func decodeRef(buf []byte, cachegen uint16) (node, []byte, error) { kind, val, rest, err := rlp.Split(buf) if err != nil { return nil, buf, err @@ -189,7 +189,7 @@ func decodeRef(buf []byte) (node, []byte, error) { err := fmt.Errorf("oversized embedded node (size is %d bytes, want size < %d)", size, hashLen) return nil, buf, err } - n, err := decodeNode(nil, buf) + n, err := decodeNode(nil, buf, cachegen) return n, rest, err case kind == rlp.String && len(val) == 0: // empty node diff --git a/trie/proof.go b/trie/proof.go index f193b52df..bea5e5c09 100644 --- a/trie/proof.go +++ b/trie/proof.go @@ -101,7 +101,7 @@ func VerifyProof(rootHash common.Hash, key []byte, proof []rlp.RawValue) (value if !bytes.Equal(sha.Sum(nil), wantHash) { return nil, fmt.Errorf("bad proof node %d: hash mismatch", i) } - n, err := decodeNode(wantHash, buf) + n, err := decodeNode(wantHash, buf, 0) if err != nil { return nil, fmt.Errorf("bad proof node %d: %v", i, err) } diff --git a/trie/sync.go b/trie/sync.go index 400dff903..30caf6980 100644 --- a/trie/sync.go +++ b/trie/sync.go @@ -82,7 +82,7 @@ func (s *TrieSync) AddSubTrie(root common.Hash, depth int, parent common.Hash, c } key := root.Bytes() blob, _ := s.database.Get(key) - if local, err := decodeNode(key, blob); local != nil && err == nil { + if local, err := decodeNode(key, blob, 0); local != nil && err == nil { return } // Assemble the new sub-trie sync request @@ -158,7 +158,7 @@ func (s *TrieSync) Process(results []SyncResult) (int, error) { continue } // Decode the node data content and update the request - node, err := decodeNode(item.Hash[:], item.Data) + node, err := decodeNode(item.Hash[:], item.Data, 0) if err != nil { return i, err } @@ -246,7 +246,7 @@ func (s *TrieSync) children(req *request) ([]*request, error) { if node, ok := (*child.node).(hashNode); ok { // Try to resolve the node from the local database blob, _ := s.database.Get(node) - if local, err := decodeNode(node[:], blob); local != nil && err == nil { + if local, err := decodeNode(node[:], blob, 0); local != nil && err == nil { *child.node = local continue } diff --git a/trie/trie.go b/trie/trie.go index 65005bae8..914bf20fa 100644 --- a/trie/trie.go +++ b/trie/trie.go @@ -20,6 +20,7 @@ package trie import ( "bytes" "fmt" + "sync/atomic" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto/sha3" @@ -34,6 +35,17 @@ var ( emptyState common.Hash ) +// cacheMisses maintains the number of times a trie node was loaded from disk. +// Always use atomic operations when accessing this global variable. +var cacheMisses uint64 + +// CacheMisses retrieves a global counter measuring the number of cache misses +// the trie did since process startup. This isn't useful for anything apart from +// trie debugging purposes. +func CacheMisses() uint64 { + return atomic.LoadUint64(&cacheMisses) +} + func init() { sha3.NewKeccak256().Sum(emptyState[:0]) } @@ -93,13 +105,11 @@ func New(root common.Hash, db Database) (*Trie, error) { if db == nil { panic("trie.New: cannot use existing root without a database") } - if v, _ := trie.db.Get(root[:]); len(v) == 0 { - return nil, &MissingNodeError{ - RootHash: root, - NodeHash: root, - } + rootnode, err := trie.resolveHash(root[:], nil, nil) + if err != nil { + return nil, err } - trie.root = hashNode(root.Bytes()) + trie.root = rootnode } return trie, nil } @@ -146,14 +156,15 @@ func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode if err == nil && didResolve { n = n.copy() n.Val = newnode + n.flags.gen = t.cachegen } return value, n, didResolve, err case *fullNode: value, newnode, didResolve, err = t.tryGet(n.Children[key[pos]], key, pos+1) if err == nil && didResolve { n = n.copy() + n.flags.gen = t.cachegen n.Children[key[pos]] = newnode - } return value, n, didResolve, err case hashNode: @@ -249,7 +260,8 @@ func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error return false, n, err } n = n.copy() - n.Children[key[0]], n.flags.hash, n.flags.dirty = nn, nil, true + n.flags = t.newFlag() + n.Children[key[0]] = nn return true, n, nil case nil: @@ -333,7 +345,8 @@ func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) { return false, n, err } n = n.copy() - n.Children[key[0]], n.flags.hash, n.flags.dirty = nn, nil, true + n.flags = t.newFlag() + n.Children[key[0]] = nn // Check how many non-nil entries are left after deleting and // reduce the full node to a short node if only one entry is @@ -419,6 +432,8 @@ func (t *Trie) resolve(n node, prefix, suffix []byte) (node, error) { } func (t *Trie) resolveHash(n hashNode, prefix, suffix []byte) (node, error) { + atomic.AddUint64(&cacheMisses, 1) + enc, err := t.db.Get(n) if err != nil || enc == nil { return nil, &MissingNodeError{ @@ -429,7 +444,7 @@ func (t *Trie) resolveHash(n hashNode, prefix, suffix []byte) (node, error) { SuffixLen: len(suffix), } } - dec := mustDecodeNode(n, enc) + dec := mustDecodeNode(n, enc, t.cachegen) return dec, nil } diff --git a/trie/trie_test.go b/trie/trie_test.go index 32fbe6801..14ac5a666 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -300,25 +300,6 @@ func TestReplication(t *testing.T) { } } -// Not an actual test -func TestOutput(t *testing.T) { - t.Skip() - - base := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" - trie := newEmpty() - for i := 0; i < 50; i++ { - updateString(trie, fmt.Sprintf("%s%d", base, i), "valueeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee") - } - fmt.Println("############################## FULL ################################") - fmt.Println(trie.root) - - trie.Commit() - fmt.Println("############################## SMALL ################################") - trie2, _ := New(trie.Hash(), trie.db) - getString(trie2, base+"20") - fmt.Println(trie2.root) -} - func TestLargeValue(t *testing.T) { trie := newEmpty() trie.Update([]byte("key1"), []byte{99, 99, 99, 99}) @@ -326,14 +307,56 @@ func TestLargeValue(t *testing.T) { trie.Hash() } +type countingDB struct { + Database + gets map[string]int +} + +func (db *countingDB) Get(key []byte) ([]byte, error) { + db.gets[string(key)]++ + return db.Database.Get(key) +} + +// TestCacheUnload checks that decoded nodes are unloaded after a +// certain number of commit operations. +func TestCacheUnload(t *testing.T) { + // Create test trie with two branches. + trie := newEmpty() + key1 := "---------------------------------" + key2 := "---some other branch" + updateString(trie, key1, "this is the branch of key1.") + updateString(trie, key2, "this is the branch of key2.") + root, _ := trie.Commit() + + // Commit the trie repeatedly and access key1. + // The branch containing it is loaded from DB exactly two times: + // in the 0th and 6th iteration. + db := &countingDB{Database: trie.db, gets: make(map[string]int)} + trie, _ = New(root, db) + trie.SetCacheLimit(5) + for i := 0; i < 12; i++ { + getString(trie, key1) + trie.Commit() + } + + // Check that it got loaded two times. + for dbkey, count := range db.gets { + if count != 2 { + t.Errorf("db key %x loaded %d times, want %d times", []byte(dbkey), count, 2) + } + } +} + +// randTest performs random trie operations. +// Instances of this test are created by Generate. +type randTest []randTestStep + type randTestStep struct { op int key []byte // for opUpdate, opDelete, opGet value []byte // for opUpdate } -type randTest []randTestStep - const ( opUpdate = iota opDelete @@ -342,6 +365,7 @@ const ( opHash opReset opItercheckhash + opCheckCacheInvariant opMax // boundary value, not an actual op ) @@ -437,6 +461,44 @@ func runRandTest(rt randTest) bool { fmt.Println("hashes not equal") return false } + case opCheckCacheInvariant: + return checkCacheInvariant(tr.root, nil, tr.cachegen, false, 0) + } + } + return true +} + +func checkCacheInvariant(n, parent node, parentCachegen uint16, parentDirty bool, depth int) bool { + var children []node + var flag nodeFlag + switch n := n.(type) { + case *shortNode: + flag = n.flags + children = []node{n.Val} + case *fullNode: + flag = n.flags + children = n.Children[:] + default: + return true + } + + showerror := func() { + fmt.Printf("at depth %d node %s", depth, spew.Sdump(n)) + fmt.Printf("parent: %s", spew.Sdump(parent)) + } + if flag.gen > parentCachegen { + fmt.Printf("cache invariant violation: %d > %d\n", flag.gen, parentCachegen) + showerror() + return false + } + if depth > 0 && !parentDirty && flag.dirty { + fmt.Printf("cache invariant violation: child is dirty but parent isn't\n") + showerror() + return false + } + for _, child := range children { + if !checkCacheInvariant(child, n, flag.gen, flag.dirty, depth+1) { + return false } } return true |