From 55599ee95d4151a2502465e0afc7c47bd1acba77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Mon, 5 Feb 2018 18:40:32 +0200 Subject: core, trie: intermediate mempool between trie and database (#15857) This commit reduces database I/O by not writing every state trie to disk. --- core/state/database.go | 51 ++++++++++++++++++++++++++++++++++----------- core/state/iterator_test.go | 17 +++++++++------ core/state/state_object.go | 5 ++--- core/state/state_test.go | 6 +++--- core/state/statedb.go | 38 ++++++++++++++++++++++++++------- core/state/statedb_test.go | 14 ++++++------- core/state/sync_test.go | 44 +++++++++++++++++++------------------- 7 files changed, 114 insertions(+), 61 deletions(-) (limited to 'core/state') diff --git a/core/state/database.go b/core/state/database.go index 946625e76..36926ec69 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -40,16 +40,23 @@ const ( // Database wraps access to tries and contract code. type Database interface { - // Accessing tries: // OpenTrie opens the main account trie. - // OpenStorageTrie opens the storage trie of an account. OpenTrie(root common.Hash) (Trie, error) + + // OpenStorageTrie opens the storage trie of an account. OpenStorageTrie(addrHash, root common.Hash) (Trie, error) - // Accessing contract code: - ContractCode(addrHash, codeHash common.Hash) ([]byte, error) - ContractCodeSize(addrHash, codeHash common.Hash) (int, error) + // CopyTrie returns an independent copy of the given trie. CopyTrie(Trie) Trie + + // ContractCode retrieves a particular contract's code. + ContractCode(addrHash, codeHash common.Hash) ([]byte, error) + + // ContractCodeSize retrieves a particular contracts code's size. + ContractCodeSize(addrHash, codeHash common.Hash) (int, error) + + // TrieDB retrieves the low level trie database used for data storage. + TrieDB() *trie.Database } // Trie is a Ethereum Merkle Trie. @@ -57,26 +64,33 @@ type Trie interface { TryGet(key []byte) ([]byte, error) TryUpdate(key, value []byte) error TryDelete(key []byte) error - CommitTo(trie.DatabaseWriter) (common.Hash, error) + Commit(onleaf trie.LeafCallback) (common.Hash, error) Hash() common.Hash NodeIterator(startKey []byte) trie.NodeIterator GetKey([]byte) []byte // TODO(fjl): remove this when SecureTrie is removed + Prove(key []byte, fromLevel uint, proofDb ethdb.Putter) error } // NewDatabase creates a backing store for state. The returned database is safe for -// concurrent use and retains cached trie nodes in memory. +// concurrent use and retains cached trie nodes in memory. The pool is an optional +// intermediate trie-node memory pool between the low level storage layer and the +// high level trie abstraction. func NewDatabase(db ethdb.Database) Database { csc, _ := lru.New(codeSizeCacheSize) - return &cachingDB{db: db, codeSizeCache: csc} + return &cachingDB{ + db: trie.NewDatabase(db), + codeSizeCache: csc, + } } type cachingDB struct { - db ethdb.Database + db *trie.Database mu sync.Mutex pastTries []*trie.SecureTrie codeSizeCache *lru.Cache } +// OpenTrie opens the main account trie. func (db *cachingDB) OpenTrie(root common.Hash) (Trie, error) { db.mu.Lock() defer db.mu.Unlock() @@ -105,10 +119,12 @@ func (db *cachingDB) pushTrie(t *trie.SecureTrie) { } } +// OpenStorageTrie opens the storage trie of an account. func (db *cachingDB) OpenStorageTrie(addrHash, root common.Hash) (Trie, error) { return trie.NewSecure(root, db.db, 0) } +// CopyTrie returns an independent copy of the given trie. func (db *cachingDB) CopyTrie(t Trie) Trie { switch t := t.(type) { case cachedTrie: @@ -120,14 +136,16 @@ func (db *cachingDB) CopyTrie(t Trie) Trie { } } +// ContractCode retrieves a particular contract's code. func (db *cachingDB) ContractCode(addrHash, codeHash common.Hash) ([]byte, error) { - code, err := db.db.Get(codeHash[:]) + code, err := db.db.Node(codeHash) if err == nil { db.codeSizeCache.Add(codeHash, len(code)) } return code, err } +// ContractCodeSize retrieves a particular contracts code's size. func (db *cachingDB) ContractCodeSize(addrHash, codeHash common.Hash) (int, error) { if cached, ok := db.codeSizeCache.Get(codeHash); ok { return cached.(int), nil @@ -139,16 +157,25 @@ func (db *cachingDB) ContractCodeSize(addrHash, codeHash common.Hash) (int, erro return len(code), err } +// TrieDB retrieves any intermediate trie-node caching layer. +func (db *cachingDB) TrieDB() *trie.Database { + return db.db +} + // cachedTrie inserts its trie into a cachingDB on commit. type cachedTrie struct { *trie.SecureTrie db *cachingDB } -func (m cachedTrie) CommitTo(dbw trie.DatabaseWriter) (common.Hash, error) { - root, err := m.SecureTrie.CommitTo(dbw) +func (m cachedTrie) Commit(onleaf trie.LeafCallback) (common.Hash, error) { + root, err := m.SecureTrie.Commit(onleaf) if err == nil { m.db.pushTrie(m.SecureTrie) } return root, err } + +func (m cachedTrie) Prove(key []byte, fromLevel uint, proofDb ethdb.Putter) error { + return m.SecureTrie.Prove(key, fromLevel, proofDb) +} diff --git a/core/state/iterator_test.go b/core/state/iterator_test.go index ff66ba7a9..9e46c851c 100644 --- a/core/state/iterator_test.go +++ b/core/state/iterator_test.go @@ -21,12 +21,13 @@ import ( "testing" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" ) // Tests that the node iterator indeed walks over the entire database contents. func TestNodeIteratorCoverage(t *testing.T) { // Create some arbitrary test state to iterate - db, mem, root, _ := makeTestState() + db, root, _ := makeTestState() state, err := New(root, db) if err != nil { @@ -39,14 +40,18 @@ func TestNodeIteratorCoverage(t *testing.T) { hashes[it.Hash] = struct{}{} } } - - // Cross check the hashes and the database itself + // Cross check the iterated hashes and the database/nodepool content for hash := range hashes { - if _, err := mem.Get(hash.Bytes()); err != nil { - t.Errorf("failed to retrieve reported node %x: %v", hash, err) + if _, err := db.TrieDB().Node(hash); err != nil { + t.Errorf("failed to retrieve reported node %x", hash) + } + } + for _, hash := range db.TrieDB().Nodes() { + if _, ok := hashes[hash]; !ok { + t.Errorf("state entry not reported %x", hash) } } - for _, key := range mem.Keys() { + for _, key := range db.TrieDB().DiskDB().(*ethdb.MemDatabase).Keys() { if bytes.HasPrefix(key, []byte("secure-key-")) { continue } diff --git a/core/state/state_object.go b/core/state/state_object.go index b2378c69c..b2112bfae 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -25,7 +25,6 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" "github.com/ethereum/go-ethereum/rlp" - "github.com/ethereum/go-ethereum/trie" ) var emptyCodeHash = crypto.Keccak256(nil) @@ -238,12 +237,12 @@ func (self *stateObject) updateRoot(db Database) { // CommitTrie the storage trie of the object to dwb. // This updates the trie root. -func (self *stateObject) CommitTrie(db Database, dbw trie.DatabaseWriter) error { +func (self *stateObject) CommitTrie(db Database) error { self.updateTrie(db) if self.dbErr != nil { return self.dbErr } - root, err := self.trie.CommitTo(dbw) + root, err := self.trie.Commit(nil) if err == nil { self.data.Root = root } diff --git a/core/state/state_test.go b/core/state/state_test.go index bbae3685b..6d42d63d8 100644 --- a/core/state/state_test.go +++ b/core/state/state_test.go @@ -48,7 +48,7 @@ func (s *StateSuite) TestDump(c *checker.C) { // write some of them to the trie s.state.updateStateObject(obj1) s.state.updateStateObject(obj2) - s.state.CommitTo(s.db, false) + s.state.Commit(false) // check that dump contains the state objects that are in trie got := string(s.state.Dump()) @@ -97,7 +97,7 @@ func (s *StateSuite) TestNull(c *checker.C) { //value := common.FromHex("0x823140710bf13990e4500136726d8b55") var value common.Hash s.state.SetState(address, common.Hash{}, value) - s.state.CommitTo(s.db, false) + s.state.Commit(false) value = s.state.GetState(address, common.Hash{}) if !common.EmptyHash(value) { c.Errorf("expected empty hash. got %x", value) @@ -155,7 +155,7 @@ func TestSnapshot2(t *testing.T) { so0.deleted = false state.setStateObject(so0) - root, _ := state.CommitTo(db, false) + root, _ := state.Commit(false) state.Reset(root) // and one with deleted == true diff --git a/core/state/statedb.go b/core/state/statedb.go index 8e29104d5..776693e24 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -36,6 +36,14 @@ type revision struct { journalIndex int } +var ( + // emptyState is the known hash of an empty state trie entry. + emptyState = crypto.Keccak256Hash(nil) + + // emptyCode is the known hash of the empty EVM bytecode. + emptyCode = crypto.Keccak256Hash(nil) +) + // StateDBs within the ethereum protocol are used to store anything // within the merkle trie. StateDBs take care of caching and storing // nested states. It's the general query interface to retrieve: @@ -235,6 +243,11 @@ func (self *StateDB) GetState(a common.Address, b common.Hash) common.Hash { return common.Hash{} } +// Database retrieves the low level database supporting the lower level trie ops. +func (self *StateDB) Database() Database { + return self.db +} + // StorageTrie returns the storage trie of an account. // The return value is a copy and is nil for non-existent accounts. func (self *StateDB) StorageTrie(a common.Address) Trie { @@ -568,8 +581,8 @@ func (s *StateDB) clearJournalAndRefund() { s.refund = 0 } -// CommitTo writes the state to the given database. -func (s *StateDB) CommitTo(dbw trie.DatabaseWriter, deleteEmptyObjects bool) (root common.Hash, err error) { +// Commit writes the state to the underlying in-memory trie database. +func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) { defer s.clearJournalAndRefund() // Commit objects to the trie. @@ -583,13 +596,11 @@ func (s *StateDB) CommitTo(dbw trie.DatabaseWriter, deleteEmptyObjects bool) (ro case isDirty: // Write any contract code associated with the state object if stateObject.code != nil && stateObject.dirtyCode { - if err := dbw.Put(stateObject.CodeHash(), stateObject.code); err != nil { - return common.Hash{}, err - } + s.db.TrieDB().Insert(common.BytesToHash(stateObject.CodeHash()), stateObject.code) stateObject.dirtyCode = false } // Write any storage changes in the state object to its storage trie. - if err := stateObject.CommitTrie(s.db, dbw); err != nil { + if err := stateObject.CommitTrie(s.db); err != nil { return common.Hash{}, err } // Update the object in the main account trie. @@ -598,7 +609,20 @@ func (s *StateDB) CommitTo(dbw trie.DatabaseWriter, deleteEmptyObjects bool) (ro delete(s.stateObjectsDirty, addr) } // Write trie changes. - root, err = s.trie.CommitTo(dbw) + root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error { + var account Account + if err := rlp.DecodeBytes(leaf, &account); err != nil { + return nil + } + if account.Root != emptyState { + s.db.TrieDB().Reference(account.Root, parent) + } + code := common.BytesToHash(account.CodeHash) + if code != emptyCode { + s.db.TrieDB().Reference(code, parent) + } + return nil + }) log.Debug("Trie cache stats after commit", "misses", trie.CacheMisses(), "unloads", trie.CacheUnloads()) return root, err } diff --git a/core/state/statedb_test.go b/core/state/statedb_test.go index 5c80e3aa5..d9e3d9b79 100644 --- a/core/state/statedb_test.go +++ b/core/state/statedb_test.go @@ -97,10 +97,10 @@ func TestIntermediateLeaks(t *testing.T) { } // Commit and cross check the databases. - if _, err := transState.CommitTo(transDb, false); err != nil { + if _, err := transState.Commit(false); err != nil { t.Fatalf("failed to commit transition state: %v", err) } - if _, err := finalState.CommitTo(finalDb, false); err != nil { + if _, err := finalState.Commit(false); err != nil { t.Fatalf("failed to commit final state: %v", err) } for _, key := range finalDb.Keys() { @@ -122,8 +122,8 @@ func TestIntermediateLeaks(t *testing.T) { // https://github.com/ethereum/go-ethereum/pull/15549. func TestCopy(t *testing.T) { // Create a random state test to copy and modify "independently" - mem, _ := ethdb.NewMemDatabase() - orig, _ := New(common.Hash{}, NewDatabase(mem)) + db, _ := ethdb.NewMemDatabase() + orig, _ := New(common.Hash{}, NewDatabase(db)) for i := byte(0); i < 255; i++ { obj := orig.GetOrNewStateObject(common.BytesToAddress([]byte{i})) @@ -346,11 +346,10 @@ func (test *snapshotTest) run() bool { } action.fn(action, state) } - // Revert all snapshots in reverse order. Each revert must yield a state // that is equivalent to fresh state with all actions up the snapshot applied. for sindex--; sindex >= 0; sindex-- { - checkstate, _ := New(common.Hash{}, NewDatabase(db)) + checkstate, _ := New(common.Hash{}, state.Database()) for _, action := range test.actions[:test.snapshots[sindex]] { action.fn(action, checkstate) } @@ -409,7 +408,7 @@ func (test *snapshotTest) checkEqual(state, checkstate *StateDB) error { func (s *StateSuite) TestTouchDelete(c *check.C) { s.state.GetOrNewStateObject(common.Address{}) - root, _ := s.state.CommitTo(s.db, false) + root, _ := s.state.Commit(false) s.state.Reset(root) snapshot := s.state.Snapshot() @@ -417,7 +416,6 @@ func (s *StateSuite) TestTouchDelete(c *check.C) { if len(s.state.stateObjectsDirty) != 1 { c.Fatal("expected one dirty state object") } - s.state.RevertToSnapshot(snapshot) if len(s.state.stateObjectsDirty) != 0 { c.Fatal("expected no dirty state object") diff --git a/core/state/sync_test.go b/core/state/sync_test.go index 06c572ea6..8f14a44e7 100644 --- a/core/state/sync_test.go +++ b/core/state/sync_test.go @@ -36,10 +36,10 @@ type testAccount struct { } // makeTestState create a sample test state to test node-wise reconstruction. -func makeTestState() (Database, *ethdb.MemDatabase, common.Hash, []*testAccount) { +func makeTestState() (Database, common.Hash, []*testAccount) { // Create an empty state - mem, _ := ethdb.NewMemDatabase() - db := NewDatabase(mem) + diskdb, _ := ethdb.NewMemDatabase() + db := NewDatabase(diskdb) state, _ := New(common.Hash{}, db) // Fill it with some arbitrary data @@ -61,10 +61,10 @@ func makeTestState() (Database, *ethdb.MemDatabase, common.Hash, []*testAccount) state.updateStateObject(obj) accounts = append(accounts, acc) } - root, _ := state.CommitTo(mem, false) + root, _ := state.Commit(false) // Return the generated state - return db, mem, root, accounts + return db, root, accounts } // checkStateAccounts cross references a reconstructed state with an expected @@ -96,7 +96,7 @@ func checkTrieConsistency(db ethdb.Database, root common.Hash) error { if v, _ := db.Get(root[:]); v == nil { return nil // Consider a non existent state consistent. } - trie, err := trie.New(root, db) + trie, err := trie.New(root, trie.NewDatabase(db)) if err != nil { return err } @@ -138,7 +138,7 @@ func TestIterativeStateSyncBatched(t *testing.T) { testIterativeStateSync(t, func testIterativeStateSync(t *testing.T, batch int) { // Create a random state to copy - _, srcMem, srcRoot, srcAccounts := makeTestState() + srcDb, srcRoot, srcAccounts := makeTestState() // Create a destination state and sync with the scheduler dstDb, _ := ethdb.NewMemDatabase() @@ -148,9 +148,9 @@ func testIterativeStateSync(t *testing.T, batch int) { for len(queue) > 0 { results := make([]trie.SyncResult, len(queue)) for i, hash := range queue { - data, err := srcMem.Get(hash.Bytes()) + data, err := srcDb.TrieDB().Node(hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x", hash) } results[i] = trie.SyncResult{Hash: hash, Data: data} } @@ -170,7 +170,7 @@ func testIterativeStateSync(t *testing.T, batch int) { // partial results are returned, and the others sent only later. func TestIterativeDelayedStateSync(t *testing.T) { // Create a random state to copy - _, srcMem, srcRoot, srcAccounts := makeTestState() + srcDb, srcRoot, srcAccounts := makeTestState() // Create a destination state and sync with the scheduler dstDb, _ := ethdb.NewMemDatabase() @@ -181,9 +181,9 @@ func TestIterativeDelayedStateSync(t *testing.T) { // Sync only half of the scheduled nodes results := make([]trie.SyncResult, len(queue)/2+1) for i, hash := range queue[:len(results)] { - data, err := srcMem.Get(hash.Bytes()) + data, err := srcDb.TrieDB().Node(hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x", hash) } results[i] = trie.SyncResult{Hash: hash, Data: data} } @@ -207,7 +207,7 @@ func TestIterativeRandomStateSyncBatched(t *testing.T) { testIterativeRandomS func testIterativeRandomStateSync(t *testing.T, batch int) { // Create a random state to copy - _, srcMem, srcRoot, srcAccounts := makeTestState() + srcDb, srcRoot, srcAccounts := makeTestState() // Create a destination state and sync with the scheduler dstDb, _ := ethdb.NewMemDatabase() @@ -221,9 +221,9 @@ func testIterativeRandomStateSync(t *testing.T, batch int) { // Fetch all the queued nodes in a random order results := make([]trie.SyncResult, 0, len(queue)) for hash := range queue { - data, err := srcMem.Get(hash.Bytes()) + data, err := srcDb.TrieDB().Node(hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x", hash) } results = append(results, trie.SyncResult{Hash: hash, Data: data}) } @@ -247,7 +247,7 @@ func testIterativeRandomStateSync(t *testing.T, batch int) { // partial results are returned (Even those randomly), others sent only later. func TestIterativeRandomDelayedStateSync(t *testing.T) { // Create a random state to copy - _, srcMem, srcRoot, srcAccounts := makeTestState() + srcDb, srcRoot, srcAccounts := makeTestState() // Create a destination state and sync with the scheduler dstDb, _ := ethdb.NewMemDatabase() @@ -263,9 +263,9 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) { for hash := range queue { delete(queue, hash) - data, err := srcMem.Get(hash.Bytes()) + data, err := srcDb.TrieDB().Node(hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x", hash) } results = append(results, trie.SyncResult{Hash: hash, Data: data}) @@ -292,9 +292,9 @@ func TestIterativeRandomDelayedStateSync(t *testing.T) { // the database. func TestIncompleteStateSync(t *testing.T) { // Create a random state to copy - _, srcMem, srcRoot, srcAccounts := makeTestState() + srcDb, srcRoot, srcAccounts := makeTestState() - checkTrieConsistency(srcMem, srcRoot) + checkTrieConsistency(srcDb.TrieDB().DiskDB().(ethdb.Database), srcRoot) // Create a destination state and sync with the scheduler dstDb, _ := ethdb.NewMemDatabase() @@ -306,9 +306,9 @@ func TestIncompleteStateSync(t *testing.T) { // Fetch a batch of state nodes results := make([]trie.SyncResult, len(queue)) for i, hash := range queue { - data, err := srcMem.Get(hash.Bytes()) + data, err := srcDb.TrieDB().Node(hash) if err != nil { - t.Fatalf("failed to retrieve node data for %x: %v", hash, err) + t.Fatalf("failed to retrieve node data for %x", hash) } results[i] = trie.SyncResult{Hash: hash, Data: data} } -- cgit v1.2.3