core, eth/downloader: commit block data using batches (#15115)

* ethdb: add Putter interface and Has method
* ethdb: improve docs and add IdealBatchSize
* ethdb: remove memory batch lock
  Batches are not safe for concurrent use.
* core: use ethdb.Putter for Write* functions
  This covers the easy cases.
* core/state: simplify StateSync
* trie: optimize local node check
* ethdb: add ValueSize to Batch
* core: optimize HasHeader check
  This avoids one random database read to get the block number. For many uses of HasHeader, the expectation is that it's actually there. Using Has avoids a load + decode of the value.
* core: write fast sync block data in batches
  Collect writes into batches up to the ideal size instead of issuing many small, concurrent writes.
* eth/downloader: commit larger state batches
  Collect nodes into a batch up to the ideal size instead of committing whenever a node is received.
* core: optimize HasBlock check
  This avoids a random database read to get the number.
* core: use numberCache in HasHeader
  numberCache has higher capacity, increasing the odds of finding the header without a database lookup.
* core: write imported block data using a batch
  Restore batch writes of state and add blocks, tx entries, receipts to the same batch. The change also simplifies the miner. This commit also removes posting of logs when a forked block is imported.
* core: fix DB write error handling
* ethdb: use RLock for Has
* core: fix HasBlock comment
author: Felix Lange <fjl@users.noreply.github.com> 2017-09-10 00:03:07 +0800
committer: Péter Szilágyi <peterke@gmail.com> 2017-09-10 00:03:07 +0800
commit: 10181b57a9fb648f5fd424ca611820a3cf42c42b (patch)
tree: 8508c139bb867a6d2126fcbe6500cb08025ddbc1 /ethdb
parent: ac193e36ce4bce752717124433a8ce84c347dbf7 (diff)
download: go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar
go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.gz
go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.bz2
go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.lz
go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.xz
go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.zst
go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.zip
3 files changed, 50 insertions, 13 deletions
diff --git a/ethdb/database.go b/ethdb/database.go
index 7d5fb0b9e..93755dd7e 100644
--- a/ethdb/database.go
+++ b/ethdb/database.go
@@ -109,6 +109,10 @@ func (db *LDBDatabase) Put(key []byte, value []byte) error {
 	return db.db.Put(key, value, nil)
 }
 
+func (db *LDBDatabase) Has(key []byte) (bool, error) {
+	return db.db.Has(key, nil)
+}
+
 // Get returns the given key if it's present.
 func (db *LDBDatabase) Get(key []byte) ([]byte, error) {
 	// Measure the database get latency, if requested
@@ -271,19 +275,19 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
 	}
 }
 
-// TODO: remove this stuff and expose leveldb directly
-
 func (db *LDBDatabase) NewBatch() Batch {
 	return &ldbBatch{db: db.db, b: new(leveldb.Batch)}
 }
 
 type ldbBatch struct {
-	db *leveldb.DB
-	b  *leveldb.Batch
+	db   *leveldb.DB
+	b    *leveldb.Batch
+	size int
 }
 
 func (b *ldbBatch) Put(key, value []byte) error {
 	b.b.Put(key, value)
+	b.size += len(value)
 	return nil
 }
 
@@ -291,6 +295,10 @@ func (b *ldbBatch) Write() error {
 	return b.db.Write(b.b, nil)
 }
 
+func (b *ldbBatch) ValueSize() int {
+	return b.size
+}
+
 type table struct {
 	db     Database
 	prefix string
@@ -309,6 +317,10 @@ func (dt *table) Put(key []byte, value []byte) error {
 	return dt.db.Put(append([]byte(dt.prefix), key...), value)
 }
 
+func (dt *table) Has(key []byte) (bool, error) {
+	return dt.db.Has(append([]byte(dt.prefix), key...))
+}
+
 func (dt *table) Get(key []byte) ([]byte, error) {
 	return dt.db.Get(append([]byte(dt.prefix), key...))
 }
@@ -342,3 +354,7 @@ func (tb *tableBatch) Put(key, value []byte) error {
 func (tb *tableBatch) Write() error {
 	return tb.batch.Write()
 }
+
+func (tb *tableBatch) ValueSize() int {
+	return tb.batch.ValueSize()
+}
diff --git a/ethdb/interface.go b/ethdb/interface.go
index f4b787a52..99a5b770d 100644
--- a/ethdb/interface.go
+++ b/ethdb/interface.go
@@ -16,15 +16,29 @@
 
 package ethdb
 
-type Database interface {
+// Code using batches should try to add this much data to the batch.
+// The value was determined empirically.
+const IdealBatchSize = 100 * 1024
+
+// Putter wraps the database write operation supported by both batches and regular databases.
+type Putter interface {
 	Put(key []byte, value []byte) error
+}
+
+// Database wraps all database operations. All methods are safe for concurrent use.
+type Database interface {
+	Putter
 	Get(key []byte) ([]byte, error)
+	Has(key []byte) (bool, error)
 	Delete(key []byte) error
 	Close()
 	NewBatch() Batch
 }
 
+// Batch is a write-only database that commits changes to its host database
+// when Write is called. Batch cannot be used concurrently.
 type Batch interface {
-	Put(key, value []byte) error
+	Putter
+	ValueSize() int // amount of data in the batch
 	Write() error
 }
diff --git a/ethdb/memory_database.go b/ethdb/memory_database.go
index 11b093724..699bd0c9f 100644
--- a/ethdb/memory_database.go
+++ b/ethdb/memory_database.go
@@ -45,6 +45,14 @@ func (db *MemDatabase) Put(key []byte, value []byte) error {
 	return nil
 }
 
+func (db *MemDatabase) Has(key []byte) (bool, error) {
+	db.lock.RLock()
+	defer db.lock.RUnlock()
+
+	_, ok := db.db[string(key)]
+	return ok, nil
+}
+
 func (db *MemDatabase) Get(key []byte) ([]byte, error) {
 	db.lock.RLock()
 	defer db.lock.RUnlock()
@@ -93,21 +101,16 @@ type kv struct{ k, v []byte }
 type memBatch struct {
 	db     *MemDatabase
 	writes []kv
-	lock   sync.RWMutex
+	size   int
 }
 
 func (b *memBatch) Put(key, value []byte) error {
-	b.lock.Lock()
-	defer b.lock.Unlock()
-
 	b.writes = append(b.writes, kv{common.CopyBytes(key), common.CopyBytes(value)})
+	b.size += len(value)
 	return nil
 }
 
 func (b *memBatch) Write() error {
-	b.lock.RLock()
-	defer b.lock.RUnlock()
-
 	b.db.lock.Lock()
 	defer b.db.lock.Unlock()
 
@@ -116,3 +119,7 @@ func (b *memBatch) Write() error {
 	}
 	return nil
 }
+
+func (b *memBatch) ValueSize() int {
+	return b.size
+}
author	Felix Lange <fjl@users.noreply.github.com>	2017-09-10 00:03:07 +0800
committer	Péter Szilágyi <peterke@gmail.com>	2017-09-10 00:03:07 +0800
commit	10181b57a9fb648f5fd424ca611820a3cf42c42b (patch)
tree	8508c139bb867a6d2126fcbe6500cb08025ddbc1 /ethdb
parent	ac193e36ce4bce752717124433a8ce84c347dbf7 (diff)
download	go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.gz go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.bz2 go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.lz go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.xz go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.tar.zst go-tangerine-10181b57a9fb648f5fd424ca611820a3cf42c42b.zip