From 10181b57a9fb648f5fd424ca611820a3cf42c42b Mon Sep 17 00:00:00 2001
From: Felix Lange <fjl@users.noreply.github.com>
Date: Sat, 9 Sep 2017 18:03:07 +0200
Subject: core, eth/downloader: commit block data using batches (#15115)

* ethdb: add Putter interface and Has method

* ethdb: improve docs and add IdealBatchSize

* ethdb: remove memory batch lock

Batches are not safe for concurrent use.

* core: use ethdb.Putter for Write* functions

This covers the easy cases.

* core/state: simplify StateSync

* trie: optimize local node check

* ethdb: add ValueSize to Batch

* core: optimize HasHeader check

This avoids one random database read to get the block number. For many uses
of HasHeader, the expectation is that it's actually there. Using Has
avoids a load + decode of the value.

* core: write fast sync block data in batches

Collect writes into batches up to the ideal size instead of issuing many
small, concurrent writes.

* eth/downloader: commit larger state batches

Collect nodes into a batch up to the ideal size instead of committing
whenever a node is received.

* core: optimize HasBlock check

This avoids a random database read to get the number.

* core: use numberCache in HasHeader

numberCache has higher capacity, increasing the odds of finding the
header without a database lookup.

* core: write imported block data using a batch

Restore batch writes of state and add blocks, tx entries, receipts to
the same batch. The change also simplifies the miner.

This commit also removes posting of logs when a forked block is imported.

* core: fix DB write error handling

* ethdb: use RLock for Has

* core: fix HasBlock comment
---
 ethdb/database.go        | 24 ++++++++++++++++++++----
 ethdb/interface.go       | 18 ++++++++++++++++--
 ethdb/memory_database.go | 21 ++++++++++++++-------
 3 files changed, 50 insertions(+), 13 deletions(-)

(limited to 'ethdb')

diff --git a/ethdb/database.go b/ethdb/database.go
index 7d5fb0b9e..93755dd7e 100644
--- a/ethdb/database.go
+++ b/ethdb/database.go
@@ -109,6 +109,10 @@ func (db *LDBDatabase) Put(key []byte, value []byte) error {
 	return db.db.Put(key, value, nil)
 }
 
+func (db *LDBDatabase) Has(key []byte) (bool, error) {
+	return db.db.Has(key, nil)
+}
+
 // Get returns the given key if it's present.
 func (db *LDBDatabase) Get(key []byte) ([]byte, error) {
 	// Measure the database get latency, if requested
@@ -271,19 +275,19 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
 	}
 }
 
-// TODO: remove this stuff and expose leveldb directly
-
 func (db *LDBDatabase) NewBatch() Batch {
 	return &ldbBatch{db: db.db, b: new(leveldb.Batch)}
 }
 
 type ldbBatch struct {
-	db *leveldb.DB
-	b  *leveldb.Batch
+	db   *leveldb.DB
+	b    *leveldb.Batch
+	size int
 }
 
 func (b *ldbBatch) Put(key, value []byte) error {
 	b.b.Put(key, value)
+	b.size += len(value)
 	return nil
 }
 
@@ -291,6 +295,10 @@ func (b *ldbBatch) Write() error {
 	return b.db.Write(b.b, nil)
 }
 
+func (b *ldbBatch) ValueSize() int {
+	return b.size
+}
+
 type table struct {
 	db     Database
 	prefix string
@@ -309,6 +317,10 @@ func (dt *table) Put(key []byte, value []byte) error {
 	return dt.db.Put(append([]byte(dt.prefix), key...), value)
 }
 
+func (dt *table) Has(key []byte) (bool, error) {
+	return dt.db.Has(append([]byte(dt.prefix), key...))
+}
+
 func (dt *table) Get(key []byte) ([]byte, error) {
 	return dt.db.Get(append([]byte(dt.prefix), key...))
 }
@@ -342,3 +354,7 @@ func (tb *tableBatch) Put(key, value []byte) error {
 func (tb *tableBatch) Write() error {
 	return tb.batch.Write()
 }
+
+func (tb *tableBatch) ValueSize() int {
+	return tb.batch.ValueSize()
+}
diff --git a/ethdb/interface.go b/ethdb/interface.go
index f4b787a52..99a5b770d 100644
--- a/ethdb/interface.go
+++ b/ethdb/interface.go
@@ -16,15 +16,29 @@
 
 package ethdb
 
-type Database interface {
+// Code using batches should try to add this much data to the batch.
+// The value was determined empirically.
+const IdealBatchSize = 100 * 1024
+
+// Putter wraps the database write operation supported by both batches and regular databases.
+type Putter interface {
 	Put(key []byte, value []byte) error
+}
+
+// Database wraps all database operations. All methods are safe for concurrent use.
+type Database interface {
+	Putter
 	Get(key []byte) ([]byte, error)
+	Has(key []byte) (bool, error)
 	Delete(key []byte) error
 	Close()
 	NewBatch() Batch
 }
 
+// Batch is a write-only database that commits changes to its host database
+// when Write is called. Batch cannot be used concurrently.
 type Batch interface {
-	Put(key, value []byte) error
+	Putter
+	ValueSize() int // amount of data in the batch
 	Write() error
 }
diff --git a/ethdb/memory_database.go b/ethdb/memory_database.go
index 11b093724..699bd0c9f 100644
--- a/ethdb/memory_database.go
+++ b/ethdb/memory_database.go
@@ -45,6 +45,14 @@ func (db *MemDatabase) Put(key []byte, value []byte) error {
 	return nil
 }
 
+func (db *MemDatabase) Has(key []byte) (bool, error) {
+	db.lock.RLock()
+	defer db.lock.RUnlock()
+
+	_, ok := db.db[string(key)]
+	return ok, nil
+}
+
 func (db *MemDatabase) Get(key []byte) ([]byte, error) {
 	db.lock.RLock()
 	defer db.lock.RUnlock()
@@ -93,21 +101,16 @@ type kv struct{ k, v []byte }
 type memBatch struct {
 	db     *MemDatabase
 	writes []kv
-	lock   sync.RWMutex
+	size   int
 }
 
 func (b *memBatch) Put(key, value []byte) error {
-	b.lock.Lock()
-	defer b.lock.Unlock()
-
 	b.writes = append(b.writes, kv{common.CopyBytes(key), common.CopyBytes(value)})
+	b.size += len(value)
 	return nil
 }
 
 func (b *memBatch) Write() error {
-	b.lock.RLock()
-	defer b.lock.RUnlock()
-
 	b.db.lock.Lock()
 	defer b.db.lock.Unlock()
 
@@ -116,3 +119,7 @@ func (b *memBatch) Write() error {
 	}
 	return nil
 }
+
+func (b *memBatch) ValueSize() int {
+	return b.size
+}
-- 
cgit v1.2.3