aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorgary rong <garyrong0905@gmail.com>2018-04-16 16:18:24 +0800
committerPéter Szilágyi <peterke@gmail.com>2018-04-16 16:18:24 +0800
commit6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30 (patch)
tree16ae8a8ed2b36c1983fe82f553a6bb0d1ad88c3c
parent2a1fc3d155870ba2e42f108a232b4b6a3ad9d939 (diff)
downloaddexon-6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30.tar
dexon-6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30.tar.gz
dexon-6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30.tar.bz2
dexon-6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30.tar.lz
dexon-6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30.tar.xz
dexon-6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30.tar.zst
dexon-6b2b328cdb51a99e2ce55f0bc22eeb9733c9ec30.zip
ethdb: add leveldb write delay statistic (#16499)
-rw-r--r--ethdb/database.go95
1 files changed, 81 insertions, 14 deletions
diff --git a/ethdb/database.go b/ethdb/database.go
index 243b7f8d3..001d8f0bb 100644
--- a/ethdb/database.go
+++ b/ethdb/database.go
@@ -17,6 +17,7 @@
package ethdb
import (
+ "fmt"
"strconv"
"strings"
"sync"
@@ -32,17 +33,25 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)
+const (
+ writeDelayNThreshold = 200
+ writeDelayThreshold = 350 * time.Millisecond
+ writeDelayWarningThrottler = 1 * time.Minute
+)
+
var OpenFileLimit = 64
type LDBDatabase struct {
fn string // filename for reporting
db *leveldb.DB // LevelDB instance
- compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
- compReadMeter metrics.Meter // Meter for measuring the data read during compaction
- compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
- diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
- diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
+ compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
+ compReadMeter metrics.Meter // Meter for measuring the data read during compaction
+ compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
+ writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
+ writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction
+ diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
+ diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
quitLock sync.Mutex // Mutex protecting the quit channel access
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
@@ -147,16 +156,17 @@ func (db *LDBDatabase) LDB() *leveldb.DB {
// Meter configures the database metrics collectors and
func (db *LDBDatabase) Meter(prefix string) {
- // Short circuit metering if the metrics system is disabled
- if !metrics.Enabled {
- return
+ if metrics.Enabled {
+ // Initialize all the metrics collector at the requested prefix
+ db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
+ db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
+ db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
+ db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
+ db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
}
- // Initialize all the metrics collector at the requested prefix
- db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
- db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
- db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
- db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
- db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
+ // Initialize write delay metrics no matter we are in metric mode or not.
+ db.writeDelayMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/duration", nil)
+ db.writeDelayNMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/counter", nil)
// Create a quit channel for the periodic collector and run it
db.quitLock.Lock()
@@ -178,6 +188,9 @@ func (db *LDBDatabase) Meter(prefix string) {
// 2 | 523 | 1000.37159 | 7.26059 | 66.86342 | 66.77884
// 3 | 570 | 1113.18458 | 0.00000 | 0.00000 | 0.00000
//
+// This is how the write delay look like (currently):
+// DelayN:5 Delay:406.604657ms
+//
// This is how the iostats look like (currently):
// Read(MB):3895.04860 Write(MB):3654.64712
func (db *LDBDatabase) meter(refresh time.Duration) {
@@ -188,6 +201,14 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
}
// Create storage for iostats.
var iostats [2]float64
+
+ // Create storage and warning log tracer for write delay.
+ var (
+ delaystats [2]int64
+ lastWriteDelay time.Time
+ lastWriteDelayN time.Time
+ )
+
// Iterate ad infinitum and collect the stats
for i := 1; ; i++ {
// Retrieve the database stats
@@ -236,6 +257,52 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
db.compWriteMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
}
+ // Retrieve the write delay statistic
+ writedelay, err := db.db.GetProperty("leveldb.writedelay")
+ if err != nil {
+ db.log.Error("Failed to read database write delay statistic", "err", err)
+ return
+ }
+ var (
+ delayN int64
+ delayDuration string
+ duration time.Duration
+ )
+ if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s", &delayN, &delayDuration); n != 2 || err != nil {
+ db.log.Error("Write delay statistic not found")
+ return
+ }
+ duration, err = time.ParseDuration(delayDuration)
+ if err != nil {
+ db.log.Error("Failed to parse delay duration", "err", err)
+ return
+ }
+ if db.writeDelayNMeter != nil {
+ db.writeDelayNMeter.Mark(delayN - delaystats[0])
+ // If the write delay number been collected in the last minute exceeds the predefined threshold,
+ // print a warning log here.
+ // If a warning that db performance is laggy has been displayed,
+ // any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
+ if int(db.writeDelayNMeter.Rate1()) > writeDelayNThreshold &&
+ time.Now().After(lastWriteDelayN.Add(writeDelayWarningThrottler)) {
+ db.log.Warn("Write delay number exceeds the threshold (200 per second) in the last minute")
+ lastWriteDelayN = time.Now()
+ }
+ }
+ if db.writeDelayMeter != nil {
+ db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
+ // If the write delay duration been collected in the last minute exceeds the predefined threshold,
+ // print a warning log here.
+ // If a warning that db performance is laggy has been displayed,
+ // any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
+ if int64(db.writeDelayMeter.Rate1()) > writeDelayThreshold.Nanoseconds() &&
+ time.Now().After(lastWriteDelay.Add(writeDelayWarningThrottler)) {
+ db.log.Warn("Write delay duration exceeds the threshold (35% of the time) in the last minute")
+ lastWriteDelay = time.Now()
+ }
+ }
+ delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()
+
// Retrieve the database iostats.
ioStats, err := db.db.GetProperty("leveldb.iostats")
if err != nil {