forked from forks/go-ethereum
ethdb/pebble: expose stall counter of pebble (#31782)
This pull request adds more Pebble metrics, tracking the number of write stalls along with their specific reasons.
This commit is contained in:
parent
fa86416ce9
commit
098cc7e878
1 changed file with 72 additions and 17 deletions
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
|
@ -55,24 +56,35 @@ const (
|
||||||
// Apart from basic data storage functionality it also supports batch writes and
|
// Apart from basic data storage functionality it also supports batch writes and
|
||||||
// iterating over the keyspace in binary-alphabetical order.
|
// iterating over the keyspace in binary-alphabetical order.
|
||||||
type Database struct {
|
type Database struct {
|
||||||
fn string // filename for reporting
|
fn string // filename for reporting
|
||||||
db *pebble.DB // Underlying pebble storage engine
|
db *pebble.DB // Underlying pebble storage engine
|
||||||
|
namespace string // Namespace for metrics
|
||||||
|
|
||||||
compTimeMeter *metrics.Meter // Meter for measuring the total time spent in database compaction
|
compTimeMeter *metrics.Meter // Meter for measuring the total time spent in database compaction
|
||||||
compReadMeter *metrics.Meter // Meter for measuring the data read during compaction
|
compReadMeter *metrics.Meter // Meter for measuring the data read during compaction
|
||||||
compWriteMeter *metrics.Meter // Meter for measuring the data written during compaction
|
compWriteMeter *metrics.Meter // Meter for measuring the data written during compaction
|
||||||
writeDelayNMeter *metrics.Meter // Meter for measuring the write delay number due to database compaction
|
writeDelayNMeter *metrics.Meter // Meter for measuring the write delay number due to database compaction
|
||||||
writeDelayMeter *metrics.Meter // Meter for measuring the write delay duration due to database compaction
|
writeDelayMeter *metrics.Meter // Meter for measuring the write delay duration due to database compaction
|
||||||
diskSizeGauge *metrics.Gauge // Gauge for tracking the size of all the levels in the database
|
diskSizeGauge *metrics.Gauge // Gauge for tracking the size of all the levels in the database
|
||||||
diskReadMeter *metrics.Meter // Meter for measuring the effective amount of data read
|
diskReadMeter *metrics.Meter // Meter for measuring the effective amount of data read
|
||||||
diskWriteMeter *metrics.Meter // Meter for measuring the effective amount of data written
|
diskWriteMeter *metrics.Meter // Meter for measuring the effective amount of data written
|
||||||
memCompGauge *metrics.Gauge // Gauge for tracking the number of memory compaction
|
memCompGauge *metrics.Gauge // Gauge for tracking the number of memory compaction
|
||||||
level0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in level0
|
level0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in level0
|
||||||
nonlevel0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
|
nonlevel0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
|
||||||
seekCompGauge *metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
|
seekCompGauge *metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
|
||||||
manualMemAllocGauge *metrics.Gauge // Gauge for tracking amount of non-managed memory currently allocated
|
manualMemAllocGauge *metrics.Gauge // Gauge for tracking amount of non-managed memory currently allocated
|
||||||
|
liveMemTablesGauge *metrics.Gauge // Gauge for tracking the number of live memory tables
|
||||||
levelsGauge []*metrics.Gauge // Gauge for tracking the number of tables in levels
|
zombieMemTablesGauge *metrics.Gauge // Gauge for tracking the number of zombie memory tables
|
||||||
|
blockCacheHitGauge *metrics.Gauge // Gauge for tracking the number of total hit in the block cache
|
||||||
|
blockCacheMissGauge *metrics.Gauge // Gauge for tracking the number of total miss in the block cache
|
||||||
|
tableCacheHitGauge *metrics.Gauge // Gauge for tracking the number of total hit in the table cache
|
||||||
|
tableCacheMissGauge *metrics.Gauge // Gauge for tracking the number of total miss in the table cache
|
||||||
|
filterHitGauge *metrics.Gauge // Gauge for tracking the number of total hit in bloom filter
|
||||||
|
filterMissGauge *metrics.Gauge // Gauge for tracking the number of total miss in bloom filter
|
||||||
|
estimatedCompDebtGauge *metrics.Gauge // Gauge for tracking the number of bytes that need to be compacted
|
||||||
|
liveCompGauge *metrics.Gauge // Gauge for tracking the number of in-progress compactions
|
||||||
|
liveCompSizeGauge *metrics.Gauge // Gauge for tracking the size of in-progress compactions
|
||||||
|
levelsGauge []*metrics.Gauge // Gauge for tracking the number of tables in levels
|
||||||
|
|
||||||
quitLock sync.RWMutex // Mutex protecting the quit channel and the closed flag
|
quitLock sync.RWMutex // Mutex protecting the quit channel and the closed flag
|
||||||
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
|
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
|
||||||
|
|
@ -88,6 +100,7 @@ type Database struct {
|
||||||
|
|
||||||
writeStalled atomic.Bool // Flag whether the write is stalled
|
writeStalled atomic.Bool // Flag whether the write is stalled
|
||||||
writeDelayStartTime time.Time // The start time of the latest write stall
|
writeDelayStartTime time.Time // The start time of the latest write stall
|
||||||
|
writeDelayReason string // The reason of the latest write stall
|
||||||
writeDelayCount atomic.Int64 // Total number of write stall counts
|
writeDelayCount atomic.Int64 // Total number of write stall counts
|
||||||
writeDelayTime atomic.Int64 // Total time spent in write stalls
|
writeDelayTime atomic.Int64 // Total time spent in write stalls
|
||||||
|
|
||||||
|
|
@ -120,11 +133,30 @@ func (d *Database) onWriteStallBegin(b pebble.WriteStallBeginInfo) {
|
||||||
d.writeDelayStartTime = time.Now()
|
d.writeDelayStartTime = time.Now()
|
||||||
d.writeDelayCount.Add(1)
|
d.writeDelayCount.Add(1)
|
||||||
d.writeStalled.Store(true)
|
d.writeStalled.Store(true)
|
||||||
|
|
||||||
|
// Take just the first word of the reason. These are two potential
|
||||||
|
// reasons for the write stall:
|
||||||
|
// - memtable count limit reached
|
||||||
|
// - L0 file count limit exceeded
|
||||||
|
reason := b.Reason
|
||||||
|
if i := strings.IndexByte(reason, ' '); i != -1 {
|
||||||
|
reason = reason[:i]
|
||||||
|
}
|
||||||
|
if reason == "L0" || reason == "memtable" {
|
||||||
|
d.writeDelayReason = reason
|
||||||
|
metrics.GetOrRegisterGauge(d.namespace+"stall/count/"+reason, nil).Inc(1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *Database) onWriteStallEnd() {
|
func (d *Database) onWriteStallEnd() {
|
||||||
d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime)))
|
d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime)))
|
||||||
d.writeStalled.Store(false)
|
d.writeStalled.Store(false)
|
||||||
|
|
||||||
|
if d.writeDelayReason != "" {
|
||||||
|
metrics.GetOrRegisterResettingTimer(d.namespace+"stall/time/"+d.writeDelayReason, nil).UpdateSince(d.writeDelayStartTime)
|
||||||
|
d.writeDelayReason = ""
|
||||||
|
}
|
||||||
|
d.writeDelayStartTime = time.Time{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// panicLogger is just a noop logger to disable Pebble's internal logger.
|
// panicLogger is just a noop logger to disable Pebble's internal logger.
|
||||||
|
|
@ -270,6 +302,17 @@ func New(file string, cache int, handles int, namespace string, readonly bool) (
|
||||||
db.nonlevel0CompGauge = metrics.GetOrRegisterGauge(namespace+"compact/nonlevel0", nil)
|
db.nonlevel0CompGauge = metrics.GetOrRegisterGauge(namespace+"compact/nonlevel0", nil)
|
||||||
db.seekCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/seek", nil)
|
db.seekCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/seek", nil)
|
||||||
db.manualMemAllocGauge = metrics.GetOrRegisterGauge(namespace+"memory/manualalloc", nil)
|
db.manualMemAllocGauge = metrics.GetOrRegisterGauge(namespace+"memory/manualalloc", nil)
|
||||||
|
db.liveMemTablesGauge = metrics.GetOrRegisterGauge(namespace+"table/live", nil)
|
||||||
|
db.zombieMemTablesGauge = metrics.GetOrRegisterGauge(namespace+"table/zombie", nil)
|
||||||
|
db.blockCacheHitGauge = metrics.GetOrRegisterGauge(namespace+"cache/block/hit", nil)
|
||||||
|
db.blockCacheMissGauge = metrics.GetOrRegisterGauge(namespace+"cache/block/miss", nil)
|
||||||
|
db.tableCacheHitGauge = metrics.GetOrRegisterGauge(namespace+"cache/table/hit", nil)
|
||||||
|
db.tableCacheMissGauge = metrics.GetOrRegisterGauge(namespace+"cache/table/miss", nil)
|
||||||
|
db.filterHitGauge = metrics.GetOrRegisterGauge(namespace+"filter/hit", nil)
|
||||||
|
db.filterMissGauge = metrics.GetOrRegisterGauge(namespace+"filter/miss", nil)
|
||||||
|
db.estimatedCompDebtGauge = metrics.GetOrRegisterGauge(namespace+"compact/estimateDebt", nil)
|
||||||
|
db.liveCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/live/count", nil)
|
||||||
|
db.liveCompSizeGauge = metrics.GetOrRegisterGauge(namespace+"compact/live/size", nil)
|
||||||
|
|
||||||
// Start up the metrics gathering and return
|
// Start up the metrics gathering and return
|
||||||
go db.meter(metricsGatheringInterval, namespace)
|
go db.meter(metricsGatheringInterval, namespace)
|
||||||
|
|
@ -517,6 +560,18 @@ func (d *Database) meter(refresh time.Duration, namespace string) {
|
||||||
d.nonlevel0CompGauge.Update(nonLevel0CompCount)
|
d.nonlevel0CompGauge.Update(nonLevel0CompCount)
|
||||||
d.level0CompGauge.Update(level0CompCount)
|
d.level0CompGauge.Update(level0CompCount)
|
||||||
d.seekCompGauge.Update(stats.Compact.ReadCount)
|
d.seekCompGauge.Update(stats.Compact.ReadCount)
|
||||||
|
d.liveCompGauge.Update(stats.Compact.NumInProgress)
|
||||||
|
d.liveCompSizeGauge.Update(stats.Compact.InProgressBytes)
|
||||||
|
|
||||||
|
d.liveMemTablesGauge.Update(stats.MemTable.Count)
|
||||||
|
d.zombieMemTablesGauge.Update(stats.MemTable.ZombieCount)
|
||||||
|
d.estimatedCompDebtGauge.Update(int64(stats.Compact.EstimatedDebt))
|
||||||
|
d.tableCacheHitGauge.Update(stats.TableCache.Hits)
|
||||||
|
d.tableCacheMissGauge.Update(stats.TableCache.Misses)
|
||||||
|
d.blockCacheHitGauge.Update(stats.BlockCache.Hits)
|
||||||
|
d.blockCacheMissGauge.Update(stats.BlockCache.Misses)
|
||||||
|
d.filterHitGauge.Update(stats.Filter.Hits)
|
||||||
|
d.filterMissGauge.Update(stats.Filter.Misses)
|
||||||
|
|
||||||
for i, level := range stats.Levels {
|
for i, level := range stats.Levels {
|
||||||
// Append metrics for additional layers
|
// Append metrics for additional layers
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue