ethdb/pebble: expose stall counter of pebble (#31782)

This pull request adds more Pebble metrics, tracking the number of write stalls along with the specific reason for each stall.
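A minimal sketch of how the new counters could be read back out of the metrics registry; it assumes the database was opened with the default chain-data namespace "eth/db/chaindata/" (an assumption, not part of this change), that only the two reason labels registered below ("memtable" and "L0") exist, and that the metrics package's Gauge.Snapshot().Value() read path is used:

package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/metrics"
)

func main() {
	// onWriteStallBegin registers one cumulative gauge per stall reason under
	// <namespace>stall/count/<reason>; onWriteStallEnd feeds a resetting timer
	// under <namespace>stall/time/<reason>.
	for _, reason := range []string{"memtable", "L0"} {
		name := "eth/db/chaindata/stall/count/" + reason // namespace is an assumption
		stalls := metrics.GetOrRegisterGauge(name, nil).Snapshot().Value()
		fmt.Printf("write stalls (%s): %d\n", reason, stalls)
	}
}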
rjl493456442 2025-05-12 14:37:01 +08:00 committed by GitHub
parent fa86416ce9
commit 098cc7e878


@@ -21,6 +21,7 @@ import (
"bytes"
"fmt"
"runtime"
"strings"
"sync"
"sync/atomic"
"time"
@@ -57,6 +58,7 @@ const (
type Database struct {
fn string // filename for reporting
db *pebble.DB // Underlying pebble storage engine
namespace string // Namespace for metrics
compTimeMeter *metrics.Meter // Meter for measuring the total time spent in database compaction
compReadMeter *metrics.Meter // Meter for measuring the data read during compaction
@@ -71,7 +73,17 @@ type Database struct {
nonlevel0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in non0 level
seekCompGauge *metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt
manualMemAllocGauge *metrics.Gauge // Gauge for tracking amount of non-managed memory currently allocated
liveMemTablesGauge *metrics.Gauge // Gauge for tracking the number of live memory tables
zombieMemTablesGauge *metrics.Gauge // Gauge for tracking the number of zombie memory tables
blockCacheHitGauge *metrics.Gauge // Gauge for tracking the number of total hit in the block cache
blockCacheMissGauge *metrics.Gauge // Gauge for tracking the number of total miss in the block cache
tableCacheHitGauge *metrics.Gauge // Gauge for tracking the number of total hit in the table cache
tableCacheMissGauge *metrics.Gauge // Gauge for tracking the number of total miss in the table cache
filterHitGauge *metrics.Gauge // Gauge for tracking the number of total hit in bloom filter
filterMissGauge *metrics.Gauge // Gauge for tracking the number of total miss in bloom filter
estimatedCompDebtGauge *metrics.Gauge // Gauge for tracking the number of bytes that need to be compacted
liveCompGauge *metrics.Gauge // Gauge for tracking the number of in-progress compactions
liveCompSizeGauge *metrics.Gauge // Gauge for tracking the size of in-progress compactions
levelsGauge []*metrics.Gauge // Gauge for tracking the number of tables in levels
quitLock sync.RWMutex // Mutex protecting the quit channel and the closed flag
@@ -88,6 +100,7 @@ type Database struct {
writeStalled atomic.Bool // Flag whether the write is stalled
writeDelayStartTime time.Time // The start time of the latest write stall
writeDelayReason string // The reason of the latest write stall
writeDelayCount atomic.Int64 // Total number of write stall counts
writeDelayTime atomic.Int64 // Total time spent in write stalls
@@ -120,11 +133,30 @@ func (d *Database) onWriteStallBegin(b pebble.WriteStallBeginInfo) {
d.writeDelayStartTime = time.Now()
d.writeDelayCount.Add(1)
d.writeStalled.Store(true)
// Take just the first word of the reason. These are two potential
// reasons for the write stall:
// - memtable count limit reached
// - L0 file count limit exceeded
reason := b.Reason
if i := strings.IndexByte(reason, ' '); i != -1 {
reason = reason[:i]
}
if reason == "L0" || reason == "memtable" {
d.writeDelayReason = reason
metrics.GetOrRegisterGauge(d.namespace+"stall/count/"+reason, nil).Inc(1)
}
}
func (d *Database) onWriteStallEnd() {
d.writeDelayTime.Add(int64(time.Since(d.writeDelayStartTime)))
d.writeStalled.Store(false)
if d.writeDelayReason != "" {
metrics.GetOrRegisterResettingTimer(d.namespace+"stall/time/"+d.writeDelayReason, nil).UpdateSince(d.writeDelayStartTime)
d.writeDelayReason = ""
}
d.writeDelayStartTime = time.Time{}
}
// panicLogger is just a noop logger to disable Pebble's internal logger.
@@ -270,6 +302,17 @@ func New(file string, cache int, handles int, namespace string, readonly bool) (
db.nonlevel0CompGauge = metrics.GetOrRegisterGauge(namespace+"compact/nonlevel0", nil)
db.seekCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/seek", nil)
db.manualMemAllocGauge = metrics.GetOrRegisterGauge(namespace+"memory/manualalloc", nil)
db.liveMemTablesGauge = metrics.GetOrRegisterGauge(namespace+"table/live", nil)
db.zombieMemTablesGauge = metrics.GetOrRegisterGauge(namespace+"table/zombie", nil)
db.blockCacheHitGauge = metrics.GetOrRegisterGauge(namespace+"cache/block/hit", nil)
db.blockCacheMissGauge = metrics.GetOrRegisterGauge(namespace+"cache/block/miss", nil)
db.tableCacheHitGauge = metrics.GetOrRegisterGauge(namespace+"cache/table/hit", nil)
db.tableCacheMissGauge = metrics.GetOrRegisterGauge(namespace+"cache/table/miss", nil)
db.filterHitGauge = metrics.GetOrRegisterGauge(namespace+"filter/hit", nil)
db.filterMissGauge = metrics.GetOrRegisterGauge(namespace+"filter/miss", nil)
db.estimatedCompDebtGauge = metrics.GetOrRegisterGauge(namespace+"compact/estimateDebt", nil)
db.liveCompGauge = metrics.GetOrRegisterGauge(namespace+"compact/live/count", nil)
db.liveCompSizeGauge = metrics.GetOrRegisterGauge(namespace+"compact/live/size", nil)
// Start up the metrics gathering and return
go db.meter(metricsGatheringInterval, namespace)
@@ -517,6 +560,18 @@ func (d *Database) meter(refresh time.Duration, namespace string) {
d.nonlevel0CompGauge.Update(nonLevel0CompCount)
d.level0CompGauge.Update(level0CompCount)
d.seekCompGauge.Update(stats.Compact.ReadCount)
d.liveCompGauge.Update(stats.Compact.NumInProgress)
d.liveCompSizeGauge.Update(stats.Compact.InProgressBytes)
d.liveMemTablesGauge.Update(stats.MemTable.Count)
d.zombieMemTablesGauge.Update(stats.MemTable.ZombieCount)
d.estimatedCompDebtGauge.Update(int64(stats.Compact.EstimatedDebt))
d.tableCacheHitGauge.Update(stats.TableCache.Hits)
d.tableCacheMissGauge.Update(stats.TableCache.Misses)
d.blockCacheHitGauge.Update(stats.BlockCache.Hits)
d.blockCacheMissGauge.Update(stats.BlockCache.Misses)
d.filterHitGauge.Update(stats.Filter.Hits)
d.filterMissGauge.Update(stats.Filter.Misses)
for i, level := range stats.Levels {
// Append metrics for additional layers