This commit is contained in:
Novikov Kirill 2026-05-21 21:54:32 -07:00 committed by GitHub
commit c184320294
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 184 additions and 3 deletions

View file

@ -122,12 +122,12 @@ func (c *collector) addTimer(name string, m *metrics.TimerSnapshot) {
} }
func (c *collector) addResettingTimer(name string, m *metrics.ResettingTimerSnapshot) { func (c *collector) addResettingTimer(name string, m *metrics.ResettingTimerSnapshot) {
if m.Count() <= 0 { if m.TotalCount() <= 0 {
return return
} }
pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}
ps := m.Percentiles(pv) ps := m.Percentiles(pv)
c.writeSummaryCounter(name, m.Count()) c.writeSummaryCounter(name, m.TotalCount())
c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name)))
for i := range pv { for i := range pv {
c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i])

View file

@ -21,6 +21,7 @@ import (
"os" "os"
"strings" "strings"
"testing" "testing"
"time"
"github.com/ethereum/go-ethereum/metrics" "github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/metrics/internal" "github.com/ethereum/go-ethereum/metrics/internal"
@ -51,6 +52,49 @@ func TestCollector(t *testing.T) {
} }
} }
// TestResettingTimerCumulativePrometheus checks that the "_count" line the
// collector writes for a resetting timer keeps growing from one scrape to the
// next, and is still written when a scrape happens with no new updates.
func TestResettingTimerCumulativePrometheus(t *testing.T) {
	reg := metrics.NewRegistry()
	rt := metrics.NewRegisteredResettingTimer("test/resetting", reg)

	// scrape feeds every metric in reg to a brand-new collector and returns
	// the text accumulated in that collector's buffer.
	scrape := func() string {
		col := newCollector()
		reg.Each(func(name string, i interface{}) {
			col.Add(name, i)
		})
		return col.buff.String()
	}

	// Scrape 1: two updates have been recorded.
	rt.Update(10 * time.Millisecond)
	rt.Update(20 * time.Millisecond)
	if out := scrape(); !strings.Contains(out, "test_resetting_count 2") {
		t.Fatalf("first scrape should have count 2, got:\n%s", out)
	}

	// Scrape 2: one more update; the expected count is the running total (3),
	// not the size of the latest window (1).
	rt.Update(30 * time.Millisecond)
	if out := scrape(); !strings.Contains(out, "test_resetting_count 3") {
		t.Fatalf("second scrape should have cumulative count 3, got:\n%s", out)
	}

	// Scrape 3: nothing new was recorded, yet the metric must still be
	// written and must still carry the total of 3.
	if out := scrape(); !strings.Contains(out, "test_resetting_count 3") {
		t.Fatalf("third scrape should still report cumulative count 3, got:\n%s", out)
	}
}
func findFirstDiffPos(a, b string) string { func findFirstDiffPos(a, b string) string {
yy := strings.Split(b, "\n") yy := strings.Split(b, "\n")
for i, x := range strings.Split(a, "\n") { for i, x := range strings.Split(a, "\n") {

View file

@ -1,5 +1,7 @@
package metrics package metrics
import "sync"
// ResettingSample converts an ordinary sample into one that resets whenever its // ResettingSample converts an ordinary sample into one that resets whenever its
// snapshot is retrieved. This will break for multi-monitor systems, but when only // snapshot is retrieved. This will break for multi-monitor systems, but when only
// a single metric is being pushed out, this ensures that low-frequency events don't // a single metric is being pushed out, this ensures that low-frequency events don't
@ -14,11 +16,29 @@ func ResettingSample(sample Sample) Sample {
// snapshot retrieval. // snapshot retrieval.
type resettingSample struct { type resettingSample struct {
// Sample is the wrapped sample; the Snapshot method of this type reads it
// and then clears it.
Sample Sample
// mutex is held for the full duration of Snapshot, which is where the two
// running totals below are modified.
mutex sync.Mutex
// totalCount and totalSum grow by the count and sum of every snapshot taken;
// Snapshot never resets them, even though it clears the wrapped sample.
totalCount int64 // cumulative count across all snapshots
totalSum int64 // cumulative sum across all snapshots
} }
// Snapshot returns a read-only copy of the sample with the original reset.
// The returned snapshot has cumulative count and sum values that monotonically
// increase across resets, as required by the Prometheus counter convention.
//
// Only the count and sum fields of the returned snapshot are overwritten with
// the running totals; every other field is left exactly as the wrapped
// sample's Snapshot call produced it for the window that was just cleared.
//
// The mutex is held for the whole call, so concurrent Snapshot calls on the
// same resettingSample are serialized and see consistent totals.
// NOTE(review): the mutex is only taken here; whether an update arriving
// between the inner Snapshot and Clear calls can be dropped depends on the
// wrapped Sample implementation — confirm.
func (rs *resettingSample) Snapshot() *sampleSnapshot { func (rs *resettingSample) Snapshot() *sampleSnapshot {
rs.mutex.Lock()
defer rs.mutex.Unlock()
// Capture the current window, then empty the wrapped sample so the next
// window starts from scratch.
s := rs.Sample.Snapshot() s := rs.Sample.Snapshot()
rs.Sample.Clear() rs.Sample.Clear()
// Fold this window's count and sum into the running totals. This must
// happen before the override below, while s still holds per-window values.
rs.totalCount += s.count
rs.totalSum += s.sum
// Replace the per-window count and sum with the running totals so that
// Prometheus _count and _sum are monotonically increasing counters.
s.count = rs.totalCount
s.sum = rs.totalSum
return s return s
} }

View file

@ -36,6 +36,9 @@ type ResettingTimer struct {
values []int64 values []int64
sum int64 // sum is a running count of the total sum, used later to calculate mean sum int64 // sum is a running count of the total sum, used later to calculate mean
totalCount int64 // cumulative count across all snapshots
totalSum int64 // cumulative sum across all snapshots
mutex sync.Mutex mutex sync.Mutex
} }
@ -43,7 +46,15 @@ type ResettingTimer struct {
func (t *ResettingTimer) Snapshot() *ResettingTimerSnapshot { func (t *ResettingTimer) Snapshot() *ResettingTimerSnapshot {
t.mutex.Lock() t.mutex.Lock()
defer t.mutex.Unlock() defer t.mutex.Unlock()
snapshot := &ResettingTimerSnapshot{}
// Accumulate cumulative totals before resetting.
t.totalCount += int64(len(t.values))
t.totalSum += t.sum
snapshot := &ResettingTimerSnapshot{
totalCount: t.totalCount,
totalSum: t.totalSum,
}
if len(t.values) > 0 { if len(t.values) > 0 {
snapshot.mean = float64(t.sum) / float64(len(t.values)) snapshot.mean = float64(t.sum) / float64(len(t.values))
snapshot.values = t.values snapshot.values = t.values
@ -84,6 +95,8 @@ type ResettingTimerSnapshot struct {
min int64 min int64
thresholdBoundaries []float64 thresholdBoundaries []float64
calculated bool calculated bool
totalCount int64 // cumulative count across all snapshots
totalSum int64 // cumulative sum across all snapshots
} }
// Count returns the length of the values from snapshot. // Count returns the length of the values from snapshot.
@ -91,6 +104,16 @@ func (t *ResettingTimerSnapshot) Count() int {
return len(t.values) return len(t.values)
} }
// TotalCount returns the cumulative count of events across all snapshots.
// The value is the timer's running total as captured when this snapshot was
// taken, and it already includes the values held by this snapshot. It differs
// from Count, which only reports the number of values in this snapshot.
func (t *ResettingTimerSnapshot) TotalCount() int64 {
return t.totalCount
}
// TotalSum returns the cumulative sum of event durations across all snapshots.
// The value is the timer's running total as captured when this snapshot was
// taken, and it already includes the sum of the values held by this snapshot.
// It is expressed as the int64 value of the recorded durations.
func (t *ResettingTimerSnapshot) TotalSum() int64 {
return t.totalSum
}
// Percentiles returns the boundaries for the input percentiles. // Percentiles returns the boundaries for the input percentiles.
// note: this method is not thread safe // note: this method is not thread safe
func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []float64 { func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []float64 {

View file

@ -5,6 +5,100 @@ import (
"time" "time"
) )
// TestResettingTimerCumulativeCountAndSum takes three consecutive snapshots of
// a resetting timer and checks that Count reports only the values recorded
// since the previous snapshot, while TotalCount and TotalSum keep accumulating
// across snapshots, including a snapshot with no new values.
func TestResettingTimerCumulativeCountAndSum(t *testing.T) {
	rt := NewResettingTimer()

	// Window 1: three values.
	rt.Update(10 * time.Millisecond)
	rt.Update(20 * time.Millisecond)
	rt.Update(30 * time.Millisecond)
	wantSum1 := int64(10*time.Millisecond + 20*time.Millisecond + 30*time.Millisecond)

	first := rt.Snapshot()
	if have := first.Count(); have != 3 {
		t.Fatalf("snap1 count: have %d, want %d", have, 3)
	}
	if have := first.TotalCount(); have != int64(3) {
		t.Fatalf("snap1 total count: have %d, want %d", have, int64(3))
	}
	if have := first.TotalSum(); have != wantSum1 {
		t.Fatalf("snap1 total sum: have %d, want %d", have, wantSum1)
	}

	// Window 2: two more values, recorded after the first snapshot emptied
	// the timer.
	rt.Update(40 * time.Millisecond)
	rt.Update(50 * time.Millisecond)
	wantSum2 := wantSum1 + int64(40*time.Millisecond+50*time.Millisecond)

	second := rt.Snapshot()
	// Count covers this window only.
	if have := second.Count(); have != 2 {
		t.Fatalf("snap2 count: have %d, want %d", have, 2)
	}
	// TotalCount spans both windows: 3 + 2.
	if have := second.TotalCount(); have != int64(5) {
		t.Fatalf("snap2 total count: have %d, want %d", have, int64(5))
	}
	// TotalSum spans both windows as well.
	if have := second.TotalSum(); have != wantSum2 {
		t.Fatalf("snap2 total sum: have %d, want %d", have, wantSum2)
	}

	// Window 3: nothing recorded; the totals must be unchanged.
	third := rt.Snapshot()
	if have := third.Count(); have != 0 {
		t.Fatalf("snap3 count: have %d, want %d", have, 0)
	}
	if have := third.TotalCount(); have != int64(5) {
		t.Fatalf("snap3 total count: have %d, want %d", have, int64(5))
	}
	if have := third.TotalSum(); have != wantSum2 {
		t.Fatalf("snap3 total sum: have %d, want %d", have, wantSum2)
	}
}
// TestResettingSampleCumulativeCountAndSum takes three consecutive snapshots of
// a ResettingSample wrapped around a uniform sample and checks that the Count
// and Sum reported by each snapshot are running totals over every snapshot
// taken so far, including one taken with no new values.
func TestResettingSampleCumulativeCountAndSum(t *testing.T) {
	Enable()
	sample := ResettingSample(NewUniformSample(100))

	// Window 1: 10 + 20 + 30 = 60 over three values.
	sample.Update(10)
	sample.Update(20)
	sample.Update(30)

	first := sample.Snapshot()
	if have := first.Count(); have != int64(3) {
		t.Fatalf("snap1 count: have %d, want %d", have, int64(3))
	}
	if have := first.Sum(); have != int64(60) {
		t.Fatalf("snap1 sum: have %d, want %d", have, int64(60))
	}

	// Window 2: 40 + 50 = 90 over two values.
	sample.Update(40)
	sample.Update(50)

	second := sample.Snapshot()
	// Running count: 3 + 2 = 5.
	if have := second.Count(); have != int64(5) {
		t.Fatalf("snap2 count: have %d, want %d", have, int64(5))
	}
	// Running sum: 60 + 90 = 150.
	if have := second.Sum(); have != int64(150) {
		t.Fatalf("snap2 sum: have %d, want %d", have, int64(150))
	}

	// Window 3: nothing recorded; the totals must be unchanged.
	third := sample.Snapshot()
	if have := third.Count(); have != int64(5) {
		t.Fatalf("snap3 count: have %d, want %d", have, int64(5))
	}
	if have := third.Sum(); have != int64(150) {
		t.Fatalf("snap3 sum: have %d, want %d", have, int64(150))
	}
}
func TestResettingTimer(t *testing.T) { func TestResettingTimer(t *testing.T) {
tests := []struct { tests := []struct {
values []int64 values []int64