trie: use github.com/holiman/bloomfilter/v2 #22044 (#1059)

This commit is contained in:
Daniel Liu 2025-06-03 11:52:45 +08:00 committed by GitHub
parent c4a98d2ddb
commit b18f9f2705
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 17 additions and 47 deletions

3
go.mod
View file

@ -24,7 +24,6 @@ require (
github.com/pkg/errors v0.9.1
github.com/prometheus/prometheus v1.7.2-0.20170814170113-3101606756c5
github.com/rs/cors v1.7.0
github.com/steakknife/bloomfilter v0.0.0-20180922174646-6819c0d2a570
github.com/stretchr/testify v1.8.4
github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7
golang.org/x/crypto v0.29.0
@ -50,6 +49,7 @@ require (
github.com/golang-jwt/jwt/v4 v4.5.2
github.com/google/gofuzz v1.2.0
github.com/google/uuid v1.6.0
github.com/holiman/bloomfilter/v2 v2.0.3
github.com/influxdata/influxdb-client-go/v2 v2.4.0
github.com/influxdata/influxdb1-client v0.0.0-20220302092344-a9ab5670611c
github.com/karalabe/hid v1.0.1-0.20240306101548-573246063e52
@ -87,7 +87,6 @@ require (
github.com/rivo/uniseg v0.2.0 // indirect
github.com/rogpeppe/go-internal v1.9.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 // indirect
github.com/supranational/blst v0.3.11 // indirect
github.com/tklauser/go-sysconf v0.3.14 // indirect
github.com/tklauser/numcpus v0.8.0 // indirect

6
go.sum
View file

@ -100,6 +100,8 @@ github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=
github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/holiman/bloomfilter/v2 v2.0.3 h1:73e0e/V0tCydx14a0SCYS/EWCxgwLZ18CZcZKVu0fao=
github.com/holiman/bloomfilter/v2 v2.0.3/go.mod h1:zpoh+gs7qcpqrHr3dB55AMiJwo0iURXE7ZOP9L9hSkA=
github.com/holiman/uint256 v1.2.4 h1:jUc4Nk8fm9jZabQuqr2JzednajVmBpC+oiTiXZJEApU=
github.com/holiman/uint256 v1.2.4/go.mod h1:EOMSn4q6Nyt9P6efbI3bueV4e1b3dGlUCXeiRV4ng7E=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
@ -197,10 +199,6 @@ github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible h1
github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/status-im/keycard-go v0.3.3 h1:qk/JHSkT9sMka+lVXrTOIVSgHIY7lDm46wrUqTsNa4s=
github.com/status-im/keycard-go v0.3.3/go.mod h1:wlp8ZLbsmrF6g6WjugPAx+IzoLrkdf9+mHxBEeo3Hbg=
github.com/steakknife/bloomfilter v0.0.0-20180922174646-6819c0d2a570 h1:gIlAHnH1vJb5vwEjIp5kBj/eu99p/bl0Ay2goiPe5xE=
github.com/steakknife/bloomfilter v0.0.0-20180922174646-6819c0d2a570/go.mod h1:8OR4w3TdeIHIh1g6EMY5p0gVNOovcWC+1vpc7naMuAw=
github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 h1:njlZPzLwU639dk2kqnCPPv+wNjq7Xb6EfUxe/oX0/NM=
github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3/go.mod h1:hpGUWaI9xL8pRQCTXQgocU38Qw1g0Us7n5PxxTwTCYU=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=

View file

@ -19,7 +19,6 @@ package trie
import (
"encoding/binary"
"fmt"
"math"
"sync"
"sync/atomic"
"time"
@ -29,7 +28,7 @@ import (
"github.com/XinFinOrg/XDPoSChain/ethdb"
"github.com/XinFinOrg/XDPoSChain/log"
"github.com/XinFinOrg/XDPoSChain/metrics"
"github.com/steakknife/bloomfilter"
bloomfilter "github.com/holiman/bloomfilter/v2"
)
var (
@ -41,18 +40,6 @@ var (
bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
)
// syncBloomHasher is a wrapper around a byte blob to satisfy the interface API
// requirements of the bloom library used. It's used to convert a trie hash or
// contract code hash into a 64 bit mini hash.
type syncBloomHasher []byte
func (f syncBloomHasher) Write(p []byte) (n int, err error) { panic("not implemented") }
func (f syncBloomHasher) Sum(b []byte) []byte { panic("not implemented") }
func (f syncBloomHasher) Reset() { panic("not implemented") }
func (f syncBloomHasher) BlockSize() int { panic("not implemented") }
func (f syncBloomHasher) Size() int { return 8 }
func (f syncBloomHasher) Sum64() uint64 { return binary.BigEndian.Uint64(f) }
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
// node or contract code already exists on disk or not. It self populates from the
// provided disk database on creation in a background thread and will only start
@ -69,7 +56,7 @@ type SyncBloom struct {
// initializes it from the database. The bloom is hard coded to use 3 filters.
func NewSyncBloom(memory uint64, database ethdb.Iteratee) *SyncBloom {
// Create the bloom filter to track known trie nodes
bloom, err := bloomfilter.New(memory*1024*1024*8, 3)
bloom, err := bloomfilter.New(memory*1024*1024*8, 4)
if err != nil {
panic(fmt.Sprintf("failed to create bloom: %v", err))
}
@ -107,15 +94,14 @@ func (b *SyncBloom) init(database ethdb.Iteratee) {
swap = time.Now()
)
for it.Next() && atomic.LoadUint32(&b.closed) == 0 {
// If the database entry is a trie Node, add it to the bloom
// If the database entry is a trie node, add it to the bloom
key := it.Key()
if len(key) == common.HashLength {
b.bloom.Add(syncBloomHasher(key))
b.bloom.AddHash(binary.BigEndian.Uint64(key))
bloomLoadMeter.Mark(1)
}
// If the database entry is a contract code, add it to the bloom
if ok, hash := rawdb.IsCodeKey(key); ok {
b.bloom.Add(syncBloomHasher(hash))
} else if ok, hash := rawdb.IsCodeKey(key); ok {
// If the database entry is a contract code, add it to the bloom
b.bloom.AddHash(binary.BigEndian.Uint64(hash))
bloomLoadMeter.Mark(1)
}
// If enough time elapsed since the last iterator swap, restart
@ -125,14 +111,14 @@ func (b *SyncBloom) init(database ethdb.Iteratee) {
it.Release()
it = database.NewIterator(nil, key)
log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start)))
log.Info("Initializing state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability(), "elapsed", common.PrettyDuration(time.Since(start)))
swap = time.Now()
}
}
it.Release()
// Mark the bloom filter inited and return
log.Info("Initialized fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start)))
log.Info("Initialized state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability(), "elapsed", common.PrettyDuration(time.Since(start)))
atomic.StoreUint32(&b.inited, 1)
}
@ -141,7 +127,7 @@ func (b *SyncBloom) init(database ethdb.Iteratee) {
func (b *SyncBloom) meter() {
for {
// Report the current error ration. No floats, lame, scale it up.
bloomErrorGauge.Update(int64(b.errorRate() * 100000))
bloomErrorGauge.Update(int64(b.bloom.FalsePosititveProbability() * 100000))
// Wait one second, but check termination more frequently
for i := 0; i < 10; i++ {
@ -162,7 +148,7 @@ func (b *SyncBloom) Close() error {
b.pend.Wait()
// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
log.Info("Deallocated fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate())
log.Info("Deallocated state bloom", "items", b.bloom.N(), "errorrate", b.bloom.FalsePosititveProbability())
atomic.StoreUint32(&b.inited, 0)
b.bloom = nil
@ -170,12 +156,12 @@ func (b *SyncBloom) Close() error {
return nil
}
// Add inserts a new trie Node hash into the bloom filter.
// Add inserts a new trie node hash into the bloom filter.
func (b *SyncBloom) Add(hash []byte) {
if atomic.LoadUint32(&b.closed) == 1 {
return
}
b.bloom.Add(syncBloomHasher(hash))
b.bloom.AddHash(binary.BigEndian.Uint64(hash))
bloomAddMeter.Mark(1)
}
@ -193,22 +179,9 @@ func (b *SyncBloom) Contains(hash []byte) bool {
return true
}
// Bloom initialized, check the real one and report any successful misses
maybe := b.bloom.Contains(syncBloomHasher(hash))
maybe := b.bloom.ContainsHash(binary.BigEndian.Uint64(hash))
if !maybe {
bloomMissMeter.Mark(1)
}
return maybe
}
// errorRate calculates the probability of a random containment test returning a
// false positive.
//
// We're calculating it ourselves because the bloom library we used missed a
// parentheses in the formula and calculates it wrong. And it's discontinued...
func (b *SyncBloom) errorRate() float64 {
k := float64(b.bloom.K())
n := float64(b.bloom.N())
m := float64(b.bloom.M())
return math.Pow(1.0-math.Exp((-k)*(n+0.5)/(m-1)), k)
}