core/filtermaps: optimize memory allocation with bitset and cache reuse

This commit is contained in:
allen 2025-11-02 12:35:52 -05:00
parent 18a902799e
commit 2e53c11539
4 changed files with 348 additions and 44 deletions

135
core/filtermaps/bitset.go Normal file
View file

@ -0,0 +1,135 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package filtermaps
import "math/bits"
// indexBitset is a dense set of uint32 indices backed by a bitmap.
//
// The bitmap covers the closed interval [minIndex, maxIndex], which is fixed
// when the set is built by newIndexBitset; bit (idx - minIndex) records whether
// idx is a member. Indices outside that interval can never be members: Has
// reports false for them, and Set and Clear silently ignore them.
//
// Memory use is proportional to the width of the covered interval, not to the
// number of members, so the type suits clustered indices rather than widely
// scattered ones. The zero value is an empty set that cannot gain members.
type indexBitset struct {
	minIndex uint32   // smallest representable index; maps to bit 0 of bits[0]
	maxIndex uint32   // largest representable index
	bits     []uint64 // membership bitmap; nil for the empty zero value
}

// newIndexBitset creates a bitset containing exactly the given indices.
//
// The covered interval is [min(indices), max(indices)]. Duplicates are
// harmless and the input does not need to be sorted. If indices is empty, an
// empty bitset with no covered interval is returned.
func newIndexBitset(indices []uint32) *indexBitset {
	if len(indices) == 0 {
		return &indexBitset{}
	}
	// Find the interval the bitmap has to cover.
	lo, hi := indices[0], indices[0]
	for _, idx := range indices[1:] {
		if idx < lo {
			lo = idx
		}
		if idx > hi {
			hi = idx
		}
	}
	// One word per 64 covered indices, rounded up. This is written as
	// (hi-lo)/64 + 1 instead of (hi-lo+1+63)/64 because the latter wraps around
	// in uint32 arithmetic when the span approaches 2^32, which would yield a
	// bitmap that is too short and make Set index out of range.
	words := (hi-lo)/64 + 1
	set := &indexBitset{
		minIndex: lo,
		maxIndex: hi,
		bits:     make([]uint64, words),
	}
	for _, idx := range indices {
		set.Set(idx)
	}
	return set
}

// locate translates idx into the offset of its word in b.bits and the
// single-bit mask selecting it inside that word. ok is false if idx is outside
// the covered interval or the set has no bitmap, in which case the other
// results must not be used.
func (b *indexBitset) locate(idx uint32) (word uint32, mask uint64, ok bool) {
	if len(b.bits) == 0 || idx < b.minIndex || idx > b.maxIndex {
		return 0, 0, false
	}
	pos := idx - b.minIndex
	return pos / 64, uint64(1) << (pos % 64), true
}

// Has checks if an index exists in the set.
// Indices outside the covered interval are never members.
func (b *indexBitset) Has(idx uint32) bool {
	word, mask, ok := b.locate(idx)
	return ok && b.bits[word]&mask != 0
}

// Set adds an index to the set.
// The call is a no-op if idx is outside the covered interval.
func (b *indexBitset) Set(idx uint32) {
	if word, mask, ok := b.locate(idx); ok {
		b.bits[word] |= mask
	}
}

// Clear removes an index from the set.
// The call is a no-op if idx is outside the covered interval.
func (b *indexBitset) Clear(idx uint32) {
	if word, mask, ok := b.locate(idx); ok {
		b.bits[word] &^= mask
	}
}

// Count returns the number of indices in the set.
func (b *indexBitset) Count() int {
	total := 0
	for _, word := range b.bits {
		total += bits.OnesCount64(word)
	}
	return total
}

// IsEmpty checks if the set is empty.
func (b *indexBitset) IsEmpty() bool {
	for _, word := range b.bits {
		if word != 0 {
			return false
		}
	}
	return true
}

// Iterate traverses all indices in the set.
// The callback function fn is called with each index in the set.
// Iteration order is from smallest to largest.
//
// Each bitmap word is copied before its bits are visited, so fn may call Set
// or Clear on the same set (for example to remove the index it was handed).
// Changes to the word currently being visited are not observed by the running
// iteration; changes to words that have not been reached yet are.
func (b *indexBitset) Iterate(fn func(uint32)) {
	for i, word := range b.bits {
		base := b.minIndex + uint32(i)*64
		// Visit only the set bits: take the lowest one, then strip it with
		// word &= word-1 until the local copy is exhausted.
		for ; word != 0; word &= word - 1 {
			fn(base + uint32(bits.TrailingZeros64(word)))
		}
	}
}

View file

@ -0,0 +1,170 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package filtermaps
import (
"testing"
)
// TestBitsetBasic builds a bitset from a short list of indices and verifies
// membership for present and absent indices as well as the reported count.
func TestBitsetBasic(t *testing.T) {
	members := []uint32{100, 101, 105, 110, 115}
	set := newIndexBitset(members)

	// Every index the set was built from must be reported as present.
	for _, idx := range members {
		if set.Has(idx) {
			continue
		}
		t.Errorf("Expected Has(%d) = true", idx)
	}

	// Gaps inside the covered range and values just outside of it must be
	// reported as absent.
	for _, idx := range []uint32{99, 102, 103, 104, 106, 107, 108, 109, 111, 116} {
		if !set.Has(idx) {
			continue
		}
		t.Errorf("Expected Has(%d) = false", idx)
	}

	// The count has to match the number of distinct inputs.
	want, got := len(members), set.Count()
	if got != want {
		t.Errorf("Expected Count = %d, got %d", want, got)
	}
}
// TestBitsetClear verifies that clearing one index removes exactly that index
// and leaves the rest of the set untouched.
func TestBitsetClear(t *testing.T) {
	set := newIndexBitset([]uint32{100, 101, 105, 110, 115})

	set.Clear(105)

	// The cleared index is gone.
	if set.Has(105) {
		t.Error("Expected Has(105) = false after Clear")
	}
	// One member fewer than before.
	if got := set.Count(); got != 4 {
		t.Errorf("Expected Count = 4, got %d", got)
	}
	// All remaining members survive; report at most once.
	for _, idx := range []uint32{100, 101, 110, 115} {
		if !set.Has(idx) {
			t.Error("Other indices should remain")
			break
		}
	}
}
// TestBitsetSet verifies that an index inside the covered range can be added
// after construction and that the count reflects the addition.
func TestBitsetSet(t *testing.T) {
	set := newIndexBitset([]uint32{100, 105, 110})

	// 102 lies between the smallest and largest initial index.
	set.Set(102)

	present := set.Has(102)
	if !present {
		t.Error("Expected Has(102) = true after Set")
	}
	// Three initial members plus the new one.
	got := set.Count()
	if got != 4 {
		t.Errorf("Expected Count = 4, got %d", got)
	}
}
// TestBitsetIterate verifies that Iterate visits every member exactly once, in
// ascending order, and that the callback may clear the index it was handed
// without disturbing the rest of the traversal.
func TestBitsetIterate(t *testing.T) {
	// want is listed in ascending order so it can be compared position by
	// position against the order in which Iterate reports the indices.
	want := []uint32{100, 101, 105, 110, 115}
	bs := newIndexBitset(want)

	got := make([]uint32, 0, len(want))
	bs.Iterate(func(idx uint32) {
		got = append(got, idx)
	})
	// Fatal rather than Error: the positional comparison below requires equal
	// lengths.
	if len(got) != len(want) {
		t.Fatalf("Expected %d indices, got %d", len(want), len(got))
	}
	for i := range want {
		if got[i] != want[i] {
			t.Errorf("Iteration position %d: expected index %d, got %d", i, want[i], got[i])
		}
	}

	// Clearing the visited index from inside the callback must still visit
	// every member once and leave the set empty.
	visited := 0
	bs.Iterate(func(idx uint32) {
		visited++
		bs.Clear(idx)
	})
	if visited != len(want) {
		t.Errorf("Expected %d indices visited while clearing, got %d", len(want), visited)
	}
	if !bs.IsEmpty() {
		t.Errorf("Expected empty set after clearing during iteration, Count = %d", bs.Count())
	}
}
// BenchmarkBitsetVsMap compares indexBitset with map[uint32]struct{} for
// lookup, insertion and removal of a single index. Both containers are filled
// with the same 1000 consecutive indices before the timer starts.
func BenchmarkBitsetVsMap(b *testing.B) {
	const (
		firstIndex = 5000 // smallest index in the data set
		indexCount = 1000 // number of consecutive indices
		probe      = 5500 // index exercised by every sub-benchmark
	)
	indices := make([]uint32, indexCount)
	for i := range indices {
		indices[i] = uint32(firstIndex + i)
	}
	// filledMap builds the map-based reference set from the shared indices.
	filledMap := func() map[uint32]struct{} {
		m := make(map[uint32]struct{})
		for _, idx := range indices {
			m[idx] = struct{}{}
		}
		return m
	}

	b.Run("Bitset_Has", func(b *testing.B) {
		set := newIndexBitset(indices)
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			_ = set.Has(probe)
		}
	})
	b.Run("Map_Has", func(b *testing.B) {
		m := filledMap()
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			_, _ = m[probe]
		}
	})
	b.Run("Bitset_Set", func(b *testing.B) {
		set := newIndexBitset(indices)
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			set.Set(probe)
		}
	})
	b.Run("Map_Set", func(b *testing.B) {
		m := filledMap()
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			m[probe] = struct{}{}
		}
	})
	b.Run("Bitset_Clear", func(b *testing.B) {
		set := newIndexBitset(indices)
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			set.Clear(probe)
			set.Set(probe) // put the index back so the next Clear does real work
		}
	})
	b.Run("Map_Delete", func(b *testing.B) {
		m := filledMap()
		b.ResetTimer()
		for n := 0; n < b.N; n++ {
			delete(m, probe)
			m[probe] = struct{}{} // put the key back so the next delete does real work
		}
	})
}

View file

@ -48,14 +48,17 @@ var (
// mapRenderer represents a process that renders filter maps in a specified
// range according to the actual targetView.
type mapRenderer struct {
f *FilterMaps
renderBefore uint32
currentMap *renderedMap
finishedMaps map[uint32]*renderedMap
finished common.Range[uint32]
iterator *logIterator
f *FilterMaps
renderBefore uint32
currentMap *renderedMap
finishedMaps map[uint32]*renderedMap
finished common.Range[uint32]
iterator *logIterator
rowMappingCache *lru.Cache[common.Hash, lvPos]
}
type lvPos struct{ rowIndex, layerIndex uint32 }
// renderedMap represents a single filter map that is being rendered in memory.
type renderedMap struct {
filterMap filterMap
@ -110,10 +113,11 @@ func (f *FilterMaps) renderMapsFromSnapshot(cp *renderedMap) (*mapRenderer, erro
lastBlock: cp.lastBlock,
blockLvPtrs: slices.Clone(cp.blockLvPtrs),
},
finishedMaps: make(map[uint32]*renderedMap),
finished: common.NewRange(cp.mapIndex, 0),
renderBefore: math.MaxUint32,
iterator: iter,
finishedMaps: make(map[uint32]*renderedMap),
finished: common.NewRange(cp.mapIndex, 0),
renderBefore: math.MaxUint32,
iterator: iter,
rowMappingCache: lru.NewCache[common.Hash, lvPos](cachedRowMappings),
}, nil
}
@ -131,10 +135,11 @@ func (f *FilterMaps) renderMapsFromMapBoundary(firstMap, renderBefore uint32, st
mapIndex: firstMap,
lastBlock: iter.blockNumber,
},
finishedMaps: make(map[uint32]*renderedMap),
finished: common.NewRange(firstMap, 0),
renderBefore: renderBefore,
iterator: iter,
finishedMaps: make(map[uint32]*renderedMap),
finished: common.NewRange(firstMap, 0),
renderBefore: renderBefore,
iterator: iter,
rowMappingCache: lru.NewCache[common.Hash, lvPos](cachedRowMappings),
}, nil
}
@ -319,9 +324,8 @@ func (r *mapRenderer) renderCurrentMap(stopCb func() bool) (bool, error) {
if r.iterator.lvIndex == 0 {
r.currentMap.blockLvPtrs = []uint64{0}
}
type lvPos struct{ rowIndex, layerIndex uint32 }
rowMappingCache := lru.NewCache[common.Hash, lvPos](cachedRowMappings)
defer rowMappingCache.Purge()
// Clear the cache for this map rendering
r.rowMappingCache.Purge()
for r.iterator.lvIndex < uint64(r.currentMap.mapIndex+1)<<r.f.logValuesPerMap && !r.iterator.finished {
waitCnt++
@ -337,7 +341,7 @@ func (r *mapRenderer) renderCurrentMap(stopCb func() bool) (bool, error) {
waitCnt = 0
}
if logValue := r.iterator.getValueHash(); logValue != (common.Hash{}) {
lvp, cached := rowMappingCache.Get(logValue)
lvp, cached := r.rowMappingCache.Get(logValue)
if !cached {
lvp = lvPos{rowIndex: r.f.rowIndex(r.currentMap.mapIndex, 0, logValue)}
}
@ -348,7 +352,7 @@ func (r *mapRenderer) renderCurrentMap(stopCb func() bool) (bool, error) {
}
r.currentMap.filterMap[lvp.rowIndex] = append(r.currentMap.filterMap[lvp.rowIndex], r.f.columnIndex(r.iterator.lvIndex, &logValue))
if !cached {
rowMappingCache.Add(logValue, lvp)
r.rowMappingCache.Add(logValue, lvp)
}
}
if err := r.iterator.next(); err != nil {

View file

@ -591,14 +591,9 @@ type matchSequence struct {
// newInstance creates a new instance of matchSequence.
func (m *matchSequence) newInstance(mapIndices []uint32) matcherInstance {
// determine set of indices to request from next matcher
needMatched := make(map[uint32]struct{})
baseRequested := make(map[uint32]struct{})
nextRequested := make(map[uint32]struct{})
for _, mapIndex := range mapIndices {
needMatched[mapIndex] = struct{}{}
baseRequested[mapIndex] = struct{}{}
nextRequested[mapIndex] = struct{}{}
}
needMatched := newIndexBitset(mapIndices)
baseRequested := newIndexBitset(mapIndices)
nextRequested := newIndexBitset(mapIndices)
return &matchSequenceInstance{
matchSequence: m,
baseInstance: m.base.newInstance(mapIndices),
@ -693,7 +688,7 @@ func newMatchSequence(params *Params, matchers []matcher) matcher {
type matchSequenceInstance struct {
*matchSequence
baseInstance, nextInstance matcherInstance
baseRequested, nextRequested, needMatched map[uint32]struct{}
baseRequested, nextRequested, needMatched *indexBitset
baseResults, nextResults map[uint32]potentialMatches
}
@ -715,26 +710,26 @@ func (m *matchSequenceInstance) getMatchesForLayer(ctx context.Context, layerInd
}
}
// evaluate and return matched results where possible
for mapIndex := range m.needMatched {
if _, ok := m.baseRequested[mapIndex]; ok {
continue
m.needMatched.Iterate(func(mapIndex uint32) {
if m.baseRequested.Has(mapIndex) {
return
}
if _, ok := m.nextRequested[mapIndex]; ok {
continue
if m.nextRequested.Has(mapIndex) {
return
}
matchedResults = append(matchedResults, matcherResult{
mapIndex: mapIndex,
matches: m.params.matchResults(mapIndex, m.offset, m.baseResults[mapIndex], m.nextResults[mapIndex]),
})
delete(m.needMatched, mapIndex)
}
m.needMatched.Clear(mapIndex)
})
return matchedResults, nil
}
// dropIndices implements matcherInstance.
func (m *matchSequenceInstance) dropIndices(dropIndices []uint32) {
for _, mapIndex := range dropIndices {
delete(m.needMatched, mapIndex)
m.needMatched.Clear(mapIndex)
}
var dropBase, dropNext []uint32
for _, mapIndex := range dropIndices {
@ -764,7 +759,7 @@ func (m *matchSequenceInstance) evalBase(ctx context.Context, layerIndex uint32)
)
for _, r := range results {
m.baseResults[r.mapIndex] = r.matches
delete(m.baseRequested, r.mapIndex)
m.baseRequested.Clear(r.mapIndex)
stats.add(r.matches != nil && len(r.matches) == 0, layerIndex)
}
m.mergeBaseStats(stats)
@ -792,7 +787,7 @@ func (m *matchSequenceInstance) evalNext(ctx context.Context, layerIndex uint32)
)
for _, r := range results {
m.nextResults[r.mapIndex] = r.matches
delete(m.nextRequested, r.mapIndex)
m.nextRequested.Clear(r.mapIndex)
stats.add(r.matches != nil && len(r.matches) == 0, layerIndex)
}
m.mergeNextStats(stats)
@ -811,15 +806,15 @@ func (m *matchSequenceInstance) evalNext(ctx context.Context, layerIndex uint32)
// matcher based on the known results from the next matcher and removes it
// from the internal requested set and returns true if possible.
func (m *matchSequenceInstance) dropBase(mapIndex uint32) bool {
if _, ok := m.baseRequested[mapIndex]; !ok {
if !m.baseRequested.Has(mapIndex) {
return false
}
if _, ok := m.needMatched[mapIndex]; ok {
if m.needMatched.Has(mapIndex) {
if next := m.nextResults[mapIndex]; next == nil || len(next) > 0 {
return false
}
}
delete(m.baseRequested, mapIndex)
m.baseRequested.Clear(mapIndex)
return true
}
@ -827,15 +822,15 @@ func (m *matchSequenceInstance) dropBase(mapIndex uint32) bool {
// matcher based on the known results from the base matcher and removes it
// from the internal requested set and returns true if possible.
func (m *matchSequenceInstance) dropNext(mapIndex uint32) bool {
if _, ok := m.nextRequested[mapIndex]; !ok {
if !m.nextRequested.Has(mapIndex) {
return false
}
if _, ok := m.needMatched[mapIndex]; ok {
if m.needMatched.Has(mapIndex) {
if base := m.baseResults[mapIndex]; base == nil || len(base) > 0 {
return false
}
}
delete(m.nextRequested, mapIndex)
m.nextRequested.Clear(mapIndex)
return true
}