core/rawdb, triedb/pathdb: enable trienode history alongside existing data (#33934)

Fixes https://github.com/ethereum/go-ethereum/issues/33907

Notably there is a behavioral change:
- Previously Geth will refuse to restart if the existing trienode
history is gapped with the state data
- With this PR, the gapped trienode history will be entirely reset and
being constructed from scratch
This commit is contained in:
rjl493456442 2026-03-12 09:21:54 +08:00 committed by GitHub
parent 59512b1849
commit 7d13acd030
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 261 additions and 51 deletions

View file

@ -260,6 +260,46 @@ func basicWrite(t *testing.T, newFn func(kinds []string) ethdb.AncientStore) {
if err != nil { if err != nil {
t.Fatalf("Failed to write ancient data %v", err) t.Fatalf("Failed to write ancient data %v", err)
} }
// Write should work after truncating from tail but over the head
db.TruncateTail(200)
head, err := db.Ancients()
if err != nil {
t.Fatalf("Failed to retrieve head ancients %v", err)
}
tail, err := db.Tail()
if err != nil {
t.Fatalf("Failed to retrieve tail ancients %v", err)
}
if head != 200 || tail != 200 {
t.Fatalf("Ancient head and tail are not expected")
}
_, err = db.ModifyAncients(func(op ethdb.AncientWriteOp) error {
offset := uint64(200)
for i := 0; i < 100; i++ {
if err := op.AppendRaw("a", offset+uint64(i), dataA[i]); err != nil {
return err
}
if err := op.AppendRaw("b", offset+uint64(i), dataB[i]); err != nil {
return err
}
}
return nil
})
if err != nil {
t.Fatalf("Failed to write ancient data %v", err)
}
head, err = db.Ancients()
if err != nil {
t.Fatalf("Failed to retrieve head ancients %v", err)
}
tail, err = db.Tail()
if err != nil {
t.Fatalf("Failed to retrieve tail ancients %v", err)
}
if head != 300 || tail != 200 {
t.Fatalf("Ancient head and tail are not expected")
}
} }
func nonMutable(t *testing.T, newFn func(kinds []string) ethdb.AncientStore) { func nonMutable(t *testing.T, newFn func(kinds []string) ethdb.AncientStore) {

View file

@ -59,7 +59,7 @@ const freezerTableSize = 2 * 1000 * 1000 * 1000
// - The in-order data ensures that disk reads are always optimized. // - The in-order data ensures that disk reads are always optimized.
type Freezer struct { type Freezer struct {
datadir string datadir string
frozen atomic.Uint64 // Number of items already frozen head atomic.Uint64 // Number of items stored (including items removed from tail)
tail atomic.Uint64 // Number of the first stored item in the freezer tail atomic.Uint64 // Number of the first stored item in the freezer
// This lock synchronizes writers and the truncate operation, as well as // This lock synchronizes writers and the truncate operation, as well as
@ -97,12 +97,12 @@ func NewFreezer(datadir string, namespace string, readonly bool, maxTableSize ui
return nil, errSymlinkDatadir return nil, errSymlinkDatadir
} }
} }
// Leveldb/Pebble uses LOCK as the filelock filename. To prevent the
// name collision, we use FLOCK as the lock name.
flockFile := filepath.Join(datadir, "FLOCK") flockFile := filepath.Join(datadir, "FLOCK")
if err := os.MkdirAll(filepath.Dir(flockFile), 0755); err != nil { if err := os.MkdirAll(filepath.Dir(flockFile), 0755); err != nil {
return nil, err return nil, err
} }
// Leveldb uses LOCK as the filelock filename. To prevent the
// name collision, we use FLOCK as the lock name.
lock := flock.New(flockFile) lock := flock.New(flockFile)
tryLock := lock.TryLock tryLock := lock.TryLock
if readonly { if readonly {
@ -213,7 +213,7 @@ func (f *Freezer) AncientBytes(kind string, id, offset, length uint64) ([]byte,
// Ancients returns the length of the frozen items. // Ancients returns the length of the frozen items.
func (f *Freezer) Ancients() (uint64, error) { func (f *Freezer) Ancients() (uint64, error) {
return f.frozen.Load(), nil return f.head.Load(), nil
} }
// Tail returns the number of first stored item in the freezer. // Tail returns the number of first stored item in the freezer.
@ -252,7 +252,7 @@ func (f *Freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize
defer f.writeLock.Unlock() defer f.writeLock.Unlock()
// Roll back all tables to the starting position in case of error. // Roll back all tables to the starting position in case of error.
prevItem := f.frozen.Load() prevItem := f.head.Load()
defer func() { defer func() {
if err != nil { if err != nil {
// The write operation has failed. Go back to the previous item position. // The write operation has failed. Go back to the previous item position.
@ -273,7 +273,7 @@ func (f *Freezer) ModifyAncients(fn func(ethdb.AncientWriteOp) error) (writeSize
if err != nil { if err != nil {
return 0, err return 0, err
} }
f.frozen.Store(item) f.head.Store(item)
return writeSize, nil return writeSize, nil
} }
@ -286,7 +286,7 @@ func (f *Freezer) TruncateHead(items uint64) (uint64, error) {
f.writeLock.Lock() f.writeLock.Lock()
defer f.writeLock.Unlock() defer f.writeLock.Unlock()
oitems := f.frozen.Load() oitems := f.head.Load()
if oitems <= items { if oitems <= items {
return oitems, nil return oitems, nil
} }
@ -295,7 +295,7 @@ func (f *Freezer) TruncateHead(items uint64) (uint64, error) {
return 0, err return 0, err
} }
} }
f.frozen.Store(items) f.head.Store(items)
return oitems, nil return oitems, nil
} }
@ -320,6 +320,11 @@ func (f *Freezer) TruncateTail(tail uint64) (uint64, error) {
} }
} }
f.tail.Store(tail) f.tail.Store(tail)
// Update the head if the requested tail exceeds the current head
if f.head.Load() < tail {
f.head.Store(tail)
}
return old, nil return old, nil
} }
@ -379,7 +384,7 @@ func (f *Freezer) validate() error {
prunedTail = &tmp prunedTail = &tmp
} }
f.frozen.Store(head) f.head.Store(head)
f.tail.Store(*prunedTail) f.tail.Store(*prunedTail)
return nil return nil
} }
@ -414,7 +419,7 @@ func (f *Freezer) repair() error {
} }
} }
f.frozen.Store(head) f.head.Store(head)
f.tail.Store(prunedTail) f.tail.Store(prunedTail)
return nil return nil
} }

View file

@ -113,7 +113,7 @@ func (t *memoryTable) truncateTail(items uint64) error {
return nil return nil
} }
if t.items < items { if t.items < items {
return errors.New("truncation above head") return t.reset(items)
} }
for i := uint64(0); i < items-t.offset; i++ { for i := uint64(0); i < items-t.offset; i++ {
if t.size > uint64(len(t.data[i])) { if t.size > uint64(len(t.data[i])) {
@ -127,6 +127,16 @@ func (t *memoryTable) truncateTail(items uint64) error {
return nil return nil
} }
// reset clears the entire table and sets both the head and tail to the given
// value. It assumes the caller holds the lock and that tail > t.items.
func (t *memoryTable) reset(offset uint64) error {
t.size = 0
t.data = nil
t.items = offset
t.offset = offset
return nil
}
// commit merges the given item batch into table. It's presumed that the // commit merges the given item batch into table. It's presumed that the
// batch is ordered and continuous with table. // batch is ordered and continuous with table.
func (t *memoryTable) commit(batch [][]byte) error { func (t *memoryTable) commit(batch [][]byte) error {
@ -387,6 +397,9 @@ func (f *MemoryFreezer) TruncateTail(tail uint64) (uint64, error) {
} }
} }
f.tail = tail f.tail = tail
if f.items < tail {
f.items = tail
}
return old, nil return old, nil
} }

View file

@ -707,12 +707,13 @@ func (t *freezerTable) truncateTail(items uint64) error {
t.lock.Lock() t.lock.Lock()
defer t.lock.Unlock() defer t.lock.Unlock()
// Ensure the given truncate target falls in the correct range // Short-circuit if the requested tail deletion points to a stale position
if t.itemHidden.Load() >= items { if t.itemHidden.Load() >= items {
return nil return nil
} }
// If the requested tail exceeds the current head, reset the entire table
if t.items.Load() < items { if t.items.Load() < items {
return errors.New("truncation above head") return t.resetTo(items)
} }
// Load the new tail index by the given new tail position // Load the new tail index by the given new tail position
var ( var (
@ -822,10 +823,9 @@ func (t *freezerTable) truncateTail(items uint64) error {
shorten := indexEntrySize * int64(newDeleted-deleted) shorten := indexEntrySize * int64(newDeleted-deleted)
if t.metadata.flushOffset <= shorten { if t.metadata.flushOffset <= shorten {
return fmt.Errorf("invalid index flush offset: %d, shorten: %d", t.metadata.flushOffset, shorten) return fmt.Errorf("invalid index flush offset: %d, shorten: %d", t.metadata.flushOffset, shorten)
} else { }
if err := t.metadata.setFlushOffset(t.metadata.flushOffset-shorten, true); err != nil { if err := t.metadata.setFlushOffset(t.metadata.flushOffset-shorten, true); err != nil {
return err return err
}
} }
// Retrieve the new size and update the total size counter // Retrieve the new size and update the total size counter
newSize, err := t.sizeNolock() newSize, err := t.sizeNolock()
@ -836,6 +836,59 @@ func (t *freezerTable) truncateTail(items uint64) error {
return nil return nil
} }
// resetTo clears the entire table and sets both the head and tail to the given
// value. It assumes the caller holds the lock and that tail > t.items.
func (t *freezerTable) resetTo(tail uint64) error {
// Sync the entire table before resetting, eliminating the potential
// data corruption.
err := t.doSync()
if err != nil {
return err
}
// Update the index file to reflect the new offset
if err := t.index.Close(); err != nil {
return err
}
entry := &indexEntry{
filenum: t.headId + 1,
offset: uint32(tail),
}
if err := reset(t.index.Name(), entry.append(nil)); err != nil {
return err
}
if err := t.metadata.setVirtualTail(tail, true); err != nil {
return err
}
if err := t.metadata.setFlushOffset(indexEntrySize, true); err != nil {
return err
}
t.index, err = openFreezerFileForAppend(t.index.Name())
if err != nil {
return err
}
// Purge all the existing data file
if err := t.head.Close(); err != nil {
return err
}
t.headId = t.headId + 1
t.tailId = t.headId
t.headBytes = 0
t.head, err = t.openFile(t.headId, openFreezerFileTruncated)
if err != nil {
return err
}
t.releaseFilesBefore(t.headId, true)
t.items.Store(tail)
t.itemOffset.Store(tail)
t.itemHidden.Store(tail)
t.sizeGauge.Update(0)
return nil
}
// Close closes all opened files and finalizes the freezer table for use. // Close closes all opened files and finalizes the freezer table for use.
// This operation must be completed before shutdown to prevent the loss of // This operation must be completed before shutdown to prevent the loss of
// recent writes. // recent writes.
@ -1247,25 +1300,20 @@ func (t *freezerTable) doSync() error {
if t.index == nil || t.head == nil || t.metadata.file == nil { if t.index == nil || t.head == nil || t.metadata.file == nil {
return errClosed return errClosed
} }
var err error if err := t.index.Sync(); err != nil {
trackError := func(e error) { return err
if e != nil && err == nil { }
err = e if err := t.head.Sync(); err != nil {
} return err
} }
trackError(t.index.Sync())
trackError(t.head.Sync())
// A crash may occur before the offset is updated, leaving the offset // A crash may occur before the offset is updated, leaving the offset
// points to a old position. If so, the extra items above the offset // points to an old position. If so, the extra items above the offset
// will be truncated during the next run. // will be truncated during the next run.
stat, err := t.index.Stat() stat, err := t.index.Stat()
if err != nil { if err != nil {
return err return err
} }
offset := stat.Size() return t.metadata.setFlushOffset(stat.Size(), true)
trackError(t.metadata.setFlushOffset(offset, true))
return err
} }
func (t *freezerTable) dumpIndexStdout(start, stop int64) { func (t *freezerTable) dumpIndexStdout(start, stop int64) {

View file

@ -1139,6 +1139,7 @@ const (
opTruncateHeadAll opTruncateHeadAll
opTruncateTail opTruncateTail
opTruncateTailAll opTruncateTailAll
opTruncateTailOverHead
opCheckAll opCheckAll
opMax // boundary value, not an actual op opMax // boundary value, not an actual op
) )
@ -1226,6 +1227,11 @@ func (randTest) Generate(r *rand.Rand, size int) reflect.Value {
step.target = deleted + uint64(len(items)) step.target = deleted + uint64(len(items))
items = items[:0] items = items[:0]
deleted = step.target deleted = step.target
case opTruncateTailOverHead:
newDeleted := deleted + uint64(len(items)) + 10
step.target = newDeleted
deleted = newDeleted
items = items[:0]
} }
steps = append(steps, step) steps = append(steps, step)
} }
@ -1268,7 +1274,7 @@ func runRandTest(rt randTest) bool {
for i := 0; i < len(step.items); i++ { for i := 0; i < len(step.items); i++ {
batch.AppendRaw(step.items[i], step.blobs[i]) batch.AppendRaw(step.items[i], step.blobs[i])
} }
batch.commit() rt[i].err = batch.commit()
values = append(values, step.blobs...) values = append(values, step.blobs...)
case opRetrieve: case opRetrieve:
@ -1290,24 +1296,28 @@ func runRandTest(rt randTest) bool {
} }
case opTruncateHead: case opTruncateHead:
f.truncateHead(step.target) rt[i].err = f.truncateHead(step.target)
length := f.items.Load() - f.itemHidden.Load() length := f.items.Load() - f.itemHidden.Load()
values = values[:length] values = values[:length]
case opTruncateHeadAll: case opTruncateHeadAll:
f.truncateHead(step.target) rt[i].err = f.truncateHead(step.target)
values = nil values = nil
case opTruncateTail: case opTruncateTail:
prev := f.itemHidden.Load() prev := f.itemHidden.Load()
f.truncateTail(step.target) rt[i].err = f.truncateTail(step.target)
truncated := f.itemHidden.Load() - prev truncated := f.itemHidden.Load() - prev
values = values[truncated:] values = values[truncated:]
case opTruncateTailAll: case opTruncateTailAll:
f.truncateTail(step.target) rt[i].err = f.truncateTail(step.target)
values = nil
case opTruncateTailOverHead:
rt[i].err = f.truncateTail(step.target)
values = nil values = nil
} }
// Abort the test on error. // Abort the test on error.
@ -1633,3 +1643,43 @@ func TestFreezerAncientBytes(t *testing.T) {
}) })
} }
} }
func TestTruncateOverHead(t *testing.T) {
t.Parallel()
fn := fmt.Sprintf("t-%d", rand.Uint64())
f, err := newTable(os.TempDir(), fn, metrics.NewMeter(), metrics.NewMeter(), metrics.NewGauge(), 100, freezerTableConfig{noSnappy: true}, false)
if err != nil {
t.Fatal(err)
}
// Tail truncation on an empty table
if err := f.truncateTail(10); err != nil {
t.Fatal(err)
}
batch := f.newBatch()
data := getChunk(10, 1)
require.NoError(t, batch.AppendRaw(uint64(10), data))
require.NoError(t, batch.commit())
got, err := f.RetrieveItems(uint64(10), 1, 0)
require.NoError(t, err)
if !bytes.Equal(got[0], data) {
t.Fatalf("Unexpected bytes, want: %v, got: %v", data, got[0])
}
// Tail truncation on the non-empty table
if err := f.truncateTail(20); err != nil {
t.Fatal(err)
}
batch = f.newBatch()
data = getChunk(10, 1)
require.NoError(t, batch.AppendRaw(uint64(20), data))
require.NoError(t, batch.commit())
got, err = f.RetrieveItems(uint64(20), 1, 0)
require.NoError(t, err)
if !bytes.Equal(got[0], data) {
t.Fatalf("Unexpected bytes, want: %v, got: %v", data, got[0])
}
}

View file

@ -22,6 +22,19 @@ import (
"path/filepath" "path/filepath"
) )
func atomicRename(src, dest string) error {
if err := os.Rename(src, dest); err != nil {
return err
}
dir, err := os.Open(filepath.Dir(src))
if err != nil {
return err
}
defer dir.Close()
return dir.Sync()
}
// copyFrom copies data from 'srcPath' at offset 'offset' into 'destPath'. // copyFrom copies data from 'srcPath' at offset 'offset' into 'destPath'.
// The 'destPath' is created if it doesn't exist, otherwise it is overwritten. // The 'destPath' is created if it doesn't exist, otherwise it is overwritten.
// Before the copy is executed, there is a callback can be registered to // Before the copy is executed, there is a callback can be registered to
@ -73,13 +86,48 @@ func copyFrom(srcPath, destPath string, offset uint64, before func(f *os.File) e
return err return err
} }
f = nil f = nil
return os.Rename(fname, destPath)
return atomicRename(fname, destPath)
}
// reset atomically replaces the file at the given path with the provided content.
func reset(path string, content []byte) error {
// Create a temp file in the same dir where we want it to wind up
f, err := os.CreateTemp(filepath.Dir(path), "*")
if err != nil {
return err
}
fname := f.Name()
// Clean up the leftover file
defer func() {
if f != nil {
f.Close()
}
os.Remove(fname)
}()
// Write the content into the temp file
_, err = f.Write(content)
if err != nil {
return err
}
// Permanently persist the content into disk
if err := f.Sync(); err != nil {
return err
}
if err := f.Close(); err != nil {
return err
}
f = nil
return atomicRename(fname, path)
} }
// openFreezerFileForAppend opens a freezer table file and seeks to the end // openFreezerFileForAppend opens a freezer table file and seeks to the end
func openFreezerFileForAppend(filename string) (*os.File, error) { func openFreezerFileForAppend(filename string) (*os.File, error) {
// Open the file without the O_APPEND flag // Open the file without the O_APPEND flag
// because it has differing behaviour during Truncate operations // because it has differing behavior during Truncate operations
// on different OS's // on different OS's
file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644) file, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0644)
if err != nil { if err != nil {

View file

@ -412,28 +412,34 @@ func repairHistory(db ethdb.Database, isVerkle bool, readOnly bool, stateID uint
// Truncate excessive history entries in either the state history or // Truncate excessive history entries in either the state history or
// the trienode history, ensuring both histories remain aligned with // the trienode history, ensuring both histories remain aligned with
// the state. // the state.
head, err := states.Ancients() shead, err := states.Ancients()
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
if stateID > head { if stateID > shead { // Gap is not permitted in the state history
return nil, nil, fmt.Errorf("gap between state [#%d] and state history [#%d]", stateID, head) return nil, nil, fmt.Errorf("gap between state [#%d] and state history [#%d]", stateID, shead)
} }
truncTo := min(shead, stateID)
if trienodes != nil { if trienodes != nil {
th, err := trienodes.Ancients() thead, err := trienodes.Ancients()
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
if stateID > th { if stateID <= thead {
return nil, nil, fmt.Errorf("gap between state [#%d] and trienode history [#%d]", stateID, th) truncTo = min(truncTo, thead)
} } else {
if th != head { if thead == 0 {
log.Info("Histories are not aligned with each other", "state", head, "trienode", th) _, err = trienodes.TruncateTail(stateID)
head = min(head, th) if err != nil {
return nil, nil, err
}
log.Warn("Initialized trienode history")
} else {
return nil, nil, fmt.Errorf("gap between state [#%d] and trienode history [#%d]", stateID, thead)
}
} }
} }
head = min(head, stateID)
// Truncate the extra history elements above in freezer in case it's not // Truncate the extra history elements above in freezer in case it's not
// aligned with the state. It might happen after an unclean shutdown. // aligned with the state. It might happen after an unclean shutdown.
truncate := func(store ethdb.AncientStore, typ historyType, nhead uint64) { truncate := func(store ethdb.AncientStore, typ historyType, nhead uint64) {
@ -448,7 +454,7 @@ func repairHistory(db ethdb.Database, isVerkle bool, readOnly bool, stateID uint
log.Warn("Truncated extra histories", "typ", typ, "number", pruned) log.Warn("Truncated extra histories", "typ", typ, "number", pruned)
} }
} }
truncate(states, typeStateHistory, head) truncate(states, typeStateHistory, truncTo)
truncate(trienodes, typeTrienodeHistory, head) truncate(trienodes, typeTrienodeHistory, truncTo)
return states, trienodes, nil return states, trienodes, nil
} }

View file

@ -349,7 +349,7 @@ func (db *Database) HistoricNodeReader(root common.Hash) (*HistoricalNodeReader,
// are not accessible. // are not accessible.
meta, err := readTrienodeMetadata(db.trienodeFreezer, *id+1) meta, err := readTrienodeMetadata(db.trienodeFreezer, *id+1)
if err != nil { if err != nil {
return nil, err // e.g., the referred trienode history has been pruned return nil, fmt.Errorf("state %#x is not available", root) // e.g., the referred trienode history has been pruned
} }
if meta.parent != root { if meta.parent != root {
return nil, fmt.Errorf("state %#x is not canonincal", root) return nil, fmt.Errorf("state %#x is not canonincal", root)