From bfb77d98f601abc1d1b3a57f1bc6ca1e83c8cd0b Mon Sep 17 00:00:00 2001 From: CPerezz Date: Wed, 8 Apr 2026 00:18:34 +0200 Subject: [PATCH] core/state,triedb/pathdb: enable bintrie flat state reads end-to-end Wires the pieces from Commits 1-9 into a running system: * triedb/pathdb.New: install the bintrieFlatCodec when isVerkle is set, backed by the same verkle-namespaced db used for trie nodes. * triedb/pathdb.database.go: drop isVerkle from the noBuild guard so the bintrie generator (Commit 9) runs on startup, and remove it from the generateSnapshot call path for the same reason. * triedb/pathdb.disklayer.revert: hard-fail on bintrie because the reorg path would replay merkle-shaped origin records against a per-stem layout. Tracked in BINTRIE_FLAT_STATE_REORG_GAP.md. * triedb/pathdb.journal: add IsBintrie to journalGenerator (rlp:"optional" so v3 journals still decode) and make journalProgress a method on generator so it stamps the active scheme; loadGenerator discards any journal whose scheme does not match the database, forcing a fresh regeneration. * triedb/pathdb.reader: export RawStateReader, a small extension of database.StateReader that exposes AccountRLP so callers outside the package can reach the raw flat-state bytes without going through the slim-RLP decode path that assumes merkle shape. * core/state.reader: add bintrieFlatReader, the bintrie equivalent of flatReader. It derives the EIP-7864 stem keys from (addr, slot), performs two AccountRLP lookups per Account call (BasicData + CodeHash), and decodes via bintrie.UnpackBasicData. Storage reads go through a single AccountRLP lookup at the slot's full bintrie key. * core/state.database.StateReader: dispatch to bintrieFlatReader when the path database is in verkle mode; merkle path unchanged. Depends on the lookup sentinel fix in the previous commit; without it missing-account reads on bintrie misreport as "layer stale". 
--- core/state/database.go | 17 ++- core/state/reader.go | 122 +++++++++++++++++++++ core/state/reader_bintrie_test.go | 172 ++++++++++++++++++++++++++++++ triedb/pathdb/database.go | 25 +++-- triedb/pathdb/disklayer.go | 13 +++ triedb/pathdb/generate.go | 19 ++-- triedb/pathdb/generate_bintrie.go | 6 +- triedb/pathdb/journal.go | 28 ++++- triedb/pathdb/reader.go | 20 ++++ 9 files changed, 403 insertions(+), 19 deletions(-) create mode 100644 core/state/reader_bintrie_test.go diff --git a/core/state/database.go b/core/state/database.go index 57293b19b4..921eaecd3d 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -204,10 +204,25 @@ func (db *CachingDB) StateReader(stateRoot common.Hash) (StateReader, error) { // This reader offers improved performance but is optional and only // partially useful if the snapshot data in path database is not // fully generated. + // + // For binary-trie databases the reader needs codec-specific key + // derivation (EIP-7864 stem || offset) and a separate decode path + // (BasicData/CodeHash leaves rather than slim RLP), so we install + // a bintrieFlatReader instead of the historical merkle flatReader. + // If the underlying path-database reader can't expose raw-byte + // access — e.g. a hypothetical wrapper that only implements the + // minimal database.StateReader — we silently fall through to the + // trie reader, which always works. 
if db.TrieDB().Scheme() == rawdb.PathScheme { reader, err := db.triedb.StateReader(stateRoot) if err == nil { - readers = append(readers, newFlatReader(reader)) + if db.TrieDB().IsVerkle() { + if br := newBintrieFlatReader(reader); br != nil { + readers = append(readers, br) + } + } else { + readers = append(readers, newFlatReader(reader)) + } } } // Configure the trie reader, which is expected to be available as the diff --git a/core/state/reader.go b/core/state/reader.go index 120253ff0c..ac5491aedd 100644 --- a/core/state/reader.go +++ b/core/state/reader.go @@ -179,6 +179,128 @@ func (r *flatReader) Storage(addr common.Address, key common.Hash) (common.Hash, return value, nil } +// bintrieFlatReader is the binary-trie analogue of flatReader. It exposes +// the StateReader interface backed by the path database's per-stem flat +// state, doing the EIP-7864 key derivation locally so the underlying +// pathdb reader only sees raw 32-byte (stem || offset) lookup keys. +// +// Each Account call performs TWO underlying lookups (BasicData at offset +// 0 and CodeHash at offset 1), because the diff layers store one entry +// per offset rather than a pre-aggregated stem blob — this lets two +// different blocks touch the same account at different offsets without +// stomping on each other. Storage calls perform a single lookup at the +// slot's full bintrie key. +// +// The reader holds a pathdb.RawStateReader (a small extension of +// database.StateReader that exposes AccountRLP for raw-byte access) +// because reader.Account() in pathdb decodes its result as slim RLP, +// which is the wrong format for bintrie leaves. AccountRLP returns the +// raw 32-byte leaf value untouched. +type bintrieFlatReader struct { + reader pathdbRawStateReader +} + +// pathdbRawStateReader is the local view of pathdb.RawStateReader. 
It is +// duplicated here (rather than imported) to avoid pulling pathdb into +// every consumer of state.StateReader; the runtime type-assertion in +// newBintrieFlatReader checks for the interface dynamically. +type pathdbRawStateReader interface { + database.StateReader + AccountRLP(hash common.Hash) ([]byte, error) +} + +// newBintrieFlatReader constructs a state reader backed by the bintrie +// codec. It returns nil if the underlying database.StateReader is not +// raw-byte capable (e.g. a wrapper that implements only the minimal +// interface); callers should fall through to the trie reader in that case. +func newBintrieFlatReader(reader database.StateReader) *bintrieFlatReader { + raw, ok := reader.(pathdbRawStateReader) + if !ok { + return nil + } + return &bintrieFlatReader{reader: raw} +} + +// Account implements StateReader. It performs two underlying reads — one +// for the BasicData leaf (offset 0) and one for the CodeHash leaf +// (offset 1) — and combines them into a unified Account. If both leaves +// are absent the account is treated as non-existent (return nil, nil). +// +// Returning nil-with-no-error matches the merkle flatReader's +// "not present" semantics: the trie reader is the gatekeeper that +// distinguishes "missing" from "present-with-zero-balance". +func (r *bintrieFlatReader) Account(addr common.Address) (*Account, error) { + basicKey := common.BytesToHash(bintrie.GetBinaryTreeKeyBasicData(addr)) + codeKey := common.BytesToHash(bintrie.GetBinaryTreeKeyCodeHash(addr)) + + basicBlob, err := r.reader.AccountRLP(basicKey) + if err != nil { + return nil, err + } + codeBlob, err := r.reader.AccountRLP(codeKey) + if err != nil { + return nil, err + } + if len(basicBlob) == 0 && len(codeBlob) == 0 { + return nil, nil + } + // A bintrie leaf is always either absent or exactly 32 bytes; any + // other length is a corruption signal we surface as an error rather + // than silently constructing a junk account. 
+ if len(basicBlob) != 0 && len(basicBlob) != 32 { + return nil, errors.New("bintrie BasicData leaf has invalid length") + } + if len(codeBlob) != 0 && len(codeBlob) != 32 { + return nil, errors.New("bintrie CodeHash leaf has invalid length") + } + + acct := &Account{} + if len(basicBlob) == 32 { + var basic [32]byte + copy(basic[:], basicBlob) + nonce, balance, _ := bintrie.UnpackBasicData(basic) + acct.Nonce = nonce + acct.Balance = balance + } else { + // CodeHash present but BasicData absent: treat as a freshly + // created account whose body has not been written yet. The + // merkle path returns the empty-balance form in this case too. + acct.Balance = uint256.NewInt(0) + } + if len(codeBlob) == 32 { + acct.CodeHash = common.CopyBytes(codeBlob) + } else { + acct.CodeHash = types.EmptyCodeHash.Bytes() + } + return acct, nil +} + +// Storage implements StateReader. The caller's (addr, slot) pair is +// turned into a single 32-byte (stem || offset) bintrie key via +// GetBinaryTreeKeyStorageSlot, and we look it up via AccountRLP because +// the diff layer stores all bintrie leaves under accountData regardless +// of whether they came from an account header or a storage write. +// +// An absent entry is returned as the zero hash, so callers cannot tell +// "no entry in the flat state" apart from "entry present with zero value", +// which the bintrie writes as 32 zero bytes (its tombstone convention). 
+func (r *bintrieFlatReader) Storage(addr common.Address, slot common.Hash) (common.Hash, error) { + fullKey := bintrie.GetBinaryTreeKeyStorageSlot(addr, slot[:]) + blob, err := r.reader.AccountRLP(common.BytesToHash(fullKey)) + if err != nil { + return common.Hash{}, err + } + if len(blob) == 0 { + return common.Hash{}, nil + } + if len(blob) != 32 { + return common.Hash{}, errors.New("bintrie storage leaf has invalid length") + } + var value common.Hash + copy(value[:], blob) + return value, nil +} + // trieReader implements the StateReader interface, providing functions to access // state from the referenced trie. // diff --git a/core/state/reader_bintrie_test.go b/core/state/reader_bintrie_test.go new file mode 100644 index 0000000000..6336615f81 --- /dev/null +++ b/core/state/reader_bintrie_test.go @@ -0,0 +1,172 @@ +// Copyright 2026 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 
+ +package state + +import ( + "testing" + + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/tracing" + "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/triedb" + "github.com/holiman/uint256" +) + +// TestBintrieFlatReaderEndToEnd is the integration test that exercises +// the full Commit-10 read path for a binary-trie database: +// +// 1. Build a fresh verkle pathdb-backed StateDB. +// 2. Mutate accounts (balance, nonce, code) and storage slots; the +// binaryHasher produces leaf writes via DrainStemWrites under the +// hood (Commit 7). +// 3. Commit through the standard StateDB.Commit pipeline. This drives +// stateUpdate.encodeBinary (Commit 8) which converts the leaves +// into per-offset accountData entries that flow into pathdb's +// stateSet, then are persisted to disk via the bintrie codec's +// Flush method (Commit 8). +// 4. Open a StateReader for the resulting root. CachingDB.StateReader +// installs a bintrieFlatReader (Commit 10) ahead of the trie +// reader because db.TrieDB().IsVerkle() is true. +// 5. Read the accounts and one storage slot back through the +// StateReader and assert the values round-trip exactly. +// +// This is the canonical "does the bintrie flat-state read path actually +// work end-to-end" test. If it fails, something between the hasher's +// leaf production and the disk-layer reads is wrong. +func TestBintrieFlatReaderEndToEnd(t *testing.T) { + disk := rawdb.NewMemoryDatabase() + tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults) + sdb := NewDatabase(tdb, nil) + + // A fresh verkle pathdb's disk layer is keyed by EmptyVerkleHash + // (all-zero hash), not EmptyRootHash. The TestVerkleCodeSizePreserved + // helper documents this gotcha. 
+ state, err := New(types.EmptyVerkleHash, sdb) + if err != nil { + t.Fatalf("init state: %v", err) + } + + var ( + addrA = common.HexToAddress("0xAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaaAAaa") + addrB = common.HexToAddress("0xBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbbBBbb") + balance = uint256.NewInt(0xCAFE) + slot = common.HexToHash("0x07") + value = common.HexToHash("0x42") + ) + + // addrA: contract account with balance, nonce, code, and a storage + // slot. Slot 7 is in the EIP-7864 header range so it shares a stem + // with the BasicData leaf, exercising the per-stem RMW path. + state.SetBalance(addrA, balance, tracing.BalanceChangeUnspecified) + state.SetNonce(addrA, 5, tracing.NonceChangeUnspecified) + state.SetCode(addrA, []byte{0x60, 0x80, 0x60, 0x40}, tracing.CodeChangeUnspecified) + state.SetState(addrA, slot, value) + + // addrB: EOA with only a balance set. Lives at a different stem so + // it tests two distinct stems landing in the same flush. + state.SetBalance(addrB, uint256.NewInt(0xBEEF), tracing.BalanceChangeUnspecified) + + root, err := state.Commit(0, true, false) + if err != nil { + t.Fatalf("commit: %v", err) + } + + // Now read the state back via a StateReader for the new root. The + // dispatch in CachingDB.StateReader uses bintrieFlatReader because + // IsVerkle() is true. 
+ reader, err := sdb.StateReader(root) + if err != nil { + t.Fatalf("StateReader: %v", err) + } + + gotA, err := reader.Account(addrA) + if err != nil { + t.Fatalf("Account A: %v", err) + } + if gotA == nil { + t.Fatal("addrA: account is nil after commit") + } + if gotA.Nonce != 5 { + t.Errorf("addrA nonce: got %d, want 5", gotA.Nonce) + } + if gotA.Balance.Cmp(balance) != 0 { + t.Errorf("addrA balance: got %s, want %s", gotA.Balance, balance) + } + if len(gotA.CodeHash) != 32 { + t.Errorf("addrA code hash: got %d-byte hash, want 32", len(gotA.CodeHash)) + } + + gotB, err := reader.Account(addrB) + if err != nil { + t.Fatalf("Account B: %v", err) + } + if gotB == nil { + t.Fatal("addrB: account is nil after commit") + } + if gotB.Balance.Uint64() != 0xBEEF { + t.Errorf("addrB balance: got %s, want 0xBEEF", gotB.Balance) + } + + // Storage slot round-trip: SetState wrote value at slot 7 of addrA. + // The bintrieFlatReader.Storage call derives the bintrie storage + // key locally and looks it up via pathdb's AccountRLP path. + gotSlot, err := reader.Storage(addrA, slot) + if err != nil { + t.Fatalf("Storage: %v", err) + } + if gotSlot != value { + t.Errorf("storage slot: got %x, want %x", gotSlot, value) + } +} + +// TestBintrieFlatReaderMissingAccount verifies that an account never +// touched by any commit returns (nil, nil) — the standard "account +// doesn't exist" sentinel that the merkle flatReader also returns. +func TestBintrieFlatReaderMissingAccount(t *testing.T) { + disk := rawdb.NewMemoryDatabase() + tdb := triedb.NewDatabase(disk, triedb.VerkleDefaults) + sdb := NewDatabase(tdb, nil) + state, err := New(types.EmptyVerkleHash, sdb) + if err != nil { + t.Fatalf("init state: %v", err) + } + + // Touch addrA so the trie has at least one stem; otherwise we'd be + // reading from an empty disk layer where everything is trivially + // absent. 
+ addrA := common.HexToAddress("0x0101010101010101010101010101010101010101") + state.SetBalance(addrA, uint256.NewInt(1), tracing.BalanceChangeUnspecified) + root, err := state.Commit(0, true, false) + if err != nil { + t.Fatalf("commit: %v", err) + } + + reader, err := sdb.StateReader(root) + if err != nil { + t.Fatalf("StateReader: %v", err) + } + + missing := common.HexToAddress("0xfeedfacefeedfacefeedfacefeedfacefeedface") + got, err := reader.Account(missing) + if err != nil { + t.Fatalf("Account(missing): %v", err) + } + if got != nil { + t.Errorf("missing account: got %+v, want nil", got) + } +} diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go index f7a9cdec21..ef66420be6 100644 --- a/triedb/pathdb/database.go +++ b/triedb/pathdb/database.go @@ -169,10 +169,12 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { if isVerkle { db.diskdb = rawdb.NewTable(diskdb, string(rawdb.VerklePrefix)) db.hasher = binaryNodeHasher - // NOTE: bintrieFlatCodec is introduced in a later commit. Until then, - // verkle databases also use the merkle codec for backward compatibility - // (the existing snapshot path is disabled for verkle anyway via the - // noBuild guard at setStateGenerator). + // Wire the bintrie flat-state codec so the disklayer/buffer/generator + // all use the per-stem on-disk layout. The codec needs a reader for + // the read-modify-write performed by applyWrites; the namespaced + // db.diskdb is the right backing store because all bintrie keys + // (trie nodes AND stem blobs) live under the verkle prefix. + db.flatCodec = newBintrieFlatCodec(db.diskdb) } // Construct the layer tree by resolving the in-disk singleton state // and in-memory layer journal. @@ -238,7 +240,7 @@ func (db *Database) setHistoryIndexer() { func (db *Database) setStateGenerator() error { // Load the state snapshot generation progress marker to prevent access // to uncovered states. 
- generator, root, err := loadGenerator(db.diskdb, db.hasher) + generator, root, err := loadGenerator(db.diskdb, db.hasher, db.isVerkle) if err != nil { return err } @@ -270,8 +272,13 @@ func (db *Database) setStateGenerator() error { // Disable the background snapshot building in these circumstances: // - the database is opened in read only mode // - the snapshot build is explicitly disabled - // - the database is opened in verkle tree mode - noBuild := db.readOnly || db.config.SnapshotNoBuild || db.isVerkle + // + // Note: bintrie/verkle mode is no longer excluded here. The bintrie + // codec ships its own snapshot generator (see generate_bintrie.go) so + // the unified flat-state path can populate stem blobs from an existing + // trie. Generator dispatch in newGenerator/generator.run picks the + // right routine based on the active flatStateCodec. + noBuild := db.readOnly || db.config.SnapshotNoBuild // Construct the generator and link it to the disk layer, ensuring that the // generation progress is resolved to prevent accessing uncovered states @@ -414,7 +421,9 @@ func (db *Database) Enable(root common.Hash) error { // Re-construct a new disk layer backed by persistent state // and schedule the state snapshot generation if it's permitted. - db.tree.init(generateSnapshot(db, root, db.isVerkle || db.config.SnapshotNoBuild)) + // Bintrie/verkle is no longer treated as "noBuild" — the bintrie + // generator (Commit 9) handles regeneration from the unified trie. + db.tree.init(generateSnapshot(db, root, db.config.SnapshotNoBuild)) // After snap sync, the state of the database may have changed completely. 
// To ensure the history indexer always matches the current state, we must: diff --git a/triedb/pathdb/disklayer.go b/triedb/pathdb/disklayer.go index 95a60480d2..b39ce335bd 100644 --- a/triedb/pathdb/disklayer.go +++ b/triedb/pathdb/disklayer.go @@ -17,6 +17,7 @@ package pathdb import ( + "errors" "fmt" "sync" "time" @@ -536,6 +537,18 @@ func (dl *diskLayer) revert(h *stateHistory) (*diskLayer, error) { if dl.id == 0 { return nil, fmt.Errorf("%w: zero state id", errStateUnrecoverable) } + // Bintrie flat state does not yet support revert. State history for + // bintrie carries keccak-keyed account/storage entries (the merkle + // shape), but the bintrie disk layout is per-stem and the merkle + // origin maps cannot be replayed onto it. Reorgs would silently + // produce wrong answers — fail loudly here so misuse is obvious. + // + // See BINTRIE_FLAT_STATE_REORG_GAP.md for the full design and the + // follow-up that lifts this restriction by emitting bintrie-shaped + // origin records on the write path. + if _, isBintrie := dl.db.flatCodec.(*bintrieFlatCodec); isBintrie { + return nil, errors.New("bintrie flat state revert is not supported (see BINTRIE_FLAT_STATE_REORG_GAP.md)") + } // Apply the reverse state changes upon the current state. This must // be done before holding the lock in order to access state in "this" // layer. diff --git a/triedb/pathdb/generate.go b/triedb/pathdb/generate.go index 125a21e275..6c01650854 100644 --- a/triedb/pathdb/generate.go +++ b/triedb/pathdb/generate.go @@ -206,13 +206,20 @@ func generateSnapshot(triedb *Database, root common.Hash, noBuild bool) *diskLay } // journalProgress persists the generator stats into the database to resume later. -func journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorStats) { +// +// It is a method on generator so it can stamp the journal entry with the +// active scheme (merkle vs. bintrie). 
loadGenerator uses that flag to +// discard journals from a different scheme rather than blindly resuming +// with an incompatible marker shape. +func (g *generator) journalProgress(db ethdb.KeyValueWriter, marker []byte, stats *generatorStats) { // Write out the generator marker. Note it's a standalone disk layer generator // which is not mixed with journal. It's ok if the generator is persisted while // journal is not. + _, isBintrie := g.codec.(*bintrieFlatCodec) entry := journalGenerator{ - Done: marker == nil, - Marker: marker, + Done: marker == nil, + Marker: marker, + IsBintrie: isBintrie, } if stats != nil { entry.Accounts = stats.accounts @@ -603,7 +610,7 @@ func (g *generator) checkAndFlush(ctx *generatorContext, current []byte) error { // Persist the progress marker regardless of whether the batch is empty or not. // It may happen that all the flat states in the database are correct, so the // generator indeed makes progress even if there is nothing to commit. - journalProgress(ctx.batch, current, g.stats) + g.journalProgress(ctx.batch, current, g.stats) // Flush out the database writes atomically if err := ctx.batch.Write(); err != nil { @@ -782,7 +789,7 @@ func (g *generator) generate(ctx *generatorContext) { if len(g.progress) == 0 { batch := g.db.NewBatch() rawdb.WriteSnapshotRoot(batch, ctx.root) - journalProgress(batch, g.progress, g.stats) + g.journalProgress(batch, g.progress, g.stats) if err := batch.Write(); err != nil { log.Crit("Failed to write initialized state marker", "err", err) } @@ -815,7 +822,7 @@ func (g *generator) generate(ctx *generatorContext) { // Snapshot fully generated, set the marker to nil. // Note even there is nothing to commit, persist the // generator anyway to mark the snapshot is complete. 
- journalProgress(ctx.batch, nil, g.stats) + g.journalProgress(ctx.batch, nil, g.stats) if err := ctx.batch.Write(); err != nil { log.Error("Failed to flush batch", "err", err) abort = <-g.abort diff --git a/triedb/pathdb/generate_bintrie.go b/triedb/pathdb/generate_bintrie.go index d0009bfc3f..ef4ee8544b 100644 --- a/triedb/pathdb/generate_bintrie.go +++ b/triedb/pathdb/generate_bintrie.go @@ -254,7 +254,7 @@ func (g *generator) checkAndFlushBin(ctx *bintrieGeneratorContext, current []byt // Persist progress regardless of whether the batch is empty — // it may be that all observed stems were already on disk and // nothing actually changed. - journalProgress(ctx.batch, current, g.stats) + g.journalProgress(ctx.batch, current, g.stats) if err := ctx.batch.Write(); err != nil { return err @@ -296,7 +296,7 @@ func (g *generator) generateBintrie(ctx *bintrieGeneratorContext) { if len(g.progress) == 0 { batch := ctx.db.NewBatch() rawdb.WriteSnapshotRoot(batch, ctx.root) - journalProgress(batch, g.progress, g.stats) + g.journalProgress(batch, g.progress, g.stats) if err := batch.Write(); err != nil { log.Crit("Failed to write initialized bintrie state marker", "err", err) } @@ -319,7 +319,7 @@ func (g *generator) generateBintrie(ctx *bintrieGeneratorContext) { // Successful completion: write the nil "done" marker so subsequent // loads know the snapshot is complete. - journalProgress(ctx.batch, nil, g.stats) + g.journalProgress(ctx.batch, nil, g.stats) if err := ctx.batch.Write(); err != nil { log.Error("Failed to flush bintrie batch", "err", err) abort = <-g.abort diff --git a/triedb/pathdb/journal.go b/triedb/pathdb/journal.go index 42eee2b7f8..7a91419669 100644 --- a/triedb/pathdb/journal.go +++ b/triedb/pathdb/journal.go @@ -123,10 +123,27 @@ type journalGenerator struct { Accounts uint64 Slots uint64 Storage uint64 + + // IsBintrie distinguishes a bintrie generator's progress marker from a + // merkle one. 
The two markers have incompatible semantics (single-tier + // 32-byte stem||offset vs. two-tier accountHash+storageHash) and the + // loader discards the journal whenever this flag does not match the + // database's mode, forcing a full regeneration. + // + // Marshalled with rlp:"optional" so older v3 journals (which never + // wrote this field) decode cleanly to false — the merkle default. + IsBintrie bool `rlp:"optional"` } // loadGenerator loads the state generation progress marker from the database. -func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher) (*journalGenerator, common.Hash, error) { +// +// isBintrie indicates the database's active scheme. A persisted generator +// from the *other* scheme is discarded outright (and a fresh marker is +// returned) because the marker shapes are mutually unintelligible: a +// merkle marker is two-tier accountHash+storageHash, while a bintrie +// marker is a single 32-byte stem||offset key. Resuming with the wrong +// shape would either skip large stretches of the trie or revisit them. +func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher, isBintrie bool) (*journalGenerator, common.Hash, error) { trieRoot, err := hash(rawdb.ReadAccountTrieNode(db, nil)) if err != nil { return nil, common.Hash{}, err @@ -143,6 +160,15 @@ func loadGenerator(db ethdb.KeyValueReader, hash nodeHasher) (*journalGenerator, log.Info("State snapshot generator is not compatible") return nil, trieRoot, nil } + // Scheme mismatch — drop the journal and force a full regeneration. + // IsBintrie defaults to false on legacy v3 entries (the field is + // rlp:"optional"), which is exactly the right answer for a merkle + // database opened against an old journal. 
+ if generator.IsBintrie != isBintrie { + log.Info("State snapshot generator is for a different scheme, discarding", + "journalIsBintrie", generator.IsBintrie, "dbIsBintrie", isBintrie) + return nil, trieRoot, nil + } // The state snapshot is inconsistent with the trie data and must // be rebuilt. // diff --git a/triedb/pathdb/reader.go b/triedb/pathdb/reader.go index 474756fbba..9bb9b3932a 100644 --- a/triedb/pathdb/reader.go +++ b/triedb/pathdb/reader.go @@ -51,6 +51,26 @@ func (loc nodeLoc) string() string { return fmt.Sprintf("loc: %s, depth: %d", loc.loc, loc.depth) } +// RawStateReader is an extension of database.StateReader that exposes raw +// byte access to flat-state entries without applying any scheme-specific +// decoding (slim-RLP for merkle, no-op for bintrie). The bintrie state +// reader in core/state uses it to fetch the BasicData and CodeHash leaves +// for an account separately and reconstruct a slim account locally. +// +// The merkle pathdb reader implements this interface trivially because +// it already has AccountRLP. Callers should type-assert before using it +// rather than relying on the database.StateReader interface unconditionally. +type RawStateReader interface { + database.StateReader + + // AccountRLP returns the raw flat-state entry stored under the given + // lookup key. Semantics depend on the active codec: + // - merkle: slim-RLP-encoded account bytes + // - bintrie: 32-byte leaf value at the (stem || offset) tuple + // Returns nil if the entry is not present. + AccountRLP(hash common.Hash) ([]byte, error) +} + // reader implements the database.NodeReader interface, providing the functionalities to // retrieve trie nodes by wrapping the internal state layer. type reader struct {