diff --git a/core/blockchain.go b/core/blockchain.go index 1a8d6a9368..db26eba418 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -2271,7 +2271,9 @@ func (bc *BlockChain) ProcessBlock(ctx context.Context, parentRoot common.Hash, stats.MgasPerSecond = float64(res.GasUsed) * 1000 / float64(elapsed) if config.StatelessSelfValidation { - bc.crossValidation(ctx, statedb, block) + if err := bc.crossValidation(ctx, statedb, block); err != nil { + return nil, err + } } return &blockProcessingResult{ usedGas: res.GasUsed, diff --git a/core/state/database_hasher.go b/core/state/database_hasher.go index cf42d71e1c..925a7b31b6 100644 --- a/core/state/database_hasher.go +++ b/core/state/database_hasher.go @@ -121,7 +121,7 @@ type Hasher interface { // UpdateAccount writes a list of accounts into the state. UpdateAccount(addresses []common.Address, accounts []AccountMut) error - // UpdateStorage writes a list of storage slot value. + // UpdateStorage writes a list of storage slot values. UpdateStorage(address common.Address, keys []common.Hash, values []common.Hash) error // Hash computes and returns the state root hash without committing. diff --git a/core/state/database_hasher_binary.go b/core/state/database_hasher_binary.go index c12a9ab6b0..cc59ed0306 100644 --- a/core/state/database_hasher_binary.go +++ b/core/state/database_hasher_binary.go @@ -27,15 +27,15 @@ import ( "github.com/ethereum/go-ethereum/triedb" ) -// warpBinTrie pairs a BinaryTrie with an optional background prefetcher that +// wrapBinTrie pairs a BinaryTrie with an optional background prefetcher that // preloads trie nodes ahead of mutation. -type warpBinTrie struct { +type wrapBinTrie struct { *bintrie.BinaryTrie prefetcher *prefetcher } // newWrapBinTrie creates a binary trie with the optional prefetcher enabled. -func newWrapBinTrie(root common.Hash, db *triedb.Database, prefetch bool, prefetchRead bool) (*warpBinTrie, error) { +func newWrapBinTrie(root common.Hash, db *triedb.Database, prefetch bool, prefetchRead bool) (*wrapBinTrie, error) { t, err := bintrie.NewBinaryTrie(root, db) if err != nil { return nil, err @@ -44,13 +44,13 @@ func newWrapBinTrie(root common.Hash, db *triedb.Database, prefetch bool, prefet if prefetch { p = newPrefetcher(t, prefetchRead) } - return &warpBinTrie{BinaryTrie: t, prefetcher: p}, nil + return &wrapBinTrie{BinaryTrie: t, prefetcher: p}, nil } // term synchronously terminates the prefetcher (no-op if nil or already done). // After termination the prefetcher reference is nilled so subsequent calls are // a cheap pointer check. -func (tr *warpBinTrie) term() { +func (tr *wrapBinTrie) term() { if tr.prefetcher == nil { return } @@ -62,54 +62,54 @@ func (tr *warpBinTrie) term() { // access auto-terminates the prefetcher first. This makes data-race freedom // structural: callers never need to remember to call term() manually. -func (tr *warpBinTrie) UpdateAccount(address common.Address, acc *types.StateAccount, codeLen int) error { +func (tr *wrapBinTrie) UpdateAccount(address common.Address, acc *types.StateAccount, codeLen int) error { tr.term() return tr.BinaryTrie.UpdateAccount(address, acc, codeLen) } -func (tr *warpBinTrie) DeleteAccount(address common.Address) error { +func (tr *wrapBinTrie) DeleteAccount(address common.Address) error { tr.term() return tr.BinaryTrie.DeleteAccount(address) } -func (tr *warpBinTrie) UpdateStorage(address common.Address, key, value []byte) error { +func (tr *wrapBinTrie) UpdateStorage(address common.Address, key, value []byte) error { tr.term() return tr.BinaryTrie.UpdateStorage(address, key, value) } -func (tr *warpBinTrie) DeleteStorage(address common.Address, key []byte) error { +func (tr *wrapBinTrie) DeleteStorage(address common.Address, key []byte) error { tr.term() return tr.BinaryTrie.DeleteStorage(address, key) } -func (tr *warpBinTrie) Hash() common.Hash { +func (tr *wrapBinTrie) Hash() common.Hash { tr.term() return tr.BinaryTrie.Hash() } -func (tr *warpBinTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) { +func (tr *wrapBinTrie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) { tr.term() return tr.BinaryTrie.Commit(collectLeaf) } -func (tr *warpBinTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error { +func (tr *wrapBinTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error { tr.term() return tr.BinaryTrie.Prove(key, proofDb) } -func (tr *warpBinTrie) Witness() map[string][]byte { +func (tr *wrapBinTrie) Witness() map[string][]byte { tr.term() return tr.BinaryTrie.Witness() } -func (tr *warpBinTrie) prefetchAccounts(addresses []common.Address, read bool) { +func (tr *wrapBinTrie) prefetchAccounts(addresses []common.Address, read bool) { if tr.prefetcher == nil { return } tr.prefetcher.scheduleAccounts(addresses, read) } -func (tr *warpBinTrie) prefetchStorage(addr common.Address, keys []common.Hash, read bool) { +func (tr *wrapBinTrie) prefetchStorage(addr common.Address, keys []common.Hash, read bool) { if tr.prefetcher == nil { return } @@ -118,9 +118,9 @@ func (tr *warpBinTrie) prefetchStorage(addr common.Address, keys []common.Hash, // copy returns a deep-copied state trie. Notably the prefetcher is deliberately // not copied, as it only belongs to the original one. -func (tr *warpBinTrie) copy() *warpBinTrie { +func (tr *wrapBinTrie) copy() *wrapBinTrie { tr.term() - return &warpBinTrie{BinaryTrie: tr.BinaryTrie.Copy()} + return &wrapBinTrie{BinaryTrie: tr.BinaryTrie.Copy()} } // binaryHasher is a Hasher implementation backed by a unified single-layer @@ -139,7 +139,7 @@ type binaryHasher struct { root common.Hash prefetch bool - trie *warpBinTrie + trie *wrapBinTrie // leaves buffers flat-state writes produced as a side-effect of // UpdateAccount/UpdateStorage/deleteAccount. It is cleared by @@ -350,20 +350,23 @@ func (h *binaryHasher) Copy() Hasher { } } -// ProveAccount implements Prover, constructing a proof for the given account. +// ProveAccount implements Prover. NOTE: BinaryTrie.Prove is not yet +// implemented (panics at runtime). The key derivation also needs to use +// bintrie tree keys instead of keccak256. Do not call until the bintrie +// proof path is implemented. func (h *binaryHasher) ProveAccount(addr common.Address, proofDb ethdb.KeyValueWriter) error { return h.trie.Prove(crypto.Keccak256(addr.Bytes()), proofDb) } -// ProveStorage implements Prover, constructing a proof for the given storage -// slot of the specified account. +// ProveStorage implements Prover. NOTE: same limitation as ProveAccount — +// BinaryTrie.Prove panics and the key derivation is wrong. func (h *binaryHasher) ProveStorage(addr common.Address, key common.Hash, proofDb ethdb.KeyValueWriter) error { return h.trie.Prove(crypto.Keccak256(key.Bytes()), proofDb) } // CollectWitness implements WitnessCollector. It aggregates all trie nodes -// accessed (both read and write) across the account trie, all active storage -// tries and deleted storage tries into a single state witness. +// accessed during the state transition from the unified binary trie into +// a single state witness. func (h *binaryHasher) CollectWitness(witness *stateless.Witness) { witness.AddState(h.trie.Witness(), common.Hash{}) } @@ -376,8 +379,8 @@ func (h *binaryHasher) PrefetchAccount(addresses []common.Address, read bool) { h.trie.prefetchAccounts(addresses, read) } -// PrefetchStorage implements Prefetcher. The storage trie is opened eagerly -// so the prefetcher can begin loading nodes in the background. +// PrefetchStorage implements Prefetcher, scheduling storage slot nodes for +// background loading in the unified binary trie. func (h *binaryHasher) PrefetchStorage(addr common.Address, keys []common.Hash, read bool) { if !h.prefetch { return diff --git a/core/state/statedb.go b/core/state/statedb.go index 010c6768fa..cd9554c666 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -862,7 +862,7 @@ func (s *StateDB) deleteStorage(addrHash common.Hash) (map[common.Hash]common.Ha key := it.Hash() storages[key] = common.Hash{} - _, content, _, err := rlp.Split(it.Slot()) + _, content, _, err := rlp.Split(slot) if err != nil { return nil, nil, nil, err } diff --git a/core/state/stateupdate.go b/core/state/stateupdate.go index 342a80b73f..e4c555f7a8 100644 --- a/core/state/stateupdate.go +++ b/core/state/stateupdate.go @@ -280,9 +280,8 @@ func (sc *stateUpdate) encodeMerkle() (map[common.Hash][]byte, map[common.Addres // 32-byte keys. // // accountOrigin and storageOrigin are returned empty because state-history -// rollback for bintrie is deferred to a follow-up PR (see -// BINTRIE_FLAT_STATE_REORG_GAP.md). The pathdb layer's revertTo path -// panics for bintrie before it would observe these maps anyway. +// rollback for bintrie is not yet supported. The pathdb disklayer.revert +// guard blocks bintrie reverts before it would observe these maps. func (sc *stateUpdate) encodeBinary() (map[common.Hash][]byte, map[common.Address][]byte, map[common.Hash]map[common.Hash][]byte, map[common.Address]map[common.Hash][]byte, error) { var ( accounts = make(map[common.Hash][]byte, len(sc.leaves)) @@ -393,7 +392,17 @@ func (sc *stateUpdate) ToTracingUpdate() (*tracing.StateUpdate, error) { if !exists { return nil, fmt.Errorf("account %x not found", addr) } - hashes := sc.secondaryHashes[addr] + var hashes Hashes + if sc.secondaryHashes != nil { + var ok bool + hashes, ok = sc.secondaryHashes[addr] + if !ok { + return nil, fmt.Errorf("ToTracingUpdate: missing secondary hash for %x", addr) + } + } else { + // Bintrie: no per-account storage sub-tries, use empty root. + hashes = Hashes{Hash: types.EmptyRootHash, Prev: types.EmptyRootHash} + } change := &tracing.AccountChange{} if oldData != nil { diff --git a/triedb/pathdb/disklayer.go b/triedb/pathdb/disklayer.go index 4ca09b4df6..b5554e40c5 100644 --- a/triedb/pathdb/disklayer.go +++ b/triedb/pathdb/disklayer.go @@ -141,12 +141,11 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co cleanNodeHitMeter.Mark(1) cleanNodeReadMeter.Mark(int64(len(blob))) // Use the scheme-appropriate hasher (keccak256 for merkle, - // sha256-via-bintrie for binary trie). Prior to A5 this was - // hard-coded to crypto.Keccak256Hash, which returned the wrong - // hash for bintrie nodes — masked by noHashCheck=true in the - // verkle NodeReader, but silently wrong for any caller without - // that flag (e.g. HistoricalNodeReader.Node). - h, _ := dl.db.hasher(blob) + // sha256-via-bintrie for binary trie). + h, err := dl.db.hasher(blob) + if err != nil { + return nil, common.Hash{}, nodeLoc{}, fmt.Errorf("hash cached trie node: %w", err) + } return blob, h, nodeLoc{loc: locCleanCache, depth: depth}, nil } cleanNodeMissMeter.Mark(1) @@ -167,7 +166,10 @@ func (dl *diskLayer) node(owner common.Hash, path []byte, depth int) ([]byte, co dl.nodes.Set(key, blob) cleanNodeWriteMeter.Mark(int64(len(blob))) } - h, _ := dl.db.hasher(blob) + h, err := dl.db.hasher(blob) + if err != nil { + return nil, common.Hash{}, nodeLoc{}, fmt.Errorf("hash disk trie node: %w", err) + } return blob, h, nodeLoc{loc: locDiskLayer, depth: depth}, nil } @@ -557,12 +559,8 @@ func (dl *diskLayer) revert(h *stateHistory) (*diskLayer, error) { // shape), but the bintrie disk layout is per-stem and the merkle // origin maps cannot be replayed onto it. Reorgs would silently // produce wrong answers — fail loudly here so misuse is obvious. - // - // See BINTRIE_FLAT_STATE_REORG_GAP.md for the full design and the - // follow-up that lifts this restriction by emitting bintrie-shaped - // origin records on the write path. if _, isBintrie := dl.db.flatCodec.(*bintrieFlatCodec); isBintrie { - return nil, errors.New("bintrie flat state revert is not supported (see BINTRIE_FLAT_STATE_REORG_GAP.md)") + return nil, errors.New("bintrie flat state revert is not supported") } // Apply the reverse state changes upon the current state. This must // be done before holding the lock in order to access state in "this" diff --git a/triedb/pathdb/flat_codec.go b/triedb/pathdb/flat_codec.go index 9ccf7d7c78..9643933061 100644 --- a/triedb/pathdb/flat_codec.go +++ b/triedb/pathdb/flat_codec.go @@ -36,8 +36,8 @@ import ( // // Two implementations are provided: // - merkleFlatCodec: keccak-keyed flat state, the historical MPT scheme. -// - bintrieFlatCodec: per-stem flat state for the unified binary trie. Added -// in a later commit. Until then, only merkleFlatCodec is wired. +// - bintrieFlatCodec: per-stem flat state for the unified binary trie. +// Wired into pathdb.Database.New when isVerkle is true. // // All methods MUST be safe for concurrent use; the codec is shared across // goroutines (the disk layer's read path, the buffer flush path, and the @@ -46,9 +46,10 @@ type flatStateCodec interface { // AccountKey derives the flat-state lookup key for an account. // // For Merkle: returns keccak256(addr). - // For Bintrie: returns the stem of the BasicData leaf (a 31-byte stem - // zero-padded into a common.Hash). Subsequent reads of the stem blob - // extract the BasicData and CodeHash leaves at offsets 0 and 1. + // For Bintrie: returns the full 32-byte tree key (stem || offset) for + // the BasicData leaf. Since BasicDataLeafKey is 0, the last byte is + // zero, but the result is a full key — callers use stemFromKey / + // offsetFromKey to decompose it. AccountKey(addr common.Address) common.Hash // StorageKey derives the flat-state lookup keys for a storage slot. @@ -272,7 +273,7 @@ func (c *merkleFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData if len(blob) == 0 { c.DeleteAccount(batch, addrHash) if clean != nil { - clean.Set(cacheKey, nil) + clean.Set(cacheKey, []byte{}) } } else { c.WriteAccount(batch, addrHash, blob) @@ -301,7 +302,7 @@ func (c *merkleFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData if len(blob) == 0 { c.DeleteStorage(batch, addrHash, storageHash) if clean != nil { - clean.Set(cacheKey, nil) + clean.Set(cacheKey, []byte{}) } } else { c.WriteStorage(batch, addrHash, storageHash, blob) diff --git a/triedb/pathdb/flat_codec_bintrie.go b/triedb/pathdb/flat_codec_bintrie.go index a55d87b2b1..f0c6def7d8 100644 --- a/triedb/pathdb/flat_codec_bintrie.go +++ b/triedb/pathdb/flat_codec_bintrie.go @@ -152,6 +152,7 @@ func (c *bintrieFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) } val, err := extractStemOffset(blob, offsetFromKey(key)) if err != nil { + log.Error("Corrupt bintrie stem blob in ReadAccount", "key", key, "err", err) return nil } return val @@ -159,13 +160,8 @@ func (c *bintrieFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) // ReadStorage returns the 32-byte value stored at the storage slot's // offset within its stem, or nil if the offset is not populated. -// -// Unlike ReadAccount, this method DOES perform offset extraction from -// the stem blob: storage-slot reads are always a single-offset query, so -// returning the whole blob would just force every caller to re-run the -// extraction. A malformed stem blob is treated as absent and logged -// (returning nil) to match the behavior of rawdb.ReadStorageSnapshot on -// the merkle path. +// Like ReadAccount, it extracts a single offset from the on-disk stem +// blob. A malformed stem blob is treated as absent and logged. // // The first parameter (accountKey) is ignored: see StorageKey for the // reasoning behind the bintrie's zero-hash convention. @@ -176,11 +172,7 @@ func (c *bintrieFlatCodec) ReadStorage(db ethdb.KeyValueReader, _ common.Hash, s } val, err := extractStemOffset(blob, offsetFromKey(storageKey)) if err != nil { - // A well-formed blob never errors on a point read. If we get - // here the on-disk layout is corrupted — return nil rather than - // propagating the error, since the interface has no error path - // (the caller expects a value-or-nil just like - // rawdb.ReadStorageSnapshot). + log.Error("Corrupt bintrie stem blob in ReadStorage", "key", storageKey, "err", err) return nil } return val @@ -192,9 +184,9 @@ func (c *bintrieFlatCodec) ReadStorage(db ethdb.KeyValueReader, _ common.Hash, s // WriteAccount writes an account entry. The blob is expected to be a // two-slot payload containing BasicData (bytes 0..31) followed by the -// code hash (bytes 32..63) — the caller (binaryHasher, in a later -// commit) packs these together because they live at the same stem and -// benefit from a single read-modify-write pass. +// code hash (bytes 32..63) — the caller (binaryHasher) packs these +// together because they live at the same stem and benefit from a +// single read-modify-write pass. // // Writing nil or an empty blob is equivalent to clearing offsets 0 and 1 // at this stem (a partial account deletion); the codec merges the @@ -392,8 +384,8 @@ func (c *bintrieFlatCodec) StoragePrefixSize() int { // --------------------------------------------------------------------- // SplitMarker splits a generation progress marker into the account and -// full components. For bintrie the marker is a single 31-byte stem (or -// the full 32-byte key with offset 0), not the merkle two-tier +// full components. For bintrie the marker is a full 32-byte key +// (stem || offset), not the merkle two-tier // account-then-storage format, so both returned slices point at the // same data. The second half of the merkle marker (storage offset) has // no equivalent for bintrie: the generator iterates stems directly, @@ -419,7 +411,7 @@ func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int { // bintrieFlatCodec.StorageKey returns (zeroHash, fullKey). Comparing // this directly against the 32-byte generator marker yields the correct // ordering — unlike the merkle 64-byte combined key which was fail-open -// for bintrie (see A4 remediation plan for the full diagnosis). +// for bintrie. func (c *bintrieFlatCodec) StorageMarkerKey(_ common.Hash, storageHash common.Hash) []byte { return storageHash[:] } @@ -441,7 +433,7 @@ func (c *bintrieFlatCodec) StorageMarkerKey(_ common.Hash, storageHash common.Ha // // Cache update: after the per-stem RMW, the clean cache is updated // with each written offset's new value (per-offset entries, matching -// the shape returned by ReadAccount after the A1 remediation). Offsets +// the shape returned by ReadAccount). Offsets // that were not touched by this flush retain their existing cache // entries, which remain valid because the RMW did not modify them. // @@ -498,10 +490,10 @@ func (c *bintrieFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountDat } } // Issue one RMW per stem, then update the clean cache per-offset - // using the fullKeys we captured in the aggregator. A nil/empty - // value stored in the cache means "confirmed absent" (the reader - // will fall through to the trie reader on this per feedback #3 / - // Commit A2); a 32-byte value means the offset is populated. + // using the fullKeys we captured in the aggregator. An empty value + // stored in the cache means "confirmed absent" (the reader will + // fall through to the trie reader); a 32-byte value means the + // offset is populated. for _, ag := range aggregated { if _, err := c.applyWrites(batch, ag.fullKeys[0][:bintrie.StemSize], ag.writes); err != nil { return accountWrites, storageWrites, fmt.Errorf("bintrie Flush: %w", err) @@ -511,7 +503,11 @@ func (c *bintrieFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountDat } for i, fullKey := range ag.fullKeys { cacheKey := c.AccountCacheKey(fullKey) - clean.Set(cacheKey, ag.writes[i].Value) + val := ag.writes[i].Value + if val == nil { + val = []byte{} + } + clean.Set(cacheKey, val) } } return accountWrites, storageWrites, nil diff --git a/triedb/pathdb/generate_bintrie.go b/triedb/pathdb/generate_bintrie.go index cabfa4e49f..51dbdab1d7 100644 --- a/triedb/pathdb/generate_bintrie.go +++ b/triedb/pathdb/generate_bintrie.go @@ -173,20 +173,15 @@ func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error { // Without this RMW, a mid-stem resume would overwrite the existing disk // blob with a partial one, silently dropping the earlier offsets. This // was bug C1 identified in the PR review. - flushStem := func() { + flushStem := func() error { if currentStem == nil || builder.empty() { - return + return nil } existing := rawdb.ReadBinTrieStem(ctx.db, currentStem) writes := builder.toOffsetValues() merged, err := mergeStemBlob(existing, writes) if err != nil { - // Corruption in the existing blob. Log and fall back to the - // builder content alone — at least the offsets from this pass - // land. A7 tightens this to a first-class error propagation. - log.Error("Bintrie generator: merge stem blob failed, writing builder only", - "stem", fmt.Sprintf("%x", currentStem), "err", err) - merged = builder.encode() + return fmt.Errorf("merge stem %x failed: %w", currentStem, err) } if merged == nil { rawdb.DeleteBinTrieStem(ctx.batch, currentStem) @@ -196,6 +191,7 @@ func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error { builder.reset() // Bookkeeping: count one stem per emitted blob. g.stats.accounts++ + return nil } for it.Next(true) { @@ -218,7 +214,9 @@ func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error { // Stem boundary detection: if we've moved to a new stem, persist // the previous one before starting a new builder. if currentStem != nil && !bytes.Equal(key[:bintrie.StemSize], currentStem) { - flushStem() + if err := flushStem(); err != nil { + return err + } currentStem = nil } if currentStem == nil { @@ -246,7 +244,9 @@ func (g *generator) generateBinTrieStems(ctx *bintrieGeneratorContext) error { return err } // Flush the trailing stem (the loop only flushes on transitions). - flushStem() + if err := flushStem(); err != nil { + return err + } return nil }