go-ethereum/trie/bintrie/trie.go
weiihann a1eaa21f24 trie/bintrie: fix hashInternal at group boundaries to match read-back hash
For an InternalNode at a group-boundary depth, hashInternal previously
computed pure SHA256(left, right) recursion over the natural-depth
in-memory tree built by UpdateStem. But serializeSubtree extends stems
to the group's bottom layer via key-bit extension, so the on-disk blob
encodes an extended-depth structure. When a fresh reader deserializes
that blob, hashInternal walks the extended-depth in-memory tree and
produces a different value.

The result was that for any subtree with multiple stems sharing a
prefix shorter than groupDepth, the parent's stored child-hash (computed
from the natural-depth in-memory tree at commit time) did not equal the
child blob's read-back hash. Geth's own write-read cycle was internally
inconsistent: state-actor's groundtruth test, which feeds the same
stems through state-actor's streaming builder and geth's UpdateStem +
Commit and diffs the resulting on-disk node sets, fails at n=4 with a
mismatched slot hash in the root group blob.

At a group boundary, recompute the hash via serializeSubtree +
groupedRecursiveHash so that the parent stores the same value the
reader will compute when it deserializes the child blob.

The fix is gated on groupDepth > 0, so nodeStore tests that construct
the store directly without going through NewBinaryTrie retain the
existing pure-SHA256 recursion semantics.

Verification:
- All existing trie/bintrie tests pass unchanged.
- state-actor/generator's TestStreamingMatchesGethCommit (which compares
  state-actor's streaming builder output to geth's Commit output
  byte-for-byte at n=2,4,8,32,128) now passes.
2026-05-13 09:39:19 +08:00

423 lines
15 KiB
Go

// Copyright 2025 go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package bintrie
import (
"bytes"
"encoding/binary"
"fmt"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/trie"
"github.com/ethereum/go-ethereum/trie/trienode"
"github.com/ethereum/go-ethereum/triedb/database"
"github.com/holiman/uint256"
)
// ChunkedCode is the chunked encoding of EVM bytecode produced by ChunkifyCode:
// a flat byte slice made of consecutive HashSize-byte chunks. Each chunk starts
// with one metadata byte (the pushdata offset) which is followed by up to
// StemSize bytes of actual code.
type ChunkedCode []byte
// PUSH1 and PUSH32 delimit the opcode range that ChunkifyCode treats as
// PUSH-N instructions. The values are copied here so as to avoid an import
// cycle.
const (
PUSH1 = byte(0x60)
PUSH32 = byte(0x7f)
)
// ChunkifyCode generates the chunked version of an array representing EVM bytecode
// according to EIP-7864 specification.
//
// The code is divided into HashSize-byte chunks, where each chunk contains:
// - Byte 0: Metadata byte indicating the number of leading bytes that are PUSHDATA (0-StemSize)
// - Bytes 1-StemSize: Actual code bytes
//
// This format enables stateless clients to validate jump destinations within a chunk
// without requiring additional context. When a PUSH instruction's data spans multiple
// chunks, the metadata byte tells us how many bytes at the start of the chunk are
// part of the previous chunk's PUSH instruction data.
//
// For example:
// - If a chunk starts with regular code: metadata byte = 0
// - If a PUSH32 instruction starts at byte 30 of chunk N:
// - Chunk N: normal, contains PUSH32 opcode + 1 byte of data
// - Chunk N+1: metadata = StemSize (entire chunk is PUSH data)
// - Chunk N+2: metadata = 1 (first byte is PUSH data, then normal code resumes)
//
// NOTE(review): the byte counts in the example above depend on the value of
// StemSize and on whether "byte 30" is zero- or one-based; verify them against
// the constants before relying on the exact numbers.
//
// This chunking approach ensures that jump destination validity can be determined
// by examining only the chunk containing the potential JUMPDEST, making it ideal
// for stateless execution and verkle/binary tries.
//
// The result holds one chunk per StemSize bytes of code, rounded up. The code
// bytes of a short final chunk are followed by zero padding, and empty code
// yields an empty result.
//
// Reference: https://eips.ethereum.org/EIPS/eip-7864
func ChunkifyCode(code []byte) ChunkedCode {
var (
chunkOffset = 0 // offset in the chunk
chunkCount = len(code) / StemSize
codeOffset = 0 // offset in the code
)
// Round the chunk count up when the code does not fill the last chunk.
if len(code)%StemSize != 0 {
chunkCount++
}
chunks := make([]byte, chunkCount*HashSize)
for i := 0; i < chunkCount; i++ {
// number of bytes to copy, StemSize unless the end of the code has been reached.
end := min(len(code), StemSize*(i+1))
copy(chunks[i*HashSize+1:], code[StemSize*i:end]) // copy the code itself
// chunk offset = taken from the last chunk.
if chunkOffset > StemSize {
// skip offset calculation if push data covers the whole chunk
chunks[i*HashSize] = StemSize
// The carry-over for the next chunk is fixed at one byte here; presumably
// because PUSH data can exceed a full chunk by at most one byte. Confirm
// against StemSize.
chunkOffset = 1
continue
}
chunks[HashSize*i] = byte(chunkOffset)
chunkOffset = 0
// Check each instruction and update the offset it should be 0 unless
// a PUSH-N overflows.
// codeOffset is not reset between chunks, so scanning resumes wherever the
// previous chunk's scan stopped.
for ; codeOffset < end; codeOffset++ {
if code[codeOffset] >= PUSH1 && code[codeOffset] <= PUSH32 {
// Jump over the N data bytes of PUSH-N, where N = opcode - PUSH1 + 1.
codeOffset += int(code[codeOffset] - PUSH1 + 1)
// The push data reaches or crosses the end of this chunk: step past it
// and record how many bytes spill into the following chunk(s).
if codeOffset+1 >= StemSize*(i+1) {
codeOffset++
chunkOffset = codeOffset - StemSize*(i+1)
break
}
}
}
}
return chunks
}
// BinaryTrie is the implementation of https://eips.ethereum.org/EIPS/eip-7864.
//
// It combines an in-memory node store with a database reader used to resolve
// nodes on demand, and a tracer that remembers every node blob that was read.
type BinaryTrie struct {
// store holds the in-memory nodes, including the root reference.
store *nodeStore
// reader loads serialized node blobs for nodeResolver.
reader *trie.Reader
// tracer records the blobs returned by nodeResolver; Commit uses them as the
// previous values of written nodes and Witness exposes them.
tracer *trie.PrevalueTracer
groupDepth int // Number of levels per serialized group (1-8, default 8)
}
// GroupDepth returns the number of levels per serialized group this trie was
// created with.
func (t *BinaryTrie) GroupDepth() int {
return t.groupDepth
}
// ToDot renders the trie as a graph in the DOT language, which is handy when
// debugging. The root hash is computed before the graph is generated.
func (t *BinaryTrie) ToDot() string {
	s := t.store
	s.computeHash(s.root)
	dot := s.toDot(s.root, "", "")
	return dot
}
// NewBinaryTrie builds a binary trie rooted at root on top of db.
//
// groupDepth is the number of levels per serialized group; it must lie in
// [1, MaxGroupDepth], otherwise the function panics. Unless root is one of
// the empty-trie hashes, the root node is loaded from the database and
// deserialized right away, and any failure doing so is returned.
func NewBinaryTrie(root common.Hash, db database.NodeDatabase, groupDepth int) (*BinaryTrie, error) {
	if groupDepth < 1 || groupDepth > MaxGroupDepth {
		panic("invalid group depth size")
	}
	reader, err := trie.NewReader(root, common.Hash{}, db)
	if err != nil {
		return nil, err
	}
	store := newNodeStore()
	store.groupDepth = groupDepth

	bt := &BinaryTrie{
		store:      store,
		reader:     reader,
		tracer:     trie.NewPrevalueTracer(),
		groupDepth: groupDepth,
	}
	// An empty trie has no root node to parse.
	if root == types.EmptyBinaryHash || root == types.EmptyRootHash {
		return bt, nil
	}
	blob, err := bt.nodeResolver(nil, root)
	if err != nil {
		return nil, err
	}
	rootRef, err := bt.store.deserializeNodeWithHash(blob, 0, root)
	if err != nil {
		return nil, err
	}
	bt.store.root = rootRef
	return bt, nil
}
// nodeResolver loads the serialized node identified by path and hash through
// the trie reader and records the returned blob in the tracer.
//
// The zero hash denotes an empty node: it resolves to a nil blob without
// touching the reader or the tracer.
func (t *BinaryTrie) nodeResolver(path []byte, hash common.Hash) ([]byte, error) {
	var emptyHash common.Hash
	if hash == emptyHash {
		// empty nodes are serialized as common.Hash{}
		return nil, nil
	}
	data, err := t.reader.Node(path, hash)
	if err != nil {
		return nil, err
	}
	t.tracer.Put(path, data)
	return data, nil
}
// GetKey returns key unchanged. No preimage lookup is performed by this
// implementation.
func (t *BinaryTrie) GetKey(key []byte) []byte {
return key
}
// GetWithHashedKey returns the value, assuming that the key has already
// been hashed. The key is handed to the node store as-is, and missing nodes
// are resolved through nodeResolver.
func (t *BinaryTrie) GetWithHashedKey(key []byte) ([]byte, error) {
return t.store.Get(key, t.nodeResolver)
}
// GetAccount returns the account information for the given address.
//
// The values stored at the account's stem are fetched in one lookup and the
// BasicData and CodeHash leaves are decoded into a StateAccount (nonce,
// balance and code hash are populated).
//
// It returns (nil, nil) when no account is present: either both leaves are
// absent, or both are all-zero blobs as written by DeleteAccount. Errors from
// the node store are wrapped, so the underlying error stays reachable through
// errors.Is / errors.As. A BasicData leaf that is too short to decode is
// reported as an error rather than causing a panic.
func (t *BinaryTrie) GetAccount(addr common.Address) (*types.StateAccount, error) {
	key := GetBinaryTreeKey(addr, zero[:])
	values, err := t.store.GetValuesAtStem(key[:StemSize], t.nodeResolver)
	if err != nil {
		return nil, fmt.Errorf("GetAccount (%x) error: %w", addr, err)
	}

	// The following code is required for the MPT->Binary conversion.
	// An account can be partially migrated, where storage slots were moved to the binary
	// but not yet the account. This means some account information as (header) storage slots
	// are in the binary trie but basic account information must be read in the base tree (MPT).
	// TODO: we can simplify this logic depending if the conversion is in progress or finished.
	emptyAccount := true
	for i := 0; values != nil && i <= CodeHashLeafKey && emptyAccount; i++ {
		emptyAccount = emptyAccount && values[i] == nil
	}
	if emptyAccount {
		return nil, nil
	}

	basicData, codeHash := values[BasicDataLeafKey], values[CodeHashLeafKey]

	// If the account has been deleted, BasicData and CodeHash will both be
	// 32-byte zero blobs (not nil). If the account is recreated afterwards,
	// UpdateAccount overwrites BasicData and CodeHash with non-zero values,
	// so this branch won't activate.
	if bytes.Equal(basicData, zero[:]) && bytes.Equal(codeHash, zero[:]) {
		return nil, nil
	}

	// Only one of the two leaves may be present (the emptiness check above
	// requires both to be nil). Make sure BasicData is long enough for the
	// nonce read and the balance slice below, which would otherwise panic.
	need := max(int(BasicDataNonceOffset)+8, int(BasicDataBalanceOffset))
	if len(basicData) < need {
		return nil, fmt.Errorf("GetAccount (%x) error: malformed basic data leaf (%d bytes)", addr, len(basicData))
	}

	// The balance occupies at most 16 bytes starting at BasicDataBalanceOffset.
	var balance [16]byte
	copy(balance[:], basicData[BasicDataBalanceOffset:])

	return &types.StateAccount{
		Nonce:    binary.BigEndian.Uint64(basicData[BasicDataNonceOffset:]),
		Balance:  new(uint256.Int).SetBytes(balance[:]),
		CodeHash: codeHash,
	}, nil
}
// GetStorage looks up the storage slot key of account addr. The slot is first
// mapped to its binary tree key, which is then read from the node store; any
// error reported by the store is returned unchanged. The returned bytes must
// not be modified by the caller.
func (t *BinaryTrie) GetStorage(addr common.Address, key []byte) ([]byte, error) {
	slotKey := GetBinaryTreeKeyStorageSlot(addr, key)
	return t.store.Get(slotKey, t.nodeResolver)
}
// UpdateAccount updates the account information for the given address.
//
// It packs codeLen, the nonce and the balance of acc into the BasicData leaf,
// stores acc.CodeHash in the CodeHash leaf and writes both at the account's
// stem. The error of the underlying stem insertion is returned as-is.
//
// Balances wider than 16 bytes are truncated to their 16 least significant
// bytes.
func (t *BinaryTrie) UpdateAccount(addr common.Address, acc *types.StateAccount, codeLen int) error {
	var (
		basicData [HashSize]byte
		values    = make([][]byte, StemNodeWidth)
		stem      = GetBinaryTreeKey(addr, zero[:])
	)
	// The 4-byte write starts one byte before BasicDataCodeSizeOffset, so only
	// the three low-order bytes of codeLen land in the code size field itself.
	binary.BigEndian.PutUint32(basicData[BasicDataCodeSizeOffset-1:], uint32(codeLen))
	binary.BigEndian.PutUint64(basicData[BasicDataNonceOffset:], acc.Nonce)

	// Because the balance is a max of 16 bytes, truncate
	// the extra values. This happens in devmode, where
	// 0xff**HashSize is allocated to the developer account.
	balanceBytes := acc.Balance.Bytes()
	// TODO: reduce the size of the allocation in devmode, then panic instead
	// of truncating.
	if len(balanceBytes) > 16 {
		// Keep the 16 least significant bytes whatever the original length is.
		// Slicing from a fixed index 16 would only be correct for a 32-byte
		// balance and would drop significant bytes for 17..31-byte ones.
		balanceBytes = balanceBytes[len(balanceBytes)-16:]
	}
	// Right-align the big-endian balance at the end of the leaf.
	copy(basicData[HashSize-len(balanceBytes):], balanceBytes)

	values[BasicDataLeafKey] = basicData[:]
	values[CodeHashLeafKey] = acc.CodeHash[:]
	return t.store.InsertValuesAtStem(stem, values, t.nodeResolver)
}
// UpdateStem updates the values for the given stem key. Both arguments are
// forwarded unchanged to the node store's InsertValuesAtStem, with
// nodeResolver used to load missing nodes.
func (t *BinaryTrie) UpdateStem(key []byte, values [][]byte) error {
return t.store.InsertValuesAtStem(key, values, t.nodeResolver)
}
// UpdateStorage associates key with value in the storage of address.
//
// The value is normalized to exactly HashSize bytes before it is stored:
// shorter values are left-padded with zeros, longer ones are cut down to
// their first HashSize bytes. A zero-length value therefore stores an
// all-zero blob.
//
// Errors from the node store are wrapped, so the underlying error (e.g. a
// missing-node error) can still be matched with errors.Is / errors.As.
func (t *BinaryTrie) UpdateStorage(address common.Address, key, value []byte) error {
	k := GetBinaryTreeKeyStorageSlot(address, key)

	var v [HashSize]byte
	if len(value) >= HashSize {
		copy(v[:], value[:HashSize])
	} else {
		// Right-align short values so that the padding ends up in front.
		copy(v[HashSize-len(value):], value)
	}
	if err := t.store.Insert(k, v[:], t.nodeResolver); err != nil {
		return fmt.Errorf("UpdateStorage (%x) error: %w", address, err)
	}
	return nil
}
// DeleteAccount erases an account by overwriting its BasicData (nonce,
// balance, code size) and CodeHash leaves with zeros. All other entries of
// the value list passed to the store are left nil.
func (t *BinaryTrie) DeleteAccount(addr common.Address) error {
	cleared := make([][]byte, StemNodeWidth)
	accountStem := GetBinaryTreeKey(addr, zero[:])

	cleared[BasicDataLeafKey] = zero[:]
	cleared[CodeHashLeafKey] = zero[:]

	return t.store.InsertValuesAtStem(accountStem, cleared, t.nodeResolver)
}
// DeleteStorage clears the storage slot key of account addr by writing an
// all-zero HashSize-byte value to it.
//
// Errors from the node store are wrapped, so the underlying error (e.g. a
// missing-node error) can still be matched with errors.Is / errors.As.
func (t *BinaryTrie) DeleteStorage(addr common.Address, key []byte) error {
	k := GetBinaryTreeKeyStorageSlot(addr, key)

	// Named "empty" rather than "zero" to avoid shadowing the package-level
	// zero value used by the other methods.
	var empty [HashSize]byte
	if err := t.store.Insert(k, empty[:], t.nodeResolver); err != nil {
		return fmt.Errorf("DeleteStorage (%x) error: %w", addr, err)
	}
	return nil
}
// Hash returns the root hash of the trie. It does not write to the database and
// can be used even if the trie doesn't have one. The hash is obtained by asking
// the node store to hash the current root.
func (t *BinaryTrie) Hash() common.Hash {
return t.store.computeHash(t.store.root)
}
// Commit gathers the serialized nodes of the trie into a fresh node set and
// returns it together with the root hash.
//
// Every node reported by the store is keyed by its encoded path and paired
// with the blob the tracer recorded for that path, which serves as the node's
// previous value. The boolean argument is ignored.
func (t *BinaryTrie) Commit(_ bool) (common.Hash, *trienode.NodeSet) {
	var (
		set   = trienode.NewNodeSet(common.Hash{})
		start BitArray
	)
	record := func(path BitArray, hash common.Hash, serialized []byte) {
		var scratch [33]byte
		encoded := path.PutKeyBytes(scratch[:])
		prev := t.tracer.Get(encoded)
		set.AddNode(encoded, trienode.NewNodeWithPrev(hash, serialized, prev))
	}
	t.store.collectNodes(t.store.root, start, record, t.groupDepth)
	return t.Hash(), set
}
// NodeIterator returns an iterator that returns nodes of the trie.
//
// NOTE: startKey is currently not forwarded; nil is passed to
// newBinaryNodeIterator instead, so the requested start position is not
// applied by this method.
func (t *BinaryTrie) NodeIterator(startKey []byte) (trie.NodeIterator, error) {
return newBinaryNodeIterator(t, nil)
}
// Prove constructs a Merkle proof for key. The result contains all encoded nodes
// on the path to the value at key. The value itself is also included in the last
// node and can be retrieved by verifying the proof.
//
// If the trie does not contain a value for key, the returned proof contains all
// nodes of the longest existing prefix of the key (at least the root), ending
// with the node that proves the absence of the key.
//
// NOTE: the above describes the intended contract only. The method is not
// implemented yet and always panics.
func (t *BinaryTrie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
panic("not implemented")
}
// Copy returns a new trie whose node store and tracer are copies of the
// originals. The database reader is shared with the receiver and the group
// depth is carried over.
func (t *BinaryTrie) Copy() *BinaryTrie {
	dup := new(BinaryTrie)
	dup.store = t.store.Copy()
	dup.reader = t.reader
	dup.tracer = t.tracer.Copy()
	dup.groupDepth = t.groupDepth
	return dup
}
// IsUBT always returns true for a BinaryTrie.
func (t *BinaryTrie) IsUBT() bool {
// TODO @gballet This is technically NOT a verkle tree, but it has the same
// behavior and basic structure, so for all intents and purposes, it can be
// treated as such. Rename this when verkle gets removed.
return true
}
// UpdateContractCode chunkifies code and writes the chunks into the trie.
//
// Chunk number n is stored at leaf (n+128) % StemNodeWidth of its stem. A new
// stem key and value list are started for the very first chunk and whenever
// that leaf index wraps around to 0. The accumulated values are flushed via
// UpdateStem once the last leaf of a stem has been filled or the final chunk
// has been placed. The codeHash argument is not used by this method.
//
// Note: the basic data leaf needs to have been previously created for this to work
func (t *BinaryTrie) UpdateContractCode(addr common.Address, codeHash common.Hash, code []byte) error {
	var (
		chunked = ChunkifyCode(code)
		group   [][]byte
		stemKey []byte
	)
	for pos, nr := 0, uint64(0); pos < len(chunked); pos, nr = pos+HashSize, nr+1 {
		slot := (nr + 128) % StemNodeWidth

		// first chunk in header group, or start of a new group
		if nr == 0 || slot == 0 {
			group = make([][]byte, StemNodeWidth)
			var offset [HashSize]byte
			binary.BigEndian.PutUint64(offset[24:], nr+128)
			stemKey = GetBinaryTreeKey(addr, offset[:])
		}
		group[slot] = chunked[pos : pos+HashSize]

		groupFull := slot == StemNodeWidth-1
		finalChunk := len(chunked)-pos <= HashSize
		if !groupFull && !finalChunk {
			continue
		}
		if err := t.UpdateStem(stemKey[:StemSize], group); err != nil {
			return fmt.Errorf("UpdateContractCode (addr=%x) error: %w", addr[:], err)
		}
	}
	return nil
}
// PrefetchAccount reads every listed account through GetAccount, discarding
// the results, so that the nodes involved get resolved ahead of later trie
// operations. It stops at the first failing lookup and returns its error.
func (t *BinaryTrie) PrefetchAccount(addresses []common.Address) error {
	for i := range addresses {
		_, err := t.GetAccount(addresses[i])
		if err != nil {
			return err
		}
	}
	return nil
}
// PrefetchStorage reads every listed storage slot of addr through GetStorage,
// discarding the results, so that the nodes involved get resolved ahead of
// later trie operations. It stops at the first failing lookup and returns its
// error.
func (t *BinaryTrie) PrefetchStorage(addr common.Address, keys [][]byte) error {
	for i := range keys {
		_, err := t.GetStorage(addr, keys[i])
		if err != nil {
			return err
		}
	}
	return nil
}
// Witness returns a set containing all trie nodes that have been accessed.
// The set is whatever the tracer has recorded so far, i.e. the blobs loaded
// through nodeResolver.
func (t *BinaryTrie) Witness() map[string][]byte {
return t.tracer.Values()
}