triedb/pathdb,core/state: fix disklayer.storage fail-open gate and historicStateReader rlp.Split bug

Addresses review finding C4 + Opus agent audit secondary bug.

Bug 1 — fail-open gate in disklayer.storage:

disklayer.storage() compared a 64-byte merkle-shaped combinedKey
(accountHash || storageHash) against the 32-byte bintrie generator
marker via codec.MarkerCompare. For bintrie, accountHash is always
common.Hash{} (since bintrieFlatCodec.StorageKey returns zero for
the account key), so the combinedKey started with 32 zero bytes.
The sha256-derived marker's first byte is essentially never 0x00,
so bytes.Compare returned -1, the > 0 branch never fired, and the
generator-progress gate was silently DISABLED. During active
generation, disklayer.storage served whatever was on disk (nil or
stale) without returning errNotCoveredYet.

Fix: add StorageMarkerKey(accountHash, storageHash) to the
flatStateCodec interface. Merkle returns the 64-byte concatenation
(preserving existing behavior); bintrie returns storageHash[:]
(the 32-byte stem||offset key matching the generator marker shape).
disklayer.storage now uses the codec method.

Bug 2 — rlp.Split on raw bintrie storage leaves in historicStateReader:

historicStateReader.Storage at core/state/database_history.go:87
calls rlp.Split(blob) on whatever bytes the pathdb historical reader
returns. Merkle storage values are RLP-encoded (trimmed-left-zeros);
bintrie leaves are raw 32 bytes. rlp.Split on raw 32-byte input
either errors or decodes garbage. Even after fixing Bug 1, bintrie
historical storage reads were broken end-to-end.

Fix: add isVerkle bool to historicStateReader; when true, bypass
rlp.Split and copy the raw 32-byte blob directly. The flag is set
from db.triedb.IsVerkle() at construction time.
This commit is contained in:
CPerezz 2026-04-09 11:27:35 +02:00
parent 45de1c3cc1
commit 21f243ff8a
No known key found for this signature in database
GPG key ID: 62045F34B97177DD
4 changed files with 49 additions and 7 deletions

View file

@ -34,13 +34,14 @@ import (
// historicStateReader implements StateReader, wrapping a historical state reader // historicStateReader implements StateReader, wrapping a historical state reader
// defined in path database and provide historic state serving over the path scheme. // defined in path database and provide historic state serving over the path scheme.
type historicStateReader struct { type historicStateReader struct {
reader *pathdb.HistoricalStateReader reader *pathdb.HistoricalStateReader
lock sync.Mutex // Lock for protecting concurrent read isVerkle bool // true when the database uses the binary trie scheme
lock sync.Mutex // Lock for protecting concurrent read
} }
// newHistoricStateReader constructs a reader for historical state serving. // newHistoricStateReader constructs a reader for historical state serving.
func newHistoricStateReader(r *pathdb.HistoricalStateReader) *historicStateReader { func newHistoricStateReader(r *pathdb.HistoricalStateReader, isVerkle bool) *historicStateReader {
return &historicStateReader{reader: r} return &historicStateReader{reader: r, isVerkle: isVerkle}
} }
// Account implements StateReader, retrieving the account specified by the address. // Account implements StateReader, retrieving the account specified by the address.
@ -84,6 +85,17 @@ func (r *historicStateReader) Storage(addr common.Address, key common.Hash) (com
if len(blob) == 0 { if len(blob) == 0 {
return common.Hash{}, nil return common.Hash{}, nil
} }
// Bintrie storage leaves are raw 32-byte values (not RLP-encoded)
// because the bintrie flat-state codec stores leaves verbatim.
// The merkle path encodes storage values as trimmed-left-zeros RLP
// before writing, so rlp.Split is the correct decoder there.
// Without this dispatch, bintrie historical storage reads would
// either decode garbage or error from rlp.Split on raw 32 bytes.
if r.isVerkle {
var slot common.Hash
copy(slot[:], blob)
return slot, nil
}
_, content, _, err := rlp.Split(blob) _, content, _, err := rlp.Split(blob)
if err != nil { if err != nil {
return common.Hash{}, err return common.Hash{}, err
@ -240,7 +252,7 @@ func (db *HistoricDB) Reader(stateRoot common.Hash) (Reader, error) {
var readers []StateReader var readers []StateReader
sr, err := db.triedb.HistoricStateReader(stateRoot) sr, err := db.triedb.HistoricStateReader(stateRoot)
if err == nil { if err == nil {
readers = append(readers, newHistoricStateReader(sr)) readers = append(readers, newHistoricStateReader(sr, db.triedb.IsVerkle()))
} }
nr, err := db.triedb.HistoricNodeReader(stateRoot) nr, err := db.triedb.HistoricNodeReader(stateRoot)
if err == nil { if err == nil {

View file

@ -278,10 +278,18 @@ func (dl *diskLayer) storage(accountHash, storageHash common.Hash, depth int) ([
// If the layer is being generated, ensure the requested storage slot // If the layer is being generated, ensure the requested storage slot
// has already been covered by the generator. // has already been covered by the generator.
//
// The codec derives the scheme-appropriate marker comparison key:
// merkle uses the 64-byte (accountHash||storageHash) concatenation;
// bintrie uses the 32-byte storageHash directly (which is the full
// stem||offset key matching the bintrie generator's 32-byte marker).
// Pre-A4 this always used the 64-byte shape, which was fail-open
// for bintrie because the zero accountHash sorts before any
// sha256-derived marker byte.
codec := dl.db.flatCodec codec := dl.db.flatCodec
combinedKey := storageKeySlice(accountHash, storageHash) // marker comparison key (merkle layout) markerKey := codec.StorageMarkerKey(accountHash, storageHash)
marker := dl.genMarker() marker := dl.genMarker()
if marker != nil && codec.MarkerCompare(combinedKey, marker) > 0 { if marker != nil && codec.MarkerCompare(markerKey, marker) > 0 {
return nil, errNotCoveredYet return nil, errNotCoveredYet
} }
// Try to retrieve the storage slot from the memory cache. The codec // Try to retrieve the storage slot from the memory cache. The codec

View file

@ -129,6 +129,14 @@ type flatStateCodec interface {
// disklayer.account/storage gating logic and by writeStates. // disklayer.account/storage gating logic and by writeStates.
MarkerCompare(key []byte, marker []byte) int MarkerCompare(key []byte, marker []byte) int
// StorageMarkerKey returns the byte representation used to compare a
// (accountHash, storageHash) pair against the generator progress
// marker in disklayer.storage's generation-progress gate. Merkle
// uses the 64-byte concatenation (two-tier keying); bintrie uses
// the 32-byte storageHash directly (single-tier, stem||offset key
// space matching the bintrie generator's 32-byte marker).
StorageMarkerKey(accountHash, storageHash common.Hash) []byte
// Flush drains all pending mutations from the in-memory accountData and // Flush drains all pending mutations from the in-memory accountData and
// storageData maps into the supplied batch and updates the clean cache // storageData maps into the supplied batch and updates the clean cache
// in lockstep. The codec controls iteration order, key derivation, and // in lockstep. The codec controls iteration order, key derivation, and
@ -233,6 +241,10 @@ func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int {
return bytes.Compare(key, marker) return bytes.Compare(key, marker)
} }
func (c *merkleFlatCodec) StorageMarkerKey(accountHash, storageHash common.Hash) []byte {
return storageKeySlice(accountHash, storageHash)
}
// Flush drains the supplied account/storage maps into the batch using the // Flush drains the supplied account/storage maps into the batch using the
// historical merkle per-entry layout: one rawdb write per accountData entry // historical merkle per-entry layout: one rawdb write per accountData entry
// and one per storage slot. Entries past the genMarker are skipped (the // and one per storage slot. Entries past the genMarker are skipped (the

View file

@ -409,6 +409,16 @@ func (c *bintrieFlatCodec) MarkerCompare(key []byte, marker []byte) int {
return bytes.Compare(key, marker) return bytes.Compare(key, marker)
} }
// StorageMarkerKey returns the 32-byte storageHash directly. For bintrie,
// the storageHash IS the full (stem || offset) key because
// bintrieFlatCodec.StorageKey returns (zeroHash, fullKey). Comparing
// this directly against the 32-byte generator marker yields the correct
// ordering — unlike the merkle 64-byte combined key which was fail-open
// for bintrie (see A4 remediation plan for the full diagnosis).
func (c *bintrieFlatCodec) StorageMarkerKey(_ common.Hash, storageHash common.Hash) []byte {
return storageHash[:]
}
// Flush drains the in-memory accountData and storageData maps into the // Flush drains the in-memory accountData and storageData maps into the
// batch using the bintrie per-stem layout. The maps are expected to hold // batch using the bintrie per-stem layout. The maps are expected to hold
// per-offset entries — each key is a 32-byte (stem || offset) tuple // per-offset entries — each key is a 32-byte (stem || offset) tuple