From eaf5523a5a88a09608e735941d972d3d14c14b7d Mon Sep 17 00:00:00 2001 From: CPerezz Date: Tue, 7 Apr 2026 14:17:16 +0200 Subject: [PATCH] triedb/pathdb: introduce flatStateCodec abstraction Introduce flatStateCodec, a small interface that captures the trie-specific aspects of flat-state storage: key derivation from (address, slot), persistence of account/storage entries, clean-cache key disambiguation, iterator setup, and progress-marker handling. Mirrors the existing nodeHasher pattern and complements the Hasher interface from state-hasher-iface-2 (which abstracts trie-side hashing and commit). The codec is stored on Database alongside the existing hasher field, ready to be threaded through the flat-state call sites (disklayer, flush, generator, reader) in the next commit. Provides merkleFlatCodec, a thin wrapper over the existing rawdb snapshot accessors and helpers. This is a pure refactor: behavior is unchanged. The bintrie-side codec implementation is added in a later commit, after all call sites have been routed through the abstraction. --- triedb/pathdb/database.go | 24 ++-- triedb/pathdb/flat_codec.go | 216 ++++++++++++++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 9 deletions(-) create mode 100644 triedb/pathdb/flat_codec.go diff --git a/triedb/pathdb/database.go b/triedb/pathdb/database.go index a61d302b1d..380637cd16 100644 --- a/triedb/pathdb/database.go +++ b/triedb/pathdb/database.go @@ -125,10 +125,11 @@ type Database struct { // readOnly is the flag whether the mutation is allowed to be applied. // It will be set automatically when the database is journaled during // the shutdown to reject all following unexpected mutations. 
- readOnly bool // Flag if database is opened in read only mode - waitSync bool // Flag if database is deactivated due to initial state sync - isVerkle bool // Flag if database is used for verkle tree - hasher nodeHasher // Trie node hasher + readOnly bool // Flag if database is opened in read only mode + waitSync bool // Flag if database is deactivated due to initial state sync + isVerkle bool // Flag if database is used for verkle tree + hasher nodeHasher // Trie node hasher + flatCodec flatStateCodec // Flat-state key derivation, persistence and iteration config *Config // Configuration for database diskdb ethdb.Database // Persistent storage for matured trie nodes @@ -153,11 +154,12 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { config = config.sanitize() db := &Database{ - readOnly: config.ReadOnly, - isVerkle: isVerkle, - config: config, - diskdb: diskdb, - hasher: merkleNodeHasher, + readOnly: config.ReadOnly, + isVerkle: isVerkle, + config: config, + diskdb: diskdb, + hasher: merkleNodeHasher, + flatCodec: &merkleFlatCodec{}, } // Establish a dedicated database namespace tailored for verkle-specific // data, ensuring the isolation of both verkle and merkle tree data. It's @@ -167,6 +169,10 @@ func New(diskdb ethdb.Database, config *Config, isVerkle bool) *Database { if isVerkle { db.diskdb = rawdb.NewTable(diskdb, string(rawdb.VerklePrefix)) db.hasher = binaryNodeHasher + // NOTE: bintrieFlatCodec is introduced in a later commit. Until then, + // verkle databases also use the merkle codec for backward compatibility + // (the existing snapshot path is disabled for verkle anyway via the + // noBuild guard at setStateGenerator). } // Construct the layer tree by resolving the in-disk singleton state // and in-memory layer journal. 
diff --git a/triedb/pathdb/flat_codec.go b/triedb/pathdb/flat_codec.go
new file mode 100644
index 0000000000..ca0f72a381
--- /dev/null
+++ b/triedb/pathdb/flat_codec.go
@@ -0,0 +1,216 @@
+// Copyright 2026 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package pathdb
+
+import (
+	"bytes"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core/rawdb"
+	"github.com/ethereum/go-ethereum/crypto"
+	"github.com/ethereum/go-ethereum/ethdb"
+)
+
+// flatStateCodec abstracts the trie-specific aspects of flat-state storage:
+// key derivation from (address, slot), persistence of account/storage entries
+// to disk, clean-cache key disambiguation, iterator setup and progress markers.
+//
+// It mirrors the existing nodeHasher pattern (a hot, small interface plugged
+// into the Database struct), and complements the Hasher interface from
+// state-hasher-iface-2 which abstracts trie-side hashing/commit.
+//
+// Two implementations are provided:
+//   - merkleFlatCodec: keccak-keyed flat state, the historical MPT scheme.
+//   - bintrieFlatCodec: per-stem flat state for the unified binary trie. Added
+//     in a later commit. Until then, only merkleFlatCodec is wired.
+//
+// All methods MUST be safe for concurrent use; the codec is shared across
+// goroutines (the disk layer's read path, the buffer flush path, and the
+// background generator may all call into it simultaneously).
+type flatStateCodec interface {
+	// AccountKey derives the flat-state lookup key for an account.
+	//
+	// For Merkle: returns keccak256(addr).
+	// For Bintrie: returns the stem of the BasicData leaf (a 31-byte stem
+	// zero-padded into a common.Hash). Subsequent reads of the stem blob
+	// extract the BasicData and CodeHash leaves at offsets 0 and 1.
+	AccountKey(addr common.Address) common.Hash
+
+	// StorageKey derives the flat-state lookup keys for a storage slot.
+	//
+	// The first return value carries the account-side hash (e.g.
+	// keccak256(addr) for Merkle, or zero for bintrie which has no per-account
+	// grouping). The second return value carries the slot-side hash
+	// (keccak256(slot) for Merkle, or the full bintrie key for bintrie).
+	//
+	// Read/Write methods receive the same pair, so the codec implementation
+	// is the only place that has to interpret them.
+	StorageKey(addr common.Address, slot common.Hash) (accountKey common.Hash, storageKey common.Hash)
+
+	// ReadAccount loads an account flat-state entry from persistent storage.
+	// Returns nil if the entry is not present.
+	ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte
+
+	// ReadStorage loads a storage flat-state entry from persistent storage.
+	// Returns nil if the entry is not present.
+	ReadStorage(db ethdb.KeyValueReader, accountKey common.Hash, storageKey common.Hash) []byte
+
+	// WriteAccount persists an account flat-state entry into the supplied batch.
+	WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte)
+
+	// DeleteAccount removes an account flat-state entry via the supplied batch.
+	DeleteAccount(batch ethdb.Batch, key common.Hash)
+
+	// WriteStorage persists a storage flat-state entry into the supplied batch.
+	WriteStorage(batch ethdb.Batch, accountKey common.Hash, storageKey common.Hash, blob []byte)
+
+	// DeleteStorage removes a storage flat-state entry via the supplied batch.
+	DeleteStorage(batch ethdb.Batch, accountKey common.Hash, storageKey common.Hash)
+
+	// AccountCacheKey returns the byte key used in the disk-layer clean state
+	// cache (fastcache) for an account entry. The cache is shared between
+	// account and storage lookups, so codecs must ensure their key spaces are
+	// disjoint to avoid collisions.
+	AccountCacheKey(key common.Hash) []byte
+
+	// StorageCacheKey returns the byte key used in the disk-layer clean state
+	// cache (fastcache) for a storage entry. See AccountCacheKey for the
+	// disjointness requirement.
+	StorageCacheKey(accountKey common.Hash, storageKey common.Hash) []byte
+
+	// AccountPrefix returns the rawdb key prefix used by account entries on
+	// disk. Used by the generator to set up its account-range iterator.
+	AccountPrefix() []byte
+
+	// StoragePrefix returns the rawdb key prefix used by storage entries on
+	// disk. Used by the generator to set up its storage-range iterator.
+	StoragePrefix() []byte
+
+	// AccountKeyLength returns the expected total length (prefix + payload)
+	// of an on-disk account key. The generator uses this to filter spurious
+	// matches when iterating with a length-bounded iterator.
+	AccountKeyLength() int
+
+	// StorageKeyLength returns the expected total length (prefix + payload)
+	// of an on-disk storage key. See AccountKeyLength.
+	StorageKeyLength() int
+
+	// AccountPrefixSize returns the per-entry on-disk overhead used by the
+	// stateSet to estimate flush sizes. This is just the prefix length for
+	// merkle codecs; bintrie codecs may use a different convention.
+	AccountPrefixSize() int
+
+	// StoragePrefixSize returns the per-entry on-disk overhead for storage
+	// entries.
+	StoragePrefixSize() int
+
+	// SplitMarker decomposes a generation progress marker into the account
+	// portion and the full marker. For Merkle the account part is the first
+	// 32 bytes; for bintrie both halves are the same single 32-byte stem.
+	SplitMarker(marker []byte) (accountMarker []byte, fullMarker []byte)
+
+	// MarkerCompare compares a flat-state key against a generation progress
+	// marker. The result follows bytes.Compare semantics. Used by the
+	// disklayer.account/storage gating logic and by writeStates.
+	MarkerCompare(key []byte, marker []byte) int
+}
+
+// merkleFlatCodec implements flatStateCodec for the keccak-keyed MPT flat
+// state scheme. All methods are thin wrappers over rawdb accessors and
+// existing helpers; this codec preserves the historical behavior bit-for-bit.
+type merkleFlatCodec struct{}
+
+// Compile-time interface check.
+var _ flatStateCodec = (*merkleFlatCodec)(nil)
+
+func (c *merkleFlatCodec) AccountKey(addr common.Address) common.Hash {
+	return crypto.Keccak256Hash(addr.Bytes())
+}
+
+func (c *merkleFlatCodec) StorageKey(addr common.Address, slot common.Hash) (common.Hash, common.Hash) {
+	return crypto.Keccak256Hash(addr.Bytes()), crypto.Keccak256Hash(slot.Bytes())
+}
+
+func (c *merkleFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte {
+	return rawdb.ReadAccountSnapshot(db, key)
+}
+
+func (c *merkleFlatCodec) ReadStorage(db ethdb.KeyValueReader, accountKey, storageKey common.Hash) []byte {
+	return rawdb.ReadStorageSnapshot(db, accountKey, storageKey)
+}
+
+func (c *merkleFlatCodec) WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte) {
+	rawdb.WriteAccountSnapshot(batch, key, blob)
+}
+
+func (c *merkleFlatCodec) DeleteAccount(batch ethdb.Batch, key common.Hash) {
+	rawdb.DeleteAccountSnapshot(batch, key)
+}
+
+func (c *merkleFlatCodec) WriteStorage(batch ethdb.Batch, accountKey, storageKey common.Hash, blob []byte) {
+	rawdb.WriteStorageSnapshot(batch, accountKey, storageKey, blob)
+}
+
+func (c *merkleFlatCodec) DeleteStorage(batch ethdb.Batch, accountKey, storageKey common.Hash) {
+	rawdb.DeleteStorageSnapshot(batch, accountKey, storageKey)
+}
+
+func (c *merkleFlatCodec) AccountCacheKey(key common.Hash) []byte {
+	// The historical merkle clean cache uses the bare 32-byte account hash.
+	// key is an array passed by value, so this slices a private copy of it.
+	return key[:]
+}
+
+func (c *merkleFlatCodec) StorageCacheKey(accountKey, storageKey common.Hash) []byte {
+	return storageKeySlice(accountKey, storageKey)
+}
+
+func (c *merkleFlatCodec) AccountPrefix() []byte {
+	return rawdb.SnapshotAccountPrefix
+}
+
+func (c *merkleFlatCodec) StoragePrefix() []byte {
+	return rawdb.SnapshotStoragePrefix
+}
+
+func (c *merkleFlatCodec) AccountKeyLength() int {
+	return len(rawdb.SnapshotAccountPrefix) + common.HashLength
+}
+
+func (c *merkleFlatCodec) StorageKeyLength() int {
+	return len(rawdb.SnapshotStoragePrefix) + 2*common.HashLength
+}
+
+func (c *merkleFlatCodec) AccountPrefixSize() int {
+	return len(rawdb.SnapshotAccountPrefix)
+}
+
+func (c *merkleFlatCodec) StoragePrefixSize() int {
+	return len(rawdb.SnapshotStoragePrefix)
+}
+
+func (c *merkleFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) {
+	var accMarker []byte
+	if len(marker) > 0 {
+		accMarker = marker[:common.HashLength]
+	}
+	return accMarker, marker
+}
+
+func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int {
+	return bytes.Compare(key, marker)
+}