go-ethereum/triedb/pathdb/flat_codec.go

// Copyright 2026 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package pathdb

import (
	"bytes"

	"github.com/VictoriaMetrics/fastcache"
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/core/rawdb"
	"github.com/ethereum/go-ethereum/crypto"
	"github.com/ethereum/go-ethereum/ethdb"
)

// flatStateCodec abstracts the trie-specific aspects of flat-state storage:
// key derivation from (address, slot), persistence of account/storage entries
// to disk, clean-cache key disambiguation, and iterator construction.
//
// It mirrors the existing nodeHasher pattern (a hot, small interface plugged
// into the Database struct), and complements the Hasher interface from
// state-hasher-iface-2 which abstracts trie-side hashing/commit.
//
// Two implementations are provided:
//   - merkleFlatCodec: keccak-keyed flat state, the historical MPT scheme.
//   - bintrieFlatCodec: per-stem flat state for the unified binary trie. Added
//     in a later commit. Until then, only merkleFlatCodec is wired.
//
// All methods MUST be safe for concurrent use; the codec is shared across
// goroutines (the disk layer's read path, the buffer flush path, and the
// background generator may all call into it simultaneously).
type flatStateCodec interface {
	// AccountKey derives the flat-state lookup key for an account.
	//
	// For Merkle: returns keccak256(addr).
	// For Bintrie: returns the stem of the BasicData leaf (a 31-byte stem
	// zero-padded into a common.Hash). Subsequent reads of the stem blob
	// extract the BasicData and CodeHash leaves at offsets 0 and 1.
	AccountKey(addr common.Address) common.Hash

	// StorageKey derives the flat-state lookup keys for a storage slot.
	//
	// The first return value carries the account-side hash (e.g.
	// keccak256(addr) for Merkle, or zero for bintrie which has no per-account
	// grouping). The second return value carries the slot-side hash
	// (keccak256(slot) for Merkle, or the full bintrie key for bintrie).
	//
	// Read/Write methods receive the same pair, so the codec implementation
	// is the only place that has to interpret them.
	StorageKey(addr common.Address, slot common.Hash) (accountKey common.Hash, storageKey common.Hash)

	// ReadAccount loads an account flat-state entry from persistent storage.
	// Returns nil if the entry is not present.
	ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte

	// ReadStorage loads a storage flat-state entry from persistent storage.
	// Returns nil if the entry is not present.
	ReadStorage(db ethdb.KeyValueReader, accountKey common.Hash, storageKey common.Hash) []byte

	// WriteAccount persists an account flat-state entry into the supplied batch.
	WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte)

	// DeleteAccount removes an account flat-state entry via the supplied batch.
	DeleteAccount(batch ethdb.Batch, key common.Hash)

	// WriteStorage persists a storage flat-state entry into the supplied batch.
	WriteStorage(batch ethdb.Batch, accountKey common.Hash, storageKey common.Hash, blob []byte)

	// DeleteStorage removes a storage flat-state entry via the supplied batch.
	DeleteStorage(batch ethdb.Batch, accountKey common.Hash, storageKey common.Hash)

	// AccountCacheKey returns the byte key used in the disk-layer clean state
	// cache (fastcache) for an account entry. The cache is shared between
	// account and storage lookups, so codecs must ensure their key spaces are
	// disjoint to avoid collisions.
	AccountCacheKey(key common.Hash) []byte

	// StorageCacheKey returns the byte key used in the disk-layer clean state
	// cache (fastcache) for a storage entry. See AccountCacheKey for the
	// disjointness requirement.
	StorageCacheKey(accountKey common.Hash, storageKey common.Hash) []byte

	// AccountPrefix returns the rawdb key prefix used by account entries on
	// disk. Used by the generator to set up its account-range iterator.
	AccountPrefix() []byte

	// StoragePrefix returns the rawdb key prefix used by storage entries on
	// disk. Used by the generator to set up its storage-range iterator.
	StoragePrefix() []byte

	// AccountKeyLength returns the expected total length (prefix + payload)
	// of an on-disk account key. The generator uses this to filter spurious
	// matches when iterating with a length-bounded iterator.
	AccountKeyLength() int

	// StorageKeyLength returns the expected total length (prefix + payload)
	// of an on-disk storage key. See AccountKeyLength.
	StorageKeyLength() int

	// AccountPrefixSize returns the per-entry on-disk overhead used by the
	// stateSet to estimate flush sizes. This is just the prefix length for
	// merkle codecs; bintrie codecs may use a different convention.
	AccountPrefixSize() int

	// StoragePrefixSize returns the per-entry on-disk overhead for storage
	// entries.
	StoragePrefixSize() int

	// SplitMarker decomposes a generation progress marker into the account
	// portion and the full marker. For Merkle the account part is the first
	// 32 bytes; for bintrie both halves are the same single 32-byte stem.
	//
	// merkleFlatCodec returns a nil account portion for an empty marker and
	// always returns the marker itself, unmodified, as the second value.
	SplitMarker(marker []byte) (accountMarker []byte, fullMarker []byte)

	// MarkerCompare compares a flat-state key against a generation progress
	// marker. The result follows bytes.Compare semantics: negative when key
	// sorts before marker, zero when they are equal, positive when key sorts
	// after marker. Used by the disklayer.account/storage gating logic and by
	// writeStates.
	MarkerCompare(key []byte, marker []byte) int

	// Flush writes all pending mutations held in the in-memory accountData
	// and storageData maps into the supplied batch and updates the clean
	// cache in lockstep. The codec controls iteration order, key derivation,
	// and any aggregation that may be required (e.g. the bintrie codec must
	// merge per-offset writes into per-stem read-modify-writes to avoid
	// quadratic disk reads).
	//
	// Entries strictly past genMarker (per the codec's MarkerCompare
	// semantics) are skipped because they will be regenerated by the
	// background snapshot generator.
	//
	// As implemented by merkleFlatCodec: a nil genMarker disables the skip
	// entirely, a zero-length blob is treated as a deletion, a nil clean
	// cache is tolerated (no cache update is attempted), and the input maps
	// are only read, never modified.
	//
	// Returns (account-entry count, storage-entry count) for metric
	// reporting; the merkle codec reports one per map entry, while the
	// bintrie codec reports one per logical offset write (so the metrics
	// remain comparable across schemes).
	Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int)
}

// merkleFlatCodec implements flatStateCodec for the keccak-keyed MPT flat
// state scheme. Apart from Flush, which carries the per-entry write loop,
// its methods are thin wrappers over rawdb accessors and existing helpers;
// this codec preserves the historical behavior bit-for-bit.
//
// The type is an empty struct and therefore holds no state of its own.
type merkleFlatCodec struct{}

// Compile-time check that *merkleFlatCodec satisfies flatStateCodec.
var _ flatStateCodec = (*merkleFlatCodec)(nil)

// AccountKey returns the keccak256 hash of the raw address bytes, which is
// the flat-state lookup key of an account under the merkle scheme.
func (c *merkleFlatCodec) AccountKey(addr common.Address) common.Hash {
	return crypto.Keccak256Hash(addr[:])
}

// StorageKey returns the pair of lookup keys for a storage slot under the
// merkle scheme: keccak256 of the address bytes first, keccak256 of the slot
// bytes second.
func (c *merkleFlatCodec) StorageKey(addr common.Address, slot common.Hash) (common.Hash, common.Hash) {
	accountHash := crypto.Keccak256Hash(addr[:])
	slotHash := crypto.Keccak256Hash(slot[:])
	return accountHash, slotHash
}

// ReadAccount looks up the account entry stored under key by delegating to
// rawdb.ReadAccountSnapshot and returns whatever that accessor yields.
//
// NOTE(review): the flatStateCodec contract promises nil for a missing
// entry; that guarantee rests on the rawdb accessor — confirm there.
func (c *merkleFlatCodec) ReadAccount(db ethdb.KeyValueReader, key common.Hash) []byte {
	blob := rawdb.ReadAccountSnapshot(db, key)
	return blob
}

// ReadStorage looks up the storage entry addressed by (accountKey,
// storageKey) by delegating to rawdb.ReadStorageSnapshot and returns
// whatever that accessor yields.
//
// NOTE(review): the flatStateCodec contract promises nil for a missing
// entry; that guarantee rests on the rawdb accessor — confirm there.
func (c *merkleFlatCodec) ReadStorage(db ethdb.KeyValueReader, accountKey, storageKey common.Hash) []byte {
	blob := rawdb.ReadStorageSnapshot(db, accountKey, storageKey)
	return blob
}

// WriteAccount queues the account entry blob under key in the supplied batch
// by delegating to rawdb.WriteAccountSnapshot. Nothing is returned; any
// failure handling is left to the rawdb helper and the batch.
func (c *merkleFlatCodec) WriteAccount(batch ethdb.Batch, key common.Hash, blob []byte) {
	rawdb.WriteAccountSnapshot(batch, key, blob)
}

// DeleteAccount queues the removal of the account entry stored under key in
// the supplied batch by delegating to rawdb.DeleteAccountSnapshot.
func (c *merkleFlatCodec) DeleteAccount(batch ethdb.Batch, key common.Hash) {
	rawdb.DeleteAccountSnapshot(batch, key)
}

// WriteStorage queues the storage entry blob addressed by (accountKey,
// storageKey) in the supplied batch by delegating to
// rawdb.WriteStorageSnapshot.
func (c *merkleFlatCodec) WriteStorage(batch ethdb.Batch, accountKey, storageKey common.Hash, blob []byte) {
	rawdb.WriteStorageSnapshot(batch, accountKey, storageKey, blob)
}

// DeleteStorage queues the removal of the storage entry addressed by
// (accountKey, storageKey) in the supplied batch by delegating to
// rawdb.DeleteStorageSnapshot.
func (c *merkleFlatCodec) DeleteStorage(batch ethdb.Batch, accountKey, storageKey common.Hash) {
	rawdb.DeleteStorageSnapshot(batch, accountKey, storageKey)
}

// AccountCacheKey returns the clean-cache key for an account entry: the bare
// 32-byte account hash, as used by the historical merkle clean cache.
//
// key is received by value, so the returned slice is backed by a copy of the
// hash made for this call rather than by the caller's own variable.
func (c *merkleFlatCodec) AccountCacheKey(key common.Hash) []byte {
	return key.Bytes()
}

// StorageCacheKey returns the clean-cache key for a storage entry, as built
// by the package helper storageKeySlice from both hashes.
//
// NOTE(review): presumably the helper concatenates the two hashes, which
// would keep storage keys distinct in length from the 32-byte account keys —
// confirm against storageKeySlice.
func (c *merkleFlatCodec) StorageCacheKey(accountKey, storageKey common.Hash) []byte {
	cacheKey := storageKeySlice(accountKey, storageKey)
	return cacheKey
}

// AccountPrefix returns rawdb.SnapshotAccountPrefix, the key prefix of
// account entries on disk. The package-level value is returned as is, not a
// copy, so callers must treat it as read-only.
func (c *merkleFlatCodec) AccountPrefix() []byte {
	prefix := rawdb.SnapshotAccountPrefix
	return prefix
}

// StoragePrefix returns rawdb.SnapshotStoragePrefix, the key prefix of
// storage entries on disk. The package-level value is returned as is, not a
// copy, so callers must treat it as read-only.
func (c *merkleFlatCodec) StoragePrefix() []byte {
	prefix := rawdb.SnapshotStoragePrefix
	return prefix
}

// AccountKeyLength returns the total length of an on-disk account key: the
// account prefix followed by one hash.
func (c *merkleFlatCodec) AccountKeyLength() int {
	return c.AccountPrefixSize() + common.HashLength
}

// StorageKeyLength returns the total length of an on-disk storage key: the
// storage prefix followed by two hashes (account hash and slot hash).
func (c *merkleFlatCodec) StorageKeyLength() int {
	return c.StoragePrefixSize() + 2*common.HashLength
}

// AccountPrefixSize returns the per-entry key overhead of an account entry,
// which for the merkle scheme is the byte length of the account prefix.
func (c *merkleFlatCodec) AccountPrefixSize() int {
	return len(c.AccountPrefix())
}

// StoragePrefixSize returns the per-entry key overhead of a storage entry,
// which for the merkle scheme is the byte length of the storage prefix.
func (c *merkleFlatCodec) StoragePrefixSize() int {
	return len(c.StoragePrefix())
}

// SplitMarker returns the account portion of a generation marker together
// with the unmodified marker. An empty (or nil) marker has no account
// portion, so nil is returned for it; otherwise the account portion is the
// leading common.HashLength bytes, sharing the marker's backing array.
//
// A non-empty marker is assumed to hold at least common.HashLength bytes;
// the slice expression below is not guarded by a length check.
func (c *merkleFlatCodec) SplitMarker(marker []byte) ([]byte, []byte) {
	if len(marker) == 0 {
		return nil, marker
	}
	return marker[:common.HashLength], marker
}

// MarkerCompare orders key against marker lexicographically, exactly as
// bytes.Compare does: -1 if key sorts first, 0 if equal, +1 if key sorts
// after marker.
func (c *merkleFlatCodec) MarkerCompare(key []byte, marker []byte) int {
	order := bytes.Compare(key, marker)
	return order
}

// Flush writes the supplied account and storage mutations into batch using
// the historical merkle per-entry layout: one write or delete per accountData
// entry and one per storage slot. A zero-length blob means deletion. The
// input maps are only read.
//
// Generation gating: with a non-nil genMarker, any account whose hash
// compares greater than the marker is skipped, as are the slots of the
// account sitting exactly at the marker whose hash compares greater than the
// marker's slot part; the generator will fill those in. Entries located
// exactly at the marker are still written. A nil genMarker disables gating.
//
// When clean is non-nil it is updated right after each batch operation (nil
// value for deletions) so that later reads do not observe stale data.
//
// The returned pair is the number of account entries and storage slots that
// were written or deleted.
//
// This is the implementation that previously lived directly in writeStates.
// It has been moved into the codec so the bintrie codec can supply its own
// per-stem aggregating implementation alongside this one.
func (c *merkleFlatCodec) Flush(batch ethdb.Batch, genMarker []byte, accountData map[common.Hash][]byte, storageData map[common.Hash]map[common.Hash][]byte, clean *fastcache.Cache) (int, int) {
	// A nil marker switches the generation gating off entirely.
	gated := genMarker != nil

	// beyondMarker reports whether the account sorts strictly after the
	// generation marker, i.e. is not yet covered by the snapshot. The account
	// at the marker position itself is not "beyond": it must still be
	// updated, as it would be skipped in the next generation cycle.
	beyondMarker := func(addrHash common.Hash) bool {
		return gated && bytes.Compare(addrHash[:], genMarker) > 0
	}
	// mirror applies the same change to the clean cache, if there is one.
	mirror := func(cacheKey, value []byte) {
		if clean != nil {
			clean.Set(cacheKey, value)
		}
	}

	var nAccounts, nSlots int

	for addrHash, blob := range accountData {
		if beyondMarker(addrHash) {
			continue
		}
		nAccounts++

		cacheKey := c.AccountCacheKey(addrHash)
		if len(blob) != 0 {
			c.WriteAccount(batch, addrHash, blob)
			mirror(cacheKey, blob)
			continue
		}
		c.DeleteAccount(batch, addrHash)
		mirror(cacheKey, nil)
	}

	for addrHash, slotSet := range storageData {
		if beyondMarker(addrHash) {
			continue
		}
		// atMarker is set for the one account the generator is currently
		// working through; only its slots need the per-slot gating below.
		atMarker := gated && bytes.Equal(addrHash[:], genMarker[:common.HashLength])

		for slotHash, blob := range slotSet {
			// Skip slots past the marker's slot part. The slot exactly at the
			// marker position is kept, as it would be skipped in the next
			// generation cycle.
			if atMarker && bytes.Compare(slotHash[:], genMarker[common.HashLength:]) > 0 {
				continue
			}
			nSlots++

			cacheKey := c.StorageCacheKey(addrHash, slotHash)
			if len(blob) != 0 {
				c.WriteStorage(batch, addrHash, slotHash, blob)
				mirror(cacheKey, blob)
				continue
			}
			c.DeleteStorage(batch, addrHash, slotHash)
			mirror(cacheKey, nil)
		}
	}
	return nAccounts, nSlots
}