detect entry by tag

This commit is contained in:
Sina Mahmoodi 2026-05-07 13:54:21 +00:00
parent 15aff745b1
commit acb4e0001f
4 changed files with 246 additions and 76 deletions

View file

@ -528,7 +528,7 @@ func importHistory(ctx *cli.Context) error {
var (
format = ctx.String(utils.EraFormatFlag.Name)
from func(era.ReadAtSeekCloser) (era.Era, error)
from func(f era.ReadAtSeekCloser) (era.Era, error)
)
switch format {
case "era1", "era":

View file

@ -18,6 +18,7 @@ package execdb
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math/big"
@ -28,7 +29,10 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/internal/era"
"github.com/ethereum/go-ethereum/internal/era/e2store"
"github.com/ethereum/go-ethereum/rlp"
"github.com/golang/snappy"
)
func TestEre(t *testing.T) {
@ -166,14 +170,17 @@ func TestEre(t *testing.T) {
if e.Count() != uint64(totalBlocks) {
t.Fatalf("wrong block count: want %d, got %d", totalBlocks, e.Count())
}
// Verify component count: 4 when TD is stored (pre-merge or
// transition), 3 otherwise (pure post-merge).
wantComponents := uint64(3)
if tt.preMerge > 0 {
wantComponents = 4
// Verify the layout detected from on-disk type tags. Header,
// body, and receipts are always present; TD is only present
// when the epoch contains pre-merge blocks.
if !e.HasComponent(header) || !e.HasComponent(body) || !e.HasComponent(receipts) {
t.Fatalf("missing required component in layout %v", e.m.layout)
}
if e.m.components != wantComponents {
t.Fatalf("wrong component count: want %d, got %d", wantComponents, e.m.components)
if got, want := e.HasComponent(td), tt.preMerge > 0; got != want {
t.Fatalf("td component presence mismatch: want %v, got %v", want, got)
}
if e.HasComponent(proof) {
t.Fatalf("proof component should not be present in layout %v", e.m.layout)
}
// Verify accumulator in file.
@ -349,10 +356,118 @@ func TestInitialTD(t *testing.T) {
}
}
// TestDetectLayoutNoReceipts hand-builds an Ere file with the receipts slot
// replaced by a TotalDifficulty entry (the on-disk shape a "noreceipts"
// profile would take) and verifies the reader detects this from the e2store
// type tags rather than misreading TD as receipts. This is the core safety
// property of detectLayout — exercise it via From, where no filename is
// available.
func TestDetectLayoutNoReceipts(t *testing.T) {
t.Parallel()
dir := t.TempDir()
path := filepath.Join(dir, "synthetic.ere")
f, err := os.Create(path)
if err != nil {
t.Fatalf("create: %v", err)
}
w := e2store.NewWriter(f)
written := uint64(0)
writeEntry := func(typ uint16, data []byte) {
n, err := w.Write(typ, data)
if err != nil {
t.Fatalf("write type 0x%04x: %v", typ, err)
}
written += uint64(n)
}
var snappyBuf bytes.Buffer
writeSnappy := func(typ uint16, data []byte) {
snappyBuf.Reset()
sw := snappy.NewBufferedWriter(&snappyBuf)
if _, err := sw.Write(data); err != nil {
t.Fatalf("snappy write: %v", err)
}
if err := sw.Flush(); err != nil {
t.Fatalf("snappy flush: %v", err)
}
writeEntry(typ, snappyBuf.Bytes())
}
// Version
writeEntry(era.TypeVersion, nil)
// Block 0 components in order: header, body, td (no receipts).
headerBytes := mustEncode(&types.Header{Number: big.NewInt(0), Difficulty: big.NewInt(1)})
bodyBytes := mustEncode(&types.Body{})
tdLE := make([]byte, 32) // uint256(1) little-endian
tdLE[0] = 1
headerOff := written
writeSnappy(era.TypeCompressedHeader, headerBytes)
bodyOff := written
writeSnappy(era.TypeCompressedBody, bodyBytes)
tdOff := written
writeEntry(era.TypeTotalDifficulty, tdLE)
// Build the DynamicBlockIndex with 3 components per block, 1 block, and
// the third slot pointing at the TD entry rather than at receipts.
base := int64(written)
relative := func(absolute uint64) uint64 { return uint64(int64(absolute) - base) }
var indexBuf bytes.Buffer
writeU64 := func(v uint64) {
if err := binary.Write(&indexBuf, binary.LittleEndian, v); err != nil {
t.Fatalf("index write: %v", err)
}
}
writeU64(0) // starting block number
writeU64(relative(headerOff))
writeU64(relative(bodyOff))
writeU64(relative(tdOff))
writeU64(3) // component count
writeU64(1) // block count
writeEntry(era.TypeDynamicBlockIndex, indexBuf.Bytes())
if err := f.Close(); err != nil {
t.Fatalf("close: %v", err)
}
// Open via From — no filename is consulted, so the layout map is the
// only line of defence.
g, err := os.Open(path)
if err != nil {
t.Fatalf("reopen: %v", err)
}
t.Cleanup(func() { g.Close() })
e, err := From(g)
if err != nil {
t.Fatalf("From: %v", err)
}
defer e.Close()
era := e.(*Era)
if era.HasComponent(receipts) {
t.Errorf("receipts should not be reported as present in synthetic noreceipts file")
}
if !era.HasComponent(td) {
t.Errorf("td should be reported as present")
}
if got, want := era.m.layout[td], 2; got != want {
t.Errorf("td slot: want %d, got %d", want, got)
}
// Reading receipts must fail loudly, not silently decode TD bytes.
if _, err := era.GetRawReceiptsByNumber(0); err == nil {
t.Error("expected error when reading receipts from a noreceipts file")
}
}
// TestOpenRejectsNoreceiptsProfile verifies that Open() refuses to decode an
// Ere file whose filename declares the unsupported "noreceipts" profile. The
// positional reader can't safely interpret such a file because TD would be
// shifted into the receipts slot.
// Ere file whose filename declares the unsupported "noreceipts" profile. This
// is the defence-in-depth filename check; structural safety is provided by
// detectLayout (covered separately by TestDetectLayoutNoReceipts).
func TestOpenRejectsNoreceiptsProfile(t *testing.T) {
t.Parallel()

View file

@ -192,7 +192,7 @@ func (it *RawIterator) Next() bool {
}
// Check if TD component is present in this file (pre-merge or merge-transition epoch).
if int(td) < int(it.e.m.components) {
if it.e.HasComponent(td) {
tdOffset, err := it.e.tdOff(it.next)
if err != nil {
it.setErr(err)

View file

@ -18,6 +18,7 @@ package execdb
import (
"encoding/binary"
"errors"
"fmt"
"io"
"math/big"
@ -50,10 +51,10 @@ func Filename(network string, epoch int, lastBlockHash common.Hash) string {
return fmt.Sprintf("%s-%05d-%s-noproofs.ere", network, epoch, lastBlockHash.Hex()[2:10])
}
// Open accesses the era file. The path is used to parse the profile postfix
// (per the Ere spec filename convention); files written with the "noreceipts"
// profile are rejected because the positional index reader assumes receipts
// are present.
// Open accesses the era file at the given path. The basename is used to parse
// the profile postfix (per the Ere spec filename convention) as a defence-in-
// depth check; structural safety is enforced by detectLayout, which reads the
// e2store type tag at each index slot rather than trusting position.
func Open(path string) (*Era, error) {
if err := checkProfile(filepath.Base(path)); err != nil {
return nil, err
@ -62,12 +63,8 @@ func Open(path string) (*Era, error) {
if err != nil {
return nil, err
}
e := &Era{f: f, s: e2store.NewReader(f)}
if err := e.loadIndex(); err != nil {
f.Close()
return nil, err
}
if err := e.checkComponents(); err != nil {
e, err := from(f)
if err != nil {
f.Close()
return nil, err
}
@ -84,26 +81,30 @@ func (e *Era) Close() error {
return err
}
// From returns an Era backed by f. Since no filename is available, the profile
// cannot be inspected; the component count is still validated against the
// supported layouts (header, body, receipts, [td]).
// From returns an Era backed by f. Component layout is derived from the
// e2store type tags stored in the file itself, so callers do not need to
// supply a filename or profile.
func From(f era.ReadAtSeekCloser) (era.Era, error) {
e := &Era{f: f, s: e2store.NewReader(f)}
if err := e.loadIndex(); err != nil {
f.Close()
return nil, err
}
if err := e.checkComponents(); err != nil {
e, err := from(f)
if err != nil {
f.Close()
return nil, err
}
return e, nil
}
func from(f era.ReadAtSeekCloser) (*Era, error) {
e := &Era{f: f, s: e2store.NewReader(f)}
if err := e.loadIndex(); err != nil {
return nil, err
}
return e, nil
}
// checkProfile inspects the profile postfix(es) in an Ere filename and rejects
// any combination this reader can't safely decode. The reader maps components
// by fixed positions (header, body, receipts, td?, proof?), so a file written
// with the "noreceipts" profile would silently shift TD into the receipts slot.
// any combination this reader doesn't support. This is a best-effort, defence-
// in-depth check; the authoritative layout detection happens in detectLayout
// from the on-disk type tags.
//
// The Ere format itself does not require a particular filename, so this check
// is permissive about non-conforming names: validation only kicks in when a
@ -122,18 +123,6 @@ func checkProfile(name string) error {
return nil
}
// checkComponents verifies the file's component count matches what this reader
// supports. The reader assumes the fixed positional layout
// (header, body, receipts, td?, proof?), and the builder in this package only
// produces files with 3 (post-merge) or 4 (pre-merge / transition) components.
// Files with 2 (noreceipts) or 5 (proofs present) components are rejected.
func (e *Era) checkComponents() error {
if e.m.components < 3 || e.m.components > 4 {
return fmt.Errorf("unsupported Ere component count %d (reader expects header, body, receipts, and optional total difficulty)", e.m.components)
}
return nil
}
// Start retrieves the starting block number.
func (e *Era) Start() uint64 {
return e.m.start
@ -240,12 +229,19 @@ func (e *Era) GetRawReceiptsByNumber(blockNum uint64) ([]byte, error) {
return io.ReadAll(r)
}
// HasComponent reports whether the given component is recorded in the file's
// index, as detected from the on-disk e2store type tags.
func (e *Era) HasComponent(c componentType) bool {
_, ok := e.m.layout[c]
return ok
}
// InitialTD returns initial total difficulty before the difficulty of the
// first block of the Era is applied. Returns an error if TD is not available
// (e.g., post-merge epoch).
func (e *Era) InitialTD() (*big.Int, error) {
// Check if TD component exists.
if int(td) >= int(e.m.components) {
if !e.HasComponent(td) {
return nil, fmt.Errorf("total difficulty not available in this epoch")
}
@ -275,7 +271,8 @@ func (e *Era) Accumulator() (common.Hash, error) {
return common.BytesToHash(entry.Value), nil
}
// loadIndex loads in the index table containing all offsets and caches it.
// loadIndex loads in the index table trailer (start, count, component-count)
// and then derives the component→slot layout from the on-disk type tags.
func (e *Era) loadIndex() error {
var err error
e.m.length, err = e.f.Seek(0, io.SeekEnd)
@ -296,30 +293,68 @@ func (e *Era) loadIndex() error {
if err != nil {
return err
}
e.m.start = binary.LittleEndian.Uint64(b[:8])
layout, err := e.detectLayout()
if err != nil {
return err
}
e.m.layout = layout
return nil
}
// headerOff, bodyOff, receiptOff, and tdOff return the offsets of the respective components for a given block number.
func (e *Era) headerOff(num uint64) (int64, error) { return e.indexOffset(num, header) }
func (e *Era) bodyOff(num uint64) (int64, error) { return e.indexOffset(num, body) }
func (e *Era) receiptOff(num uint64) (int64, error) { return e.indexOffset(num, receipts) }
func (e *Era) tdOff(num uint64) (int64, error) { return e.indexOffset(num, td) }
// indexOffset calculates offset to a certain component for a block number within a file.
func (e *Era) indexOffset(n uint64, component componentType) (int64, error) {
if n < e.m.start || n >= e.m.start+e.m.count {
return 0, fmt.Errorf("block %d out of range [%d,%d)", n, e.m.start, e.m.start+e.m.count)
// detectLayout reads the e2store type tag at each component slot of the first
// block and builds a componentType→slot map. This makes the reader robust
// against profile variations: receipts, td, and proof can appear in any
// supported subset, and the slot positions are looked up by tag.
func (e *Era) detectLayout() (map[componentType]int, error) {
if e.m.count == 0 {
return nil, errors.New("Ere file contains no blocks")
}
if int(component) >= int(e.m.components) {
return 0, fmt.Errorf("component %d not present", component)
tagToComponent := map[uint16]componentType{
era.TypeCompressedHeader: header,
era.TypeCompressedBody: body,
era.TypeCompressedSlimReceipts: receipts,
era.TypeTotalDifficulty: td,
era.TypeProof: proof,
}
layout := make(map[componentType]int, e.m.components)
for slot := 0; slot < int(e.m.components); slot++ {
off, err := e.slotOffset(0, slot)
if err != nil {
return nil, fmt.Errorf("read slot %d offset: %w", slot, err)
}
typ, _, err := e.s.ReadMetadataAt(off)
if err != nil {
return nil, fmt.Errorf("read slot %d type tag: %w", slot, err)
}
comp, ok := tagToComponent[typ]
if !ok {
return nil, fmt.Errorf("unknown e2store type 0x%04x at index slot %d", typ, slot)
}
if existing, dup := layout[comp]; dup {
return nil, fmt.Errorf("duplicate component %d at slots %d and %d", comp, existing, slot)
}
layout[comp] = slot
}
if _, ok := layout[header]; !ok {
return nil, errors.New("Ere index has no header component")
}
if _, ok := layout[body]; !ok {
return nil, errors.New("Ere index has no body component")
}
return layout, nil
}
payloadlen := 8 + 8*e.m.count*e.m.components + 16 // 8 for start block, 8 per property per block, 16 for the number of properties and the number of blocks
// slotOffset returns the absolute file offset of the entry at the given slot
// of the given block index (0 = first block in file). It does no validation
// against the layout map and is intended for use by detectLayout and
// indexOffset.
func (e *Era) slotOffset(blockIdx uint64, slot int) (int64, error) {
payloadlen := 8 + 8*e.m.count*e.m.components + 16
indstart := e.m.length - int64(payloadlen) - 8
rec := (n-e.m.start)*e.m.components + uint64(component)
rec := blockIdx*e.m.components + uint64(slot)
pos := indstart + 8 + 8 + int64(rec*8)
var buf [8]byte
@ -330,23 +365,43 @@ func (e *Era) indexOffset(n uint64, component componentType) (int64, error) {
return int64(rel) + indstart, nil
}
// metadata contains the information about the era file that is written into the file.
type metadata struct {
start uint64 // start block number
count uint64 // number of blocks in the era
components uint64 // number of properties
length int64 // length of the file in bytes
// headerOff, bodyOff, receiptOff, and tdOff return the offsets of the respective components for a given block number.
func (e *Era) headerOff(num uint64) (int64, error) { return e.indexOffset(num, header) }
func (e *Era) bodyOff(num uint64) (int64, error) { return e.indexOffset(num, body) }
func (e *Era) receiptOff(num uint64) (int64, error) { return e.indexOffset(num, receipts) }
func (e *Era) tdOff(num uint64) (int64, error) { return e.indexOffset(num, td) }
// indexOffset calculates offset to a certain component for a block number
// within a file. The slot is resolved through the layout map detected at
// Open time, so files with optional components in any order are handled
// safely regardless of the on-disk position.
func (e *Era) indexOffset(n uint64, component componentType) (int64, error) {
if n < e.m.start || n >= e.m.start+e.m.count {
return 0, fmt.Errorf("block %d out of range [%d,%d)", n, e.m.start, e.m.start+e.m.count)
}
slot, ok := e.m.layout[component]
if !ok {
return 0, fmt.Errorf("component %d not present in this Ere file", component)
}
return e.slotOffset(n-e.m.start, slot)
}
// componentType represents the integer form of a specific type that can be present in the era file.
// metadata contains the information about the era file that is written into the file.
type metadata struct {
start uint64 // start block number
count uint64 // number of blocks in the era
components uint64 // number of slots per block in the index
layout map[componentType]int // component → slot index, derived from on-disk type tags
length int64 // length of the file in bytes
}
// componentType identifies a kind of per-block entry (header, body, etc.).
type componentType int
// header, body, receipts, td, and proof are the different types of components
// that can be present in the era file. The Ere spec defines receipts, td, and
// proof as independently optional, but this reader maps components to their
// position in the index using this fixed enum. That positional mapping is only
// safe as long as receipts are present (no "noreceipts" profile) — Open() and
// From() enforce this via checkProfile and checkComponents.
// The Ere spec defines receipts, td, and proof as independently optional. The
// reader resolves a component to its actual slot via the metadata.layout map,
// which is built at Open time from the e2store type tag of each slot — so the
// position of a component within the index is never assumed.
const (
header componentType = iota
body