diff --git a/cmd/era/main.go b/cmd/era/main.go index 43279e7001..3abe54a8b4 100644 --- a/cmd/era/main.go +++ b/cmd/era/main.go @@ -183,11 +183,11 @@ func open(ctx *cli.Context, epoch uint64) (era.Era, error) { return openByPath(path) } -// openByPath tries to open a single file as either eraE or era1 based on extension, +// openByPath tries to open a single file as either Ere or Era1 based on extension, // falling back to the other reader if needed. func openByPath(path string) (era.Era, error) { switch strings.ToLower(filepath.Ext(path)) { - case ".erae": + case ".ere": if e, err := execdb.Open(path); err != nil { return nil, err } else { @@ -229,7 +229,7 @@ func verify(ctx *cli.Context) error { // Build the verification list respecting the rule: // era1: must have accumulator, always verify - // erae: verify only if accumulator exists (pre-merge) + // ere: verify only if accumulator exists (pre-merge / transition) // Build list of files to verify. verify := make([]string, 0, len(entries)) @@ -251,15 +251,15 @@ func verify(ctx *cli.Context) error { } verify = append(verify, path) - case ".erae": + case ".ere": e, err := execdb.Open(path) if err != nil { - return fmt.Errorf("error opening erae file %s: %w", name, err) + return fmt.Errorf("error opening ere file %s: %w", name, err) } _, accErr := e.Accumulator() e.Close() if accErr == nil { - verify = append(verify, path) // pre-merge only + verify = append(verify, path) // pre-merge / transition only } default: return fmt.Errorf("unsupported era file: %s", name) diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go index 98ed348d8c..be3cdd3e92 100644 --- a/cmd/geth/chaincmd.go +++ b/cmd/geth/chaincmd.go @@ -528,15 +528,15 @@ func importHistory(ctx *cli.Context) error { var ( format = ctx.String(utils.EraFormatFlag.Name) - from func(era.ReadAtSeekCloser) (era.Era, error) + from func(f era.ReadAtSeekCloser) (era.Era, error) ) switch format { case "era1", "era": from = onedb.From - case "erae": + case "ere": from = 
execdb.From default: - return fmt.Errorf("unknown --era.format %q (expected 'era1' or 'erae')", format) + return fmt.Errorf("unknown --era.format %q (expected 'era1' or 'ere')", format) } if err := utils.ImportHistory(chain, dir, network, from); err != nil { return err @@ -582,11 +582,11 @@ func exportHistory(ctx *cli.Context) error { case "era1", "era": newBuilder = func(w io.Writer) era.Builder { return onedb.NewBuilder(w) } filename = func(network string, epoch int, root common.Hash) string { return onedb.Filename(network, epoch, root) } - case "erae": + case "ere": newBuilder = func(w io.Writer) era.Builder { return execdb.NewBuilder(w) } filename = func(network string, epoch int, root common.Hash) string { return execdb.Filename(network, epoch, root) } default: - return fmt.Errorf("unknown archive format %q (use 'era1' or 'erae')", format) + return fmt.Errorf("unknown archive format %q (use 'era1' or 'ere')", format) } if err := utils.ExportHistory(chain, dir, uint64(first), uint64(last), newBuilder, filename); err != nil { utils.Fatalf("Export error: %v\n", err) diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index c41cf4ee40..8a7ff8f350 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -1110,7 +1110,7 @@ Please note that --` + MetricsHTTPFlag.Name + ` must be set to start the server. // Era flags are a group of flags related to the era archive format. 
EraFormatFlag = &cli.StringFlag{ Name: "era.format", - Usage: "Archive format: 'era1' or 'erae'", + Usage: "Archive format: 'era1' or 'ere'", } ) diff --git a/cmd/utils/history_test.go b/cmd/utils/history_test.go index 6631946129..56375f9ff5 100644 --- a/cmd/utils/history_test.go +++ b/cmd/utils/history_test.go @@ -53,7 +53,7 @@ func TestHistoryImportAndExport(t *testing.T) { from func(f era.ReadAtSeekCloser) (era.Era, error) }{ {"era1", onedb.NewBuilder, onedb.Filename, onedb.From}, - {"erae", execdb.NewBuilder, execdb.Filename, execdb.From}, + {"ere", execdb.NewBuilder, execdb.Filename, execdb.From}, } { t.Run(tt.name, func(t *testing.T) { var ( diff --git a/internal/era/era.go b/internal/era/era.go index a3c8465bc4..0aae75e4bb 100644 --- a/internal/era/era.go +++ b/internal/era/era.go @@ -29,7 +29,7 @@ import ( "github.com/ethereum/go-ethereum/core/types" ) -// Type constants for the e2store entries in the Era1 and EraE formats. +// Type constants for the e2store entries in the Era1 and Ere formats. var ( TypeVersion uint16 = 0x3265 TypeCompressedHeader uint16 = 0x03 @@ -40,10 +40,9 @@ var ( TypeCompressedSlimReceipts uint16 = 0x0a // uses eth/69 encoding TypeProof uint16 = 0x0b TypeBlockIndex uint16 = 0x3266 - TypeComponentIndex uint16 = 0x3267 + TypeDynamicBlockIndex uint16 = 0x3267 MaxSize = 8192 - // headerSize uint64 = 8 ) type ReadAtSeekCloser interface { @@ -93,7 +92,7 @@ type Builder interface { // Finalize writes all collected entries and returns the epoch identifier. // For Era1 (onedb): returns the accumulator root. - // For EraE (execdb): returns the last block hash. + // For Ere (execdb): returns the last block hash. Finalize() (common.Hash, error) // Accumulator returns the accumulator root after Finalize has been called. @@ -115,7 +114,7 @@ type Era interface { } // ReadDir reads all the era files in a directory for a given network. 
-// Format: --.erae or --.era1 +// Format: <network>-<epoch>-<hash>(-<profile>)*.ere or <network>-<epoch>-<root>.era1 func ReadDir(dir, network string) ([]string, error) { entries, err := os.ReadDir(dir) @@ -129,14 +128,16 @@ func ReadDir(dir, network string) ([]string, error) { ) for _, entry := range entries { ext := path.Ext(entry.Name()) - if ext != ".erae" && ext != ".era1" { + if ext != ".ere" && ext != ".era1" { continue } if dirType == "" { dirType = ext } parts := strings.Split(entry.Name(), "-") - if len(parts) != 3 || parts[0] != network { + // Ere files may carry an optional profile postfix (e.g. "-noproofs"), + // so the filename has at least 3 dash-separated parts. + if len(parts) < 3 || parts[0] != network { // Invalid era filename, skip. continue } diff --git a/internal/era/execdb/builder.go b/internal/era/execdb/builder.go index 6246b9caae..4c656ab2e0 100644 --- a/internal/era/execdb/builder.go +++ b/internal/era/execdb/builder.go @@ -16,40 +16,44 @@ package execdb -// EraE file format specification. +// Ere file format specification. +// +// See https://github.com/eth-clients/e2store-format-specs/blob/main/formats/ere.md. // // The format can be summarized with the following expression: // -// eraE := Version | CompressedHeader* | CompressedBody* | CompressedSlimReceipts* | TotalDifficulty* | other-entries* | Accumulator? | ComponentIndex +// ere := Version | CompressedHeader+ | CompressedBody+ | CompressedSlimReceipts* | Proof* | TotalDifficulty* | other-entries* | Accumulator? 
| DynamicBlockIndex // // Each basic element is its own e2store entry: // -// Version = { type: 0x3265, data: nil } -// CompressedHeader = { type: 0x03, data: snappyFramed(rlp(header)) } -// CompressedBody = { type: 0x04, data: snappyFramed(rlp(body)) } -// CompressedSlimReceipts = { type: 0x0a, data: snappyFramed(rlp([tx-type, post-state-or-status, cumulative-gas, logs])) } -// TotalDifficulty = { type: 0x06, data: uint256 (header.total_difficulty) } -// AccumulatorRoot = { type: 0x07, data: hash_tree_root(List(HeaderRecord, 8192)) } -// ComponentIndex = { type: 0x3267, data: component-index } +// Version = { type: [0x65, 0x32], data: nil } +// CompressedHeader = { type: [0x03, 0x00], data: snappyFramed(rlp(header)) } +// CompressedBody = { type: [0x04, 0x00], data: snappyFramed(rlp(body)) } +// CompressedSlimReceipts = { type: [0x0a, 0x00], data: snappyFramed(rlp([tx-type, post-state-or-status, cumulative-gas, logs])) } +// TotalDifficulty = { type: [0x06, 0x00], data: uint256(header.total_difficulty) } +// Proof = { type: [0x0b, 0x00], data: snappyFramed(rlp([proof-type, ssz(proof)])) } +// AccumulatorRoot = { type: [0x07, 0x00], data: hash_tree_root(List(HeaderRecord, 8192)) } +// DynamicBlockIndex = { type: [0x67, 0x32], data: block-index } // // Notes: // - TotalDifficulty is present for pre-merge and merge transition epochs. // For pure post-merge epochs, TotalDifficulty is omitted entirely. // - In merge transition epochs, post-merge blocks store the final total // difficulty (the TD at which the merge occurred). -// - AccumulatorRoot is only written for pre-merge epochs. +// - AccumulatorRoot is only written for pre-merge or transition epochs. // - HeaderRecord is defined in the Portal Network specification. -// - Proofs (type 0x09) are defined in the spec but not yet supported in this implementation. +// - Proof entries are recommended by the spec but not produced by this +// implementation; files written here use the "noproofs" profile postfix. 
// -// ComponentIndex stores relative offsets to each block's components: +// DynamicBlockIndex stores relative offsets to each block's components: // -// component-index := starting-number | indexes | indexes | ... | component-count | count -// indexes := header-offset | body-offset | receipts-offset | td-offset? +// block-index := starting-number | indexes | indexes | ... | component-count | count +// indexes := header-index | body-index | receipts-index? | difficulty-index? | proof-index? // // All values are little-endian uint64. // // Due to the accumulator size limit of 8192, the maximum number of blocks in an -// EraE file is also 8192. +// Ere file is also 8192. import ( "bytes" @@ -67,7 +71,7 @@ import ( "github.com/golang/snappy" ) -// Builder is used to build an EraE e2store file. It collects block entries and +// Builder is used to build an Ere e2store file. It collects block entries and // writes them to the underlying e2store.Writer. type Builder struct { w *e2store.Writer @@ -326,7 +330,7 @@ func (b *Builder) writeIndex(o *offsets) error { write(uint64(componentCount)) write(uint64(count)) - n, err := b.w.Write(era.TypeComponentIndex, buf.Bytes()) + n, err := b.w.Write(era.TypeDynamicBlockIndex, buf.Bytes()) b.written += uint64(n) return err } diff --git a/internal/era/execdb/era_test.go b/internal/era/execdb/era_test.go index f66931b9ed..1b78b61541 100644 --- a/internal/era/execdb/era_test.go +++ b/internal/era/execdb/era_test.go @@ -18,19 +18,24 @@ package execdb import ( "bytes" + "encoding/binary" "fmt" "io" "math/big" "os" + "path/filepath" "slices" "testing" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" + "github.com/ethereum/go-ethereum/internal/era" + "github.com/ethereum/go-ethereum/internal/era/e2store" "github.com/ethereum/go-ethereum/rlp" + "github.com/golang/snappy" ) -func TestEraE(t *testing.T) { +func TestEre(t *testing.T) { t.Parallel() tests := []struct { @@ -74,7 +79,7 @@ func TestEraE(t 
*testing.T) { t.Run(tt.name, func(t *testing.T) { t.Parallel() - f, err := os.CreateTemp(t.TempDir(), "erae-test") + f, err := os.CreateTemp(t.TempDir(), "ere-test") if err != nil { t.Fatalf("error creating temp file: %v", err) } @@ -165,6 +170,18 @@ func TestEraE(t *testing.T) { if e.Count() != uint64(totalBlocks) { t.Fatalf("wrong block count: want %d, got %d", totalBlocks, e.Count()) } + // Verify the layout detected from on-disk type tags. Header, + // body, and receipts are always present; TD is only present + // when the epoch contains pre-merge blocks. + if !e.HasComponent(header) || !e.HasComponent(body) || !e.HasComponent(receipts) { + t.Fatalf("missing required component in layout %v", e.m.layout) + } + if got, want := e.HasComponent(td), tt.preMerge > 0; got != want { + t.Fatalf("td component presence mismatch: want %v, got %v", want, got) + } + if e.HasComponent(proof) { + t.Fatalf("proof component should not be present in layout %v", e.m.layout) + } // Verify accumulator in file. if tt.accumulator { @@ -295,7 +312,7 @@ func TestEraE(t *testing.T) { func TestInitialTD(t *testing.T) { t.Parallel() - f, err := os.CreateTemp(t.TempDir(), "erae-initial-td-test") + f, err := os.CreateTemp(t.TempDir(), "ere-initial-td-test") if err != nil { t.Fatalf("error creating temp file: %v", err) } @@ -339,6 +356,148 @@ func TestInitialTD(t *testing.T) { } } +// TestDetectLayoutNoReceipts hand-builds an Ere file with the receipts slot +// replaced by a TotalDifficulty entry (the on-disk shape a "noreceipts" +// profile would take) and verifies the reader detects this from the e2store +// type tags rather than misreading TD as receipts. This is the core safety +// property of detectLayout — exercise it via From, where no filename is +// available. 
+func TestDetectLayoutNoReceipts(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + path := filepath.Join(dir, "synthetic.ere") + f, err := os.Create(path) + if err != nil { + t.Fatalf("create: %v", err) + } + + w := e2store.NewWriter(f) + written := uint64(0) + writeEntry := func(typ uint16, data []byte) { + n, err := w.Write(typ, data) + if err != nil { + t.Fatalf("write type 0x%04x: %v", typ, err) + } + written += uint64(n) + } + + var snappyBuf bytes.Buffer + writeSnappy := func(typ uint16, data []byte) { + snappyBuf.Reset() + sw := snappy.NewBufferedWriter(&snappyBuf) + if _, err := sw.Write(data); err != nil { + t.Fatalf("snappy write: %v", err) + } + if err := sw.Flush(); err != nil { + t.Fatalf("snappy flush: %v", err) + } + writeEntry(typ, snappyBuf.Bytes()) + } + + // Version + writeEntry(era.TypeVersion, nil) + + // Block 0 components in order: header, body, td (no receipts). + headerBytes := mustEncode(&types.Header{Number: big.NewInt(0), Difficulty: big.NewInt(1)}) + bodyBytes := mustEncode(&types.Body{}) + tdLE := make([]byte, 32) // uint256(1) little-endian + tdLE[0] = 1 + + headerOff := written + writeSnappy(era.TypeCompressedHeader, headerBytes) + bodyOff := written + writeSnappy(era.TypeCompressedBody, bodyBytes) + tdOff := written + writeEntry(era.TypeTotalDifficulty, tdLE) + + // Build the DynamicBlockIndex with 3 components per block, 1 block, and + // the third slot pointing at the TD entry rather than at receipts. 
+ base := int64(written) + relative := func(absolute uint64) uint64 { return uint64(int64(absolute) - base) } + + var indexBuf bytes.Buffer + writeU64 := func(v uint64) { + if err := binary.Write(&indexBuf, binary.LittleEndian, v); err != nil { + t.Fatalf("index write: %v", err) + } + } + writeU64(0) // starting block number + writeU64(relative(headerOff)) + writeU64(relative(bodyOff)) + writeU64(relative(tdOff)) + writeU64(3) // component count + writeU64(1) // block count + writeEntry(era.TypeDynamicBlockIndex, indexBuf.Bytes()) + + if err := f.Close(); err != nil { + t.Fatalf("close: %v", err) + } + + // Open via From — no filename is consulted, so the layout map is the + // only line of defence. + g, err := os.Open(path) + if err != nil { + t.Fatalf("reopen: %v", err) + } + t.Cleanup(func() { g.Close() }) + e, err := From(g) + if err != nil { + t.Fatalf("From: %v", err) + } + defer e.Close() + ere := e.(*Era) + + if ere.HasComponent(receipts) { + t.Errorf("receipts should not be reported as present in synthetic noreceipts file") + } + if !ere.HasComponent(td) { + t.Errorf("td should be reported as present") + } + if got, want := ere.m.layout[td], 2; got != want { + t.Errorf("td slot: want %d, got %d", want, got) + } + + // Reading receipts must fail loudly, not silently decode TD bytes. + if _, err := ere.GetRawReceiptsByNumber(0); err == nil { + t.Error("expected error when reading receipts from a noreceipts file") + } +} + +// TestOpenRejectsNoreceiptsProfile verifies that Open() refuses to decode an +// Ere file whose filename declares the unsupported "noreceipts" profile. This +// is the defence-in-depth filename check; structural safety is provided by +// detectLayout (covered separately by TestDetectLayoutNoReceipts). +func TestOpenRejectsNoreceiptsProfile(t *testing.T) { + t.Parallel() + + dir := t.TempDir() + + // Build a valid Ere file with default-profile contents directly at the + // noreceipts path so Open() rejects it on the filename alone. 
+ path := filepath.Join(dir, "mainnet-00000-deadbeef-noreceipts.ere") + f, err := os.Create(path) + if err != nil { + t.Fatalf("create file: %v", err) + } + builder := NewBuilder(f) + header := mustEncode(&types.Header{Number: big.NewInt(0), Difficulty: big.NewInt(1)}) + body := mustEncode(&types.Body{}) + receipts := mustEncode([]types.SlimReceipt{}) + if err := builder.AddRLP(header, body, receipts, 0, common.Hash{0}, big.NewInt(1), big.NewInt(1)); err != nil { + t.Fatalf("AddRLP: %v", err) + } + if _, err := builder.Finalize(); err != nil { + t.Fatalf("Finalize: %v", err) + } + if err := f.Close(); err != nil { + t.Fatalf("close: %v", err) + } + if _, err := Open(path); err == nil { + t.Fatal("expected Open to reject noreceipts profile") + } +} + func mustEncode(obj any) []byte { b, err := rlp.EncodeToBytes(obj) if err != nil { diff --git a/internal/era/execdb/iterator.go b/internal/era/execdb/iterator.go index 8d17ac00a9..6b7b34241a 100644 --- a/internal/era/execdb/iterator.go +++ b/internal/era/execdb/iterator.go @@ -192,7 +192,7 @@ func (it *RawIterator) Next() bool { } // Check if TD component is present in this file (pre-merge or merge-transition epoch). - if int(td) < int(it.e.m.components) { + if it.e.HasComponent(td) { tdOffset, err := it.e.tdOff(it.next) if err != nil { it.setErr(err) diff --git a/internal/era/execdb/reader.go b/internal/era/execdb/reader.go index d0aaad1748..a8fbb67315 100644 --- a/internal/era/execdb/reader.go +++ b/internal/era/execdb/reader.go @@ -18,11 +18,14 @@ package execdb import ( "encoding/binary" + "errors" "fmt" "io" "math/big" "os" + "path/filepath" "slices" + "strings" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/core/types" @@ -39,20 +42,29 @@ type Era struct { m metadata // metadata for the Era file } -// Filename returns a recognizable filename for an EraE file. +// Filename returns a recognizable filename for an Ere file. 
// The filename uses the last block hash to uniquely identify the epoch's content. +// +// Files produced by this builder do not include Proof entries, so the +// "noproofs" profile postfix is appended per the Ere spec. func Filename(network string, epoch int, lastBlockHash common.Hash) string { - return fmt.Sprintf("%s-%05d-%s.erae", network, epoch, lastBlockHash.Hex()[2:10]) + return fmt.Sprintf("%s-%05d-%s-noproofs.ere", network, epoch, lastBlockHash.Hex()[2:10]) } -// Open accesses the era file. +// Open accesses the era file at the given path. The basename is used to parse +// the profile postfix (per the Ere spec filename convention) as a defence-in- +// depth check; structural safety is enforced by detectLayout, which reads the +// e2store type tag at each index slot rather than trusting position. func Open(path string) (*Era, error) { + if err := checkProfile(filepath.Base(path)); err != nil { + return nil, err + } f, err := os.Open(path) if err != nil { return nil, err } - e := &Era{f: f, s: e2store.NewReader(f)} - if err := e.loadIndex(); err != nil { + e, err := from(f) + if err != nil { f.Close() return nil, err } @@ -69,16 +81,48 @@ func (e *Era) Close() error { return err } -// From returns an Era backed by f. +// From returns an Era backed by f. Component layout is derived from the +// e2store type tags stored in the file itself, so callers do not need to +// supply a filename or profile. func From(f era.ReadAtSeekCloser) (era.Era, error) { - e := &Era{f: f, s: e2store.NewReader(f)} - if err := e.loadIndex(); err != nil { + e, err := from(f) + if err != nil { f.Close() return nil, err } return e, nil } +func from(f era.ReadAtSeekCloser) (*Era, error) { + e := &Era{f: f, s: e2store.NewReader(f)} + if err := e.loadIndex(); err != nil { + return nil, err + } + return e, nil +} + +// checkProfile inspects the profile postfix(es) in an Ere filename and rejects +// any combination this reader doesn't support. 
This is a best-effort, defence- +// in-depth check; the authoritative layout detection happens in detectLayout +// from the on-disk type tags. +// +// The Ere format itself does not require a particular filename, so this check +// is permissive about non-conforming names: validation only kicks in when a +// profile postfix is actually present. +func checkProfile(name string) error { + name = strings.TrimSuffix(name, ".ere") + parts := strings.Split(name, "-") + if len(parts) <= 3 { + return nil // no profile postfix to validate + } + for _, p := range parts[3:] { + if p == "noreceipts" { + return fmt.Errorf("Ere file %q uses the noreceipts profile, which is not supported", name) + } + } + return nil +} + // Start retrieves the starting block number. func (e *Era) Start() uint64 { return e.m.start @@ -185,12 +229,19 @@ func (e *Era) GetRawReceiptsByNumber(blockNum uint64) ([]byte, error) { return io.ReadAll(r) } +// HasComponent reports whether the given component is recorded in the file's +// index, as detected from the on-disk e2store type tags. +func (e *Era) HasComponent(c componentType) bool { + _, ok := e.m.layout[c] + return ok +} + // InitialTD returns initial total difficulty before the difficulty of the // first block of the Era is applied. Returns an error if TD is not available // (e.g., post-merge epoch). func (e *Era) InitialTD() (*big.Int, error) { // Check if TD component exists. - if int(td) >= int(e.m.components) { + if !e.HasComponent(td) { return nil, fmt.Errorf("total difficulty not available in this epoch") } @@ -210,8 +261,8 @@ func (e *Era) InitialTD() (*big.Int, error) { return new(big.Int).Sub(firstTD, header.Difficulty), nil } -// Accumulator reads the accumulator entry in the EraE file if it exists. -// Note that one premerge erae files will contain an accumulator entry. +// Accumulator reads the accumulator entry if present. Only pre-merge and +// merge-transition Ere files contain one. 
func (e *Era) Accumulator() (common.Hash, error) { entry, err := e.s.Find(era.TypeAccumulator) if err != nil { @@ -220,7 +271,8 @@ func (e *Era) Accumulator() (common.Hash, error) { return common.BytesToHash(entry.Value), nil } -// loadIndex loads in the index table containing all offsets and caches it. +// loadIndex loads in the index table trailer (start, count, component-count) +// and then derives the component→slot layout from the on-disk type tags. func (e *Era) loadIndex() error { var err error e.m.length, err = e.f.Seek(0, io.SeekEnd) @@ -241,30 +293,68 @@ func (e *Era) loadIndex() error { if err != nil { return err } - e.m.start = binary.LittleEndian.Uint64(b[:8]) + + layout, err := e.detectLayout() + if err != nil { + return err + } + e.m.layout = layout return nil } -// headerOff, bodyOff, receiptOff, and tdOff return the offsets of the respective components for a given block number. -func (e *Era) headerOff(num uint64) (int64, error) { return e.indexOffset(num, header) } -func (e *Era) bodyOff(num uint64) (int64, error) { return e.indexOffset(num, body) } -func (e *Era) receiptOff(num uint64) (int64, error) { return e.indexOffset(num, receipts) } -func (e *Era) tdOff(num uint64) (int64, error) { return e.indexOffset(num, td) } - -// indexOffset calculates offset to a certain component for a block number within a file. -func (e *Era) indexOffset(n uint64, component componentType) (int64, error) { - if n < e.m.start || n >= e.m.start+e.m.count { - return 0, fmt.Errorf("block %d out of range [%d,%d)", n, e.m.start, e.m.start+e.m.count) +// detectLayout reads the e2store type tag at each component slot of the first +// block and builds a componentType→slot map. This makes the reader robust +// against profile variations: receipts, td, and proof can appear in any +// supported subset, and the slot positions are looked up by tag. 
+func (e *Era) detectLayout() (map[componentType]int, error) { + if e.m.count == 0 { + return nil, errors.New("Ere file contains no blocks") } - if int(component) >= int(e.m.components) { - return 0, fmt.Errorf("component %d not present", component) + tagToComponent := map[uint16]componentType{ + era.TypeCompressedHeader: header, + era.TypeCompressedBody: body, + era.TypeCompressedSlimReceipts: receipts, + era.TypeTotalDifficulty: td, + era.TypeProof: proof, } + layout := make(map[componentType]int, e.m.components) + for slot := 0; slot < int(e.m.components); slot++ { + off, err := e.slotOffset(0, slot) + if err != nil { + return nil, fmt.Errorf("read slot %d offset: %w", slot, err) + } + typ, _, err := e.s.ReadMetadataAt(off) + if err != nil { + return nil, fmt.Errorf("read slot %d type tag: %w", slot, err) + } + comp, ok := tagToComponent[typ] + if !ok { + return nil, fmt.Errorf("unknown e2store type 0x%04x at index slot %d", typ, slot) + } + if existing, dup := layout[comp]; dup { + return nil, fmt.Errorf("duplicate component %d at slots %d and %d", comp, existing, slot) + } + layout[comp] = slot + } + if _, ok := layout[header]; !ok { + return nil, errors.New("Ere index has no header component") + } + if _, ok := layout[body]; !ok { + return nil, errors.New("Ere index has no body component") + } + return layout, nil +} - payloadlen := 8 + 8*e.m.count*e.m.components + 16 // 8 for start block, 8 per property per block, 16 for the number of properties and the number of blocks +// slotOffset returns the absolute file offset of the entry at the given slot +// of the given block index (0 = first block in file). It does no validation +// against the layout map and is intended for use by detectLayout and +// indexOffset. 
+func (e *Era) slotOffset(blockIdx uint64, slot int) (int64, error) { + payloadlen := 8 + 8*e.m.count*e.m.components + 16 indstart := e.m.length - int64(payloadlen) - 8 - rec := (n-e.m.start)*e.m.components + uint64(component) + rec := blockIdx*e.m.components + uint64(slot) pos := indstart + 8 + 8 + int64(rec*8) var buf [8]byte @@ -275,18 +365,43 @@ func (e *Era) indexOffset(n uint64, component componentType) (int64, error) { return int64(rel) + indstart, nil } -// metadata contains the information about the era file that is written into the file. -type metadata struct { - start uint64 // start block number - count uint64 // number of blocks in the era - components uint64 // number of properties - length int64 // length of the file in bytes +// headerOff, bodyOff, receiptOff, and tdOff return the offsets of the respective components for a given block number. +func (e *Era) headerOff(num uint64) (int64, error) { return e.indexOffset(num, header) } +func (e *Era) bodyOff(num uint64) (int64, error) { return e.indexOffset(num, body) } +func (e *Era) receiptOff(num uint64) (int64, error) { return e.indexOffset(num, receipts) } +func (e *Era) tdOff(num uint64) (int64, error) { return e.indexOffset(num, td) } + +// indexOffset calculates offset to a certain component for a block number +// within a file. The slot is resolved through the layout map detected at +// Open time, so files with optional components in any order are handled +// safely regardless of the on-disk position. +func (e *Era) indexOffset(n uint64, component componentType) (int64, error) { + if n < e.m.start || n >= e.m.start+e.m.count { + return 0, fmt.Errorf("block %d out of range [%d,%d)", n, e.m.start, e.m.start+e.m.count) + } + slot, ok := e.m.layout[component] + if !ok { + return 0, fmt.Errorf("component %d not present in this Ere file", component) + } + return e.slotOffset(n-e.m.start, slot) } -// componentType represents the integer form of a specific type that can be present in the era file. 
+// metadata contains the information about the era file that is written into the file. +type metadata struct { + start uint64 // start block number + count uint64 // number of blocks in the era + components uint64 // number of slots per block in the index + layout map[componentType]int // component → slot index, derived from on-disk type tags + length int64 // length of the file in bytes +} + +// componentType identifies a kind of per-block entry (header, body, etc.). type componentType int -// header, body, receipts, td, and proof are the different types of components that can be present in the era file. +// The Ere spec defines receipts, td, and proof as independently optional. The +// reader resolves a component to its actual slot via the metadata.layout map, +// which is built at Open time from the e2store type tag of each slot — so the +// position of a component within the index is never assumed. const ( header componentType = iota body