From f4efd4b09d5da70b3925891fccb442d689438c63 Mon Sep 17 00:00:00 2001
From: jeevan-sid
Date: Wed, 18 Feb 2026 13:42:33 +0530
Subject: [PATCH] feat: enable tx indexing for era files

---
 cmd/geth/chaincmd.go            |  87 +++++++++++++++++++++++
 cmd/utils/cmd.go                | 118 ++++++++++++++++++++++++++++++++
 core/rawdb/accessors_indexes.go |  41 ++++++++++-
 core/rawdb/schema.go            |   8 +++
 4 files changed, 253 insertions(+), 1 deletion(-)

diff --git a/cmd/geth/chaincmd.go b/cmd/geth/chaincmd.go
index 1ccb78d622..7efbe60680 100644
--- a/cmd/geth/chaincmd.go
+++ b/cmd/geth/chaincmd.go
@@ -171,6 +171,19 @@ from Era archives.
 		Description: `
 The export-history command will export blocks and their corresponding receipts
 into Era archives. Eras are typically packaged in steps of 8192 blocks.
+`,
+	}
+	importEraIndexCommand = &cli.Command{
+		Action:    importEraIndex,
+		Name:      "import-era-index",
+		Usage:     "Import transaction index from era archive files",
+		ArgsUsage: "<dir>",
+		Flags:     slices.Concat(utils.DatabaseFlags, utils.NetworkFlags, []cli.Flag{utils.EraFormatFlag}),
+		Description: `
+The import-era-index command indexes transactions from era files to enable
+transaction lookups by hash
+for pruned block ranges. Era files must be present in the specified directory.
+The command is idempotent and can be re-run to index newly added era files.
 `,
 	}
 	importPreimagesCommand = &cli.Command{
@@ -589,6 +602,80 @@ func exportHistory(ctx *cli.Context) error {
 	return nil
 }
 
+// importEraIndex indexes the transactions of the era files in the given directory into the chain database.
+func importEraIndex(ctx *cli.Context) error {
+	if ctx.Args().Len() != 1 {
+		utils.Fatalf("usage: %s", ctx.Command.ArgsUsage)
+	}
+
+	stack, _ := makeConfigNode(ctx)
+	defer stack.Close()
+
+	db := utils.MakeChainDatabase(ctx, stack, false)
+	defer db.Close()
+
+	var (
+		start   = time.Now()
+		dir     = ctx.Args().Get(0)
+		network string
+	)
+
+	// Determine network.
+	if utils.IsNetworkPreset(ctx) {
+		switch {
+		case ctx.Bool(utils.MainnetFlag.Name):
+			network = "mainnet"
+		case ctx.Bool(utils.SepoliaFlag.Name):
+			network = "sepolia"
+		case ctx.Bool(utils.HoleskyFlag.Name):
+			network = "holesky"
+		case ctx.Bool(utils.HoodiFlag.Name):
+			network = "hoodi"
+		}
+	} else {
+		// No network flag set, try to determine network based on files
+		// present in directory.
+		var networks []string
+		for _, n := range params.NetworkNames {
+			entries, err := era.ReadDir(dir, n)
+			if err != nil {
+				return fmt.Errorf("error reading %s: %w", dir, err)
+			}
+			if len(entries) > 0 {
+				networks = append(networks, n)
+			}
+		}
+		if len(networks) == 0 {
+			return fmt.Errorf("no era files found in %s", dir)
+		}
+		if len(networks) > 1 {
+			return errors.New("multiple networks found, use a network flag to specify network")
+		}
+		network = networks[0]
+	}
+
+	// Determine era format.
+	var (
+		format = ctx.String(utils.EraFormatFlag.Name)
+		from   func(era.ReadAtSeekCloser) (era.Era, error)
+	)
+	switch format {
+	case "era1", "era":
+		from = onedb.From
+	case "erae":
+		from = execdb.From
+	default:
+		return fmt.Errorf("unknown --era.format %q (expected 'era1' or 'erae')", format)
+	}
+
+	if err := utils.ImportEraIndex(db, dir, network, from); err != nil {
+		return err
+	}
+
+	fmt.Printf("Era indexing done in %v\n", time.Since(start))
+	return nil
+}
+
 // importPreimages imports preimage data from the specified file.
 // it is deprecated, and the export function has been removed, but
 // the import function is kept around for the time being so that
diff --git a/cmd/utils/cmd.go b/cmd/utils/cmd.go
index 995724e6fc..febfc1e07d 100644
--- a/cmd/utils/cmd.go
+++ b/cmd/utils/cmd.go
@@ -345,6 +345,124 @@ func ImportHistory(chain *core.BlockChain, dir string, network string, from func
 	return nil
 }
 
+// ImportEraIndex indexes transactions from era files into the database to enable
+// transaction lookups by hash for pruned block ranges.
+func ImportEraIndex(db ethdb.Database, dir string, network string, from func(f era.ReadAtSeekCloser) (era.Era, error)) error {
+	entries, err := era.ReadDir(dir, network)
+	if err != nil {
+		return fmt.Errorf("error reading era directory: %w", err)
+	}
+	if len(entries) == 0 {
+		return fmt.Errorf("no era files found for network %s in %s", network, dir)
+	}
+
+	// Resume after the last fully indexed epoch; epochs are positions in entries.
+	var startEpoch uint64
+	if tail := rawdb.ReadEraIndexTail(db); tail != nil {
+		startEpoch = *tail + 1
+		log.Info("Resuming era indexing", "lastEpoch", *tail, "nextEpoch", startEpoch)
+	}
+
+	var (
+		start       = time.Now()
+		reported    = time.Now()
+		batch       = db.NewBatch()
+		totalBlocks uint64
+		totalTxs    uint64
+	)
+
+	// Index each era file.
+	for epoch, entry := range entries {
+		if uint64(epoch) < startEpoch {
+			continue
+		}
+
+		err := func() error {
+			path := filepath.Join(dir, entry)
+			f, err := os.Open(path)
+			if err != nil {
+				return fmt.Errorf("error opening era file %s: %w", path, err)
+			}
+			defer f.Close()
+
+			e, err := from(f)
+			if err != nil {
+				return fmt.Errorf("error opening era: %w", err)
+			}
+
+			it, err := e.Iterator()
+			if err != nil {
+				return fmt.Errorf("error creating iterator: %w", err)
+			}
+
+			// Iterate over all blocks in this epoch.
+			var epochBlocks, epochTxs uint64
+			for it.Next() {
+				if err := it.Error(); err != nil {
+					return fmt.Errorf("error iterating era file: %w", err)
+				}
+
+				block, err := it.Block()
+				if err != nil {
+					return fmt.Errorf("error reading block: %w", err)
+				}
+
+				// Index all transactions in this block.
+				txHashes := make([]common.Hash, len(block.Transactions()))
+				for i, tx := range block.Transactions() {
+					txHashes[i] = tx.Hash()
+				}
+
+				if len(txHashes) > 0 {
+					rawdb.WriteEraTxLookupEntries(batch, block.NumberU64(), txHashes)
+					epochTxs += uint64(len(txHashes))
+				}
+
+				epochBlocks++
+				totalBlocks++
+
+				// Write batch if it's getting large.
+				if batch.ValueSize() >= ethdb.IdealBatchSize {
+					if err := batch.Write(); err != nil {
+						return fmt.Errorf("error writing index batch: %w", err)
+					}
+					batch.Reset()
+				}
+			}
+			// Surface any iterator error before the epoch is marked as indexed.
+			if err := it.Error(); err != nil {
+				return fmt.Errorf("error iterating era file: %w", err)
+			}
+
+			// Flush remaining batch for this epoch.
+			if batch.ValueSize() > 0 {
+				if err := batch.Write(); err != nil {
+					return fmt.Errorf("error writing index batch: %w", err)
+				}
+				batch.Reset()
+			}
+
+			// Mark this epoch as fully indexed.
+			rawdb.WriteEraIndexTail(db, uint64(epoch))
+			totalTxs += epochTxs
+
+			if time.Since(reported) >= 8*time.Second {
+				log.Info("Indexing era files", "epoch", epoch, "blocks", epochBlocks, "txs", epochTxs,
+					"totalBlocks", totalBlocks, "totalTxs", totalTxs, "elapsed", common.PrettyDuration(time.Since(start)))
+				reported = time.Now()
+			}
+
+			return nil
+		}()
+		if err != nil {
+			return err
+		}
+	}
+
+	log.Info("Era indexing complete", "totalBlocks", totalBlocks, "totalTxs", totalTxs, "elapsed", common.PrettyDuration(time.Since(start)))
+	return nil
+}
+
 func missingBlocks(chain *core.BlockChain, blocks []*types.Block) []*types.Block {
 	head := chain.CurrentBlock()
 	for i, block := range blocks {
diff --git a/core/rawdb/accessors_indexes.go b/core/rawdb/accessors_indexes.go
index 8c8c3ec9bb..e8dcbad741 100644
--- a/core/rawdb/accessors_indexes.go
+++ b/core/rawdb/accessors_indexes.go
@@ -61,7 +61,8 @@ func DecodeTxLookupEntry(data []byte, db ethdb.Reader) *uint64 {
 func ReadTxLookupEntry(db ethdb.Reader, hash common.Hash) *uint64 {
 	data, _ := db.Get(txLookupKey(hash))
 	if len(data) == 0 {
-		return nil
+		// Fallback: check era-derived index
+		return ReadEraTxLookupEntry(db, hash)
 	}
 	return DecodeTxLookupEntry(data, db)
 }
@@ -134,6 +135,44 @@ func DeleteAllTxLookupEntries(db ethdb.KeyValueStore, condition func(common.Hash
 	}
 }
 
+// ReadEraTxLookupEntry retrieves the positional metadata associated with a transaction
+// hash from the era1-derived index.
+func ReadEraTxLookupEntry(db ethdb.Reader, hash common.Hash) *uint64 {
+	data, _ := db.Get(eraTxLookupKey(hash))
+	if len(data) == 0 {
+		return nil
+	}
+	return DecodeTxLookupEntry(data, db)
+}
+
+// WriteEraTxLookupEntries stores positional metadata for transactions from era1 files,
+// enabling hash based transaction and receipt lookups for pruned history.
+func WriteEraTxLookupEntries(db ethdb.KeyValueWriter, number uint64, hashes []common.Hash) {
+	numberBytes := new(big.Int).SetUint64(number).Bytes()
+	for _, hash := range hashes {
+		if err := db.Put(eraTxLookupKey(hash), numberBytes); err != nil {
+			log.Crit("Failed to store era1 transaction lookup entry", "err", err)
+		}
+	}
+}
+
+// ReadEraIndexTail retrieves the last fully indexed era1 epoch, or nil if unset or malformed.
+func ReadEraIndexTail(db ethdb.Reader) *uint64 {
+	data, _ := db.Get(eraIndexTailKey)
+	if len(data) != 8 {
+		return nil
+	}
+	epoch := binary.BigEndian.Uint64(data)
+	return &epoch
+}
+
+// WriteEraIndexTail stores the last fully indexed era1 epoch.
+func WriteEraIndexTail(db ethdb.KeyValueWriter, epoch uint64) {
+	if err := db.Put(eraIndexTailKey, encodeBlockNumber(epoch)); err != nil {
+		log.Crit("Failed to store era1 index tail", "err", err)
+	}
+}
+
 // findTxInBlockBody traverses the given RLP-encoded block body, searching for
 // the transaction specified by its hash.
 func findTxInBlockBody(blockbody rlp.RawValue, target common.Hash) (*types.Transaction, uint64, error) {
diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go
index d9140c5fd6..db402ff751 100644
--- a/core/rawdb/schema.go
+++ b/core/rawdb/schema.go
@@ -120,6 +120,9 @@ var (
 	CodePrefix            = []byte("c") // CodePrefix + code hash -> account code
 	skeletonHeaderPrefix  = []byte("S") // skeletonHeaderPrefix + num (uint64 big endian) -> header
 
+	eraTxLookupPrefix = []byte("e")            // eraTxLookupPrefix + hash -> block number of the era-indexed transaction
+	eraIndexTailKey   = []byte("eraIndexTail") // eraIndexTailKey -> last fully indexed era epoch (uint64 big endian)
+
 	// Path-based storage scheme of merkle patricia trie.
 	TrieNodeAccountPrefix = []byte("A") // TrieNodeAccountPrefix + hexPath -> trie node
 	TrieNodeStoragePrefix = []byte("O") // TrieNodeStoragePrefix + accountHash + hexPath -> trie node
@@ -219,6 +222,11 @@ func txLookupKey(hash common.Hash) []byte {
 	return append(txLookupPrefix, hash.Bytes()...)
 }
 
+// eraTxLookupKey = eraTxLookupPrefix + hash
+func eraTxLookupKey(hash common.Hash) []byte {
+	return append(eraTxLookupPrefix, hash.Bytes()...)
+}
+
 // accountSnapshotKey = SnapshotAccountPrefix + hash
 func accountSnapshotKey(hash common.Hash) []byte {
 	return append(SnapshotAccountPrefix, hash.Bytes()...)