rlp: add RawList for working with un-decoded lists (#33755)

This adds a new type wrapper that decodes as a list, but does not actually decode the contents of the list. The type parameter exists as a marker, and enables decoding the elements lazily. RawList can also be used for building a list incrementally.
2026-02-26 15:47:21 +00:00 · 2026-02-04 20:16:24 +01:00 · 2026-02-04 20:16:24 +01:00 · 7b7be249cb
commit 7b7be249cb
parent 6b82cef68f
5 changed files with 386 additions and 15 deletions
--- a/rlp/encode.go
+++ b/rlp/encode.go
@ -102,6 +102,29 @@ func EncodeToReader(val interface{}) (size int, r io.Reader, err error) {
 	return buf.size(), &encReader{buf: buf}, nil
 }

+// EncodeToRawList encodes val as an RLP list and returns it as a RawList.
+//
+// An empty or nil slice yields the zero RawList, which represents the empty
+// list. An error is returned if encoding val fails. ErrExpectedList is
+// returned if val does not encode as a list at all.
+func EncodeToRawList[T any](val []T) (RawList[T], error) {
+	if len(val) == 0 {
+		return RawList[T]{}, nil
+	}
+
+	// Encode the value to an internal buffer.
+	buf := getEncBuffer()
+	defer encBufferPool.Put(buf)
+	if err := buf.encode(val); err != nil {
+		return RawList[T]{}, err
+	}
+
+	// Not every slice encodes as a list: per the package's encoding rules,
+	// byte slices are written as RLP strings, in which case no list header
+	// is recorded. Report that instead of panicking on the index below.
+	if len(buf.lheads) == 0 {
+		return RawList[T]{}, ErrExpectedList
+	}
+
+	// Create the RawList. RawList reserves 9 bytes of padding in front of the
+	// content for the list header, so the encoded value (header + content) is
+	// placed such that its content starts exactly at index 9.
+	contentSize := buf.lheads[0].size
+	enc := make([]byte, contentSize+9)
+	offset := 9 - headsize(uint64(contentSize))
+	buf.copyTo(enc[offset:])
+	return RawList[T]{enc: enc}, nil
+}
+
 type listhead struct {
 	offset int // index of this header in string data
 	size   int // total size of encoded data (including list headers)
--- a/rlp/iterator.go
+++ b/rlp/iterator.go
@ -16,14 +16,16 @@

 package rlp

-type listIterator struct {
-	data []byte
-	next []byte
-	err  error
+// Iterator is an iterator over the elements of an encoded container.
+// It walks the encoded data one element at a time; see Next, Value and Err.
+type Iterator struct {
+	data   []byte // input that has not been consumed yet
+	next   []byte // encoding of the current element, as returned by Value
+	offset int    // position just past the current element; see Offset
+	err    error  // error of the most recent Next step, if any
 }

-// NewListIterator creates an iterator for the (list) represented by data
-func NewListIterator(data RawValue) (*listIterator, error) {
+// NewListIterator creates an iterator for the (list) represented by data.
+func NewListIterator(data RawValue) (*Iterator, error) {
 	k, t, c, err := readKind(data)
 	if err != nil {
 		return nil, err
@ -31,16 +33,18 @@ func NewListIterator(data RawValue) (*listIterator, error) {
 	if k != List {
 		return nil, ErrExpectedList
 	}
-	it := &listIterator{
-		data: data[t : t+c],
-	}
+	it := &Iterator{data: data[t : t+c], offset: int(t)}
 	return it, nil
 }

+// newIterator creates an iterator over data, which holds the encoded
+// elements directly, without an enclosing list header. Offsets start at zero.
+func newIterator(data []byte) *Iterator {
+	return &Iterator{data: data}
+}
+
 // Next forwards the iterator one step.
 // Returns true if there is a next item or an error occurred on this step (check Err()).
 // On parse error, the iterator is marked finished and subsequent calls return false.
-func (it *listIterator) Next() bool {
+func (it *Iterator) Next() bool {
 	if len(it.data) == 0 {
 		return false
 	}
@ -52,17 +56,34 @@ func (it *listIterator) Next() bool {
 		it.data = nil
 		return true
 	}
-	it.next = it.data[:t+c]
-	it.data = it.data[t+c:]
+	length := t + c
+	it.next = it.data[:length]
+	it.data = it.data[length:]
+	it.offset += int(length)
 	it.err = nil
 	return true
 }

-// Value returns the current value
-func (it *listIterator) Value() []byte {
+// Count returns the remaining number of items.
+// Note this is O(n) and the result may be incorrect if the list data is invalid.
+// The returned count is always an upper bound on the remaining items
+// that will be visited by the iterator.
+func (it *Iterator) Count() int {
+	// The CountValues error is discarded here.
+	// NOTE(review): the upper-bound claim above depends on what CountValues
+	// returns for malformed data — confirm against its implementation.
+	count, _ := CountValues(it.data)
+	return count
+}
+
+// Value returns the current value, i.e. the encoded bytes of the element
+// the iterator was moved to by the last successful call to Next.
+func (it *Iterator) Value() []byte {
 	return it.next
 }

-func (it *listIterator) Err() error {
+// Offset returns the offset of the current value into the list data.
+// The internal offset points just past the current value, so the length of
+// the value is subtracted to get its start. For iterators created by
+// NewListIterator the offset counts from the start of the encoded list,
+// including its header.
+func (it *Iterator) Offset() int {
+	return it.offset - len(it.next)
+}
+
+// Err returns the error encountered by the most recent call to Next, if any.
+// Note that Next reports true on the step where the error occurs, so Err has
+// to be checked inside the loop as well; a successful step resets it to nil.
+func (it *Iterator) Err() error {
 	return it.err
 }
--- a/rlp/iterator_test.go
+++ b/rlp/iterator_test.go
@ -38,10 +38,18 @@ func TestIterator(t *testing.T) {
 		t.Fatal("expected two elems, got zero")
 	}
 	txs := it.Value()
+	if offset := it.Offset(); offset != 3 {
+		t.Fatal("wrong offset", offset, "want 3")
+	}
+
 	// Check that uncles exist
 	if !it.Next() {
 		t.Fatal("expected two elems, got one")
 	}
+	if offset := it.Offset(); offset != 219 {
+		t.Fatal("wrong offset", offset, "want 219")
+	}
+
 	txit, err := NewListIterator(txs)
 	if err != nil {
 		t.Fatal(err)
--- a/rlp/raw.go
+++ b/rlp/raw.go
@ -17,8 +17,10 @@
 package rlp

 import (
+	"fmt"
 	"io"
 	"reflect"
+	"slices"
 )

 // RawValue represents an encoded RLP value and can be used to delay
@ -28,6 +30,121 @@ type RawValue []byte

 var rawValueType = reflect.TypeFor[RawValue]()

+// RawList represents an encoded RLP list.
+// The items are kept in encoded form; the type parameter T is the element
+// type used by Items and Append. The zero value is an empty list.
+type RawList[T any] struct {
+	// The list is stored in encoded form.
+	// Note this buffer has some special properties:
+	//
+	//   - if the buffer is nil, it's the zero value, representing
+	//     an empty list.
+	//   - if the buffer is non-nil, it must have a length of at least
+	//     9 bytes, which is reserved padding for the encoded list header.
+	//     The remaining bytes, enc[9:], store the content bytes of the list.
+	//
+	// The implementation code mostly works with the Content method because it
+	// returns something valid either way.
+	enc []byte
+}
+
+// Content returns the RLP-encoded data of the list.
+// This does not include the list-header.
+// The return value is a direct reference to the internal buffer, not a copy.
+// It is nil for a nil receiver and for the zero value.
+func (r *RawList[T]) Content() []byte {
+	// Treat a nil pointer like the zero value, matching Bytes.
+	if r == nil || r.enc == nil {
+		return nil
+	}
+	// Skip the 9 bytes of padding reserved for the list header.
+	return r.enc[9:]
+}
+
+// EncodeRLP writes the encoded list to the writer.
+// The bytes written are those returned by Bytes; the error of the Write call
+// is returned as is.
+func (r RawList[T]) EncodeRLP(w io.Writer) error {
+	_, err := w.Write(r.Bytes())
+	return err
+}
+
+// Bytes returns the RLP encoding of the list, i.e. the list header followed
+// by the content.
+// Note the return value aliases the internal buffer, except for a nil receiver
+// or the zero value, where a fresh one-byte slice is returned. The call also
+// writes into the reserved header padding of the internal buffer.
+func (r *RawList[T]) Bytes() []byte {
+	if r == nil || r.enc == nil {
+		return []byte{0xC0} // zero value encodes as empty list
+	}
+	// puthead is given the start of the padding and returns n, used below as
+	// the header length (presumably the header is written to r.enc[:n]).
+	n := puthead(r.enc, 0xC0, 0xF7, uint64(len(r.Content())))
+	// Move those n bytes to the end of the 9-byte padding so that they sit
+	// directly in front of the content at r.enc[9:].
+	copy(r.enc[9-n:], r.enc[:n])
+	return r.enc[9-n:]
+}
+
+// DecodeRLP decodes the list. This does not perform validation of the items!
+//
+// The next value in the stream must be a list, otherwise an error wrapping
+// ErrExpectedList is returned. The list content is read into a new buffer with
+// 9 bytes of header padding in front, and r is overwritten with it.
+func (r *RawList[T]) DecodeRLP(s *Stream) error {
+	k, size, err := s.Kind()
+	if err != nil {
+		return err
+	}
+	if k != List {
+		return fmt.Errorf("%w for %T", ErrExpectedList, r)
+	}
+	// NOTE(review): size comes from the input; confirm that Stream bounds it
+	// before this allocation.
+	enc := make([]byte, 9+size)
+	if err := s.readFull(enc[9:]); err != nil {
+		return err
+	}
+	*r = RawList[T]{enc: enc}
+	return nil
+}
+
+// Items decodes and returns all items in the list.
+func (r *RawList[T]) Items() ([]T, error) {
+	items := make([]T, r.Len())
+	it := r.ContentIterator()
+	for i := 0; it.Next(); i++ {
+		if err := DecodeBytes(it.Value(), &items[i]); err != nil {
+			return items[:i], err
+		}
+	}
+	return items, nil
+}
+
+// Len returns the number of items in the list.
+// The items are counted by walking the encoded content on every call.
+func (r *RawList[T]) Len() int {
+	// The CountValues error is discarded here.
+	// NOTE(review): the count reported for malformed content depends on
+	// CountValues — confirm against its implementation.
+	count, _ := CountValues(r.Content())
+	return count
+}
+
+// Size returns the encoded size of the list, that is the size of the
+// content plus the list header.
+func (r *RawList[T]) Size() uint64 {
+	contentLen := uint64(len(r.Content()))
+	return ListSize(contentLen)
+}
+
+// Empty returns true if the list contains no items,
+// i.e. when it has no content bytes.
+func (r *RawList[T]) Empty() bool {
+	content := r.Content()
+	return len(content) == 0
+}
+
+// ContentIterator returns an iterator over the content of the list.
+// Note the offsets returned by iterator.Offset are relative to the
+// Content bytes of the list.
+func (r *RawList[T]) ContentIterator() *Iterator {
+	// The iterator starts at offset zero of the content bytes.
+	return &Iterator{data: r.Content()}
+}
+
+// Append adds an item to the end of the list.
+// The item is RLP-encoded and its encoding is appended to the list content.
+// An error is returned if the item cannot be encoded.
+func (r *RawList[T]) Append(item T) error {
+	// A nil buffer is the zero value; set up the header padding first.
+	if r.enc == nil {
+		r.enc = make([]byte, 9)
+	}
+
+	buf := getEncBuffer()
+	defer encBufferPool.Put(buf)
+	if err := buf.encode(item); err != nil {
+		return err
+	}
+
+	// Extend the buffer by the encoded size and write the item into the new tail.
+	itemSize := buf.size()
+	oldLen := len(r.enc)
+	newLen := oldLen + itemSize
+	r.enc = slices.Grow(r.enc, itemSize)[:newLen]
+	buf.copyTo(r.enc[oldLen:newLen])
+	return nil
+}
+
 // StringSize returns the encoded size of a string.
 func StringSize(s string) uint64 {
 	switch n := len(s); n {
--- a/rlp/raw_test.go
+++ b/rlp/raw_test.go
@ -19,11 +19,213 @@ package rlp
 import (
 	"bytes"
 	"errors"
+	"fmt"
 	"io"
+	"reflect"
 	"testing"
 	"testing/quick"
 )

+// rawListTest is a test case for RawList with items of type T.
+type rawListTest[T any] struct {
+	input   string // hex encoding of the complete list
+	content string // hex encoding of the list content, without the header
+	items   []T    // expected decoded items
+	length  int    // expected number of items
+}
+
+// name returns the subtest name, built from the item type and the expected
+// number of items.
+func (test rawListTest[T]) name() string {
+	return fmt.Sprintf("%T-%d", *new(T), test.length)
+}
+
+// run decodes the test input into a RawList and checks its accessors, the
+// content iterator, the encoding round trip and EncodeToRawList against the
+// expected values of the test case.
+func (test rawListTest[T]) run(t *testing.T) {
+	// check decoding and properties
+	input := unhex(test.input)
+	inputSize := len(input)
+	var rl RawList[T]
+	if err := DecodeBytes(input, &rl); err != nil {
+		t.Fatal("decode failed:", err)
+	}
+	if l := rl.Len(); l != test.length {
+		t.Fatalf("wrong Len %d, want %d", l, test.length)
+	}
+	if sz := rl.Size(); sz != uint64(inputSize) {
+		t.Fatalf("wrong Size %d, want %d", sz, inputSize)
+	}
+	items, err := rl.Items()
+	if err != nil {
+		t.Fatal("Items failed:", err)
+	}
+	if !reflect.DeepEqual(items, test.items) {
+		t.Fatal("wrong items:", items)
+	}
+	if !bytes.Equal(rl.Content(), unhex(test.content)) {
+		t.Fatalf("wrong Content %x, want %s", rl.Content(), test.content)
+	}
+	if !bytes.Equal(rl.Bytes(), unhex(test.input)) {
+		t.Fatalf("wrong Bytes %x, want %s", rl.Bytes(), test.input)
+	}
+
+	// check iterator: Count before iterating, then each item in order
+	it := rl.ContentIterator()
+	i := 0
+	if count := it.Count(); count != test.length {
+		t.Fatalf("iterator has wrong Count %d, want %d", count, test.length)
+	}
+	for it.Next() {
+		var item T
+		if err := DecodeBytes(it.Value(), &item); err != nil {
+			t.Fatalf("item %d decode error: %v", i, err)
+		}
+		if !reflect.DeepEqual(item, items[i]) {
+			t.Fatalf("iterator has wrong item %v at %d", item, i)
+		}
+		i++
+	}
+	if i != test.length {
+		t.Fatalf("iterator produced %d values, want %d", i, test.length)
+	}
+	if it.Err() != nil {
+		t.Fatalf("iterator error: %v", it.Err())
+	}
+
+	// check encoding round trip
+	output, err := EncodeToBytes(&rl)
+	if err != nil {
+		t.Fatal("encode error:", err)
+	}
+	if !bytes.Equal(output, unhex(test.input)) {
+		t.Fatalf("encoding does not round trip: %x", output)
+	}
+
+	// check EncodeToRawList on items produces same bytes
+	encRL, err := EncodeToRawList(test.items)
+	if err != nil {
+		t.Fatal("EncodeToRawList error:", err)
+	}
+	encRLOutput, err := EncodeToBytes(&encRL)
+	if err != nil {
+		t.Fatal("EncodeToBytes of encoded list failed:", err)
+	}
+	if !bytes.Equal(encRLOutput, output) {
+		t.Fatalf("wrong encoding of EncodeToRawList result: %x", encRLOutput)
+	}
+}
+
+// TestRawList runs the rawListTest cases as subtests. The cases cover an
+// empty list, lists of integers and structs, and a list whose input starts
+// with a two-byte header (F83C).
+func TestRawList(t *testing.T) {
+	// The interface allows mixing test cases with different type parameters.
+	tests := []interface {
+		name() string
+		run(t *testing.T)
+	}{
+		rawListTest[uint64]{
+			input:   "C0",
+			content: "",
+			items:   []uint64{},
+			length:  0,
+		},
+		rawListTest[uint64]{
+			input:   "C3010203",
+			content: "010203",
+			items:   []uint64{1, 2, 3},
+			length:  3,
+		},
+		rawListTest[simplestruct]{
+			input:   "C6C20102C20304",
+			content: "C20102C20304",
+			items:   []simplestruct{{1, "\x02"}, {3, "\x04"}},
+			length:  2,
+		},
+		rawListTest[string]{
+			input:   "F83C836161618362626283636363836464648365656583666666836767678368686883696969836A6A6A836B6B6B836C6C6C836D6D6D836E6E6E836F6F6F",
+			content: "836161618362626283636363836464648365656583666666836767678368686883696969836A6A6A836B6B6B836C6C6C836D6D6D836E6E6E836F6F6F",
+			items:   []string{"aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn", "ooo"},
+			length:  15,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name(), test.run)
+	}
+}
+
+// TestRawListEmpty checks that the zero value of RawList and a nil pointer
+// to RawList both behave as an empty list.
+func TestRawListEmpty(t *testing.T) {
+	// zero value list
+	var rl RawList[uint64]
+	b, err := EncodeToBytes(&rl)
+	if err != nil {
+		t.Fatal("encode error:", err)
+	}
+	if !bytes.Equal(b, unhex("C0")) {
+		t.Fatalf("empty RawList has wrong encoding %x", b)
+	}
+	if !rl.Empty() {
+		t.Fatal("list should be Empty")
+	}
+	if rl.Len() != 0 {
+		t.Fatalf("empty list has Len %d", rl.Len())
+	}
+	if rl.Size() != 1 {
+		t.Fatalf("empty list has Size %d", rl.Size())
+	}
+	if len(rl.Content()) > 0 {
+		t.Fatalf("empty list has non-empty Content")
+	}
+	if !bytes.Equal(rl.Bytes(), []byte{0xC0}) {
+		t.Fatalf("empty list has wrong encoding")
+	}
+
+	// nil pointer
+	var nilptr *RawList[uint64]
+	b, err = EncodeToBytes(nilptr)
+	if err != nil {
+		t.Fatal("encode error:", err)
+	}
+	if !bytes.Equal(b, unhex("C0")) {
+		t.Fatalf("nil pointer to RawList has wrong encoding %x", b)
+	}
+}
+
+// This checks that *RawList works in an 'optional' context.
+// A nil pointer must be left out when encoding, and must stay nil when the
+// field is absent from the input.
+func TestRawListOptional(t *testing.T) {
+	type foo struct {
+		L *RawList[uint64] `rlp:"optional"`
+	}
+	// nil pointer encoding
+	var empty foo
+	b, err := EncodeToBytes(empty)
+	if err != nil {
+		t.Fatal("encode error:", err)
+	}
+	if !bytes.Equal(b, unhex("C0")) {
+		t.Fatalf("nil pointer to RawList has wrong encoding %x", b)
+	}
+	// decoding
+	var dec foo
+	if err := DecodeBytes(unhex("C0"), &dec); err != nil {
+		t.Fatal(err)
+	}
+	if dec.L != nil {
+		t.Fatal("rawlist was decoded as non-nil")
+	}
+}
+
+// TestRawListAppend builds a list by appending two items to the zero value
+// and checks the resulting length, size, content and encoding.
+func TestRawListAppend(t *testing.T) {
+	var rl RawList[simplestruct]
+
+	v1 := simplestruct{1, "one"}
+	v2 := simplestruct{2, "two"}
+	if err := rl.Append(v1); err != nil {
+		t.Fatal("append 1 failed:", err)
+	}
+	if err := rl.Append(v2); err != nil {
+		t.Fatal("append 2 failed:", err)
+	}
+
+	if rl.Len() != 2 {
+		t.Fatalf("wrong Len %d", rl.Len())
+	}
+	if rl.Size() != 13 {
+		t.Fatalf("wrong Size %d", rl.Size())
+	}
+	if !bytes.Equal(rl.Content(), unhex("C501836F6E65 C5028374776F")) {
+		t.Fatalf("wrong Content %x", rl.Content())
+	}
+	encoded, err := EncodeToBytes(&rl)
+	if err != nil {
+		t.Fatal("encode error:", err)
+	}
+	if !bytes.Equal(encoded, unhex("CC C501836F6E65 C5028374776F")) {
+		t.Fatalf("wrong encoding %x", encoded)
+	}
+}
+
 func TestCountValues(t *testing.T) {
 	tests := []struct {
 		input string // note: spaces in input are stripped by unhex