triedb/pathdb: improve the performance of parse index block (#32219)

The implementation of `parseIndexBlock` used a reverse loop with slice
appends to build the restart points, which was less cache-friendly and
involved unnecessary allocations and operations. In this PR we change
the implementation to read and validate the restart points in one single
forward loop.

The benchmark can be run with:

```bash
go test -benchmem -bench=BenchmarkParseIndexBlock ./triedb/pathdb/
```

The results are shown below:

```
benchmark                      old ns/op     new ns/op     delta
BenchmarkParseIndexBlock-8     52.9          37.5          -29.05%
```

This is an improvement of about 29%.

---------

Signed-off-by: jsvisa <delweng@gmail.com>
This commit is contained in:
Delweng 2025-07-17 11:07:22 +08:00 committed by GitHub
parent becca46010
commit a487729d83
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 33 additions and 18 deletions

View file

@ -116,34 +116,31 @@ func parseIndexBlock(blob []byte) ([]uint16, []byte, error) {
if len(blob) < 1 {
return nil, nil, fmt.Errorf("corrupted index block, len: %d", len(blob))
}
restartLen := blob[len(blob)-1]
restartLen := int(blob[len(blob)-1])
if restartLen == 0 {
return nil, nil, errors.New("corrupted index block, no restart")
}
tailLen := int(restartLen)*2 + 1
tailLen := restartLen*2 + 1
if len(blob) < tailLen {
return nil, nil, fmt.Errorf("truncated restarts, size: %d, restarts: %d", len(blob), restartLen)
}
restarts := make([]uint16, 0, restartLen)
for i := int(restartLen); i > 0; i-- {
restart := binary.BigEndian.Uint16(blob[len(blob)-1-2*i:])
restarts = append(restarts, restart)
}
// Validate that restart points are strictly ordered and within the valid
restarts := make([]uint16, restartLen)
dataEnd := len(blob) - tailLen
// Extract and validate that restart points are strictly ordered and within the valid
// data range.
var prev uint16
for i := 0; i < len(restarts); i++ {
if i != 0 {
if restarts[i] <= prev {
return nil, nil, fmt.Errorf("restart out of order, prev: %d, next: %d", prev, restarts[i])
}
for i := 0; i < restartLen; i++ {
off := dataEnd + 2*i
restarts[i] = binary.BigEndian.Uint16(blob[off : off+2])
if i > 0 && restarts[i] <= restarts[i-1] {
return nil, nil, fmt.Errorf("restart out of order, prev: %d, next: %d", restarts[i-1], restarts[i])
}
if int(restarts[i]) >= len(blob)-tailLen {
return nil, nil, fmt.Errorf("invalid restart position, restart: %d, size: %d", restarts[i], len(blob)-tailLen)
if int(restarts[i]) >= dataEnd {
return nil, nil, fmt.Errorf("invalid restart position, restart: %d, size: %d", restarts[i], dataEnd)
}
prev = restarts[i]
}
return restarts, blob[:len(blob)-tailLen], nil
return restarts, blob[:dataEnd], nil
}
// blockReader is the reader to access the element within a block.

View file

@ -214,3 +214,21 @@ func TestCorruptedIndexBlock(t *testing.T) {
t.Fatal("Corrupted index block data is not detected")
}
}
// BenchmarkParseIndexBlock benchmarks the performance of parseIndexBlock.
//
// The input blob is produced once, before the timer is reset, by a block
// writer that is fed the even values 0, 2, ..., 8190. Each timed iteration
// parses that same blob and aborts the benchmark on any parse error.
func BenchmarkParseIndexBlock(b *testing.B) {
	// Build the index block blob up front so that only parsing is measured.
	bw, err := newBlockWriter(nil, newIndexBlockDesc(0))
	if err != nil {
		// Fail early with a clear message instead of continuing with an
		// unusable writer.
		b.Fatalf("newBlockWriter failed: %v", err)
	}
	for i := 0; i < 4096; i++ {
		// NOTE(review): any result returned by append is not inspected here;
		// confirm against the writer whether every value is actually accepted.
		bw.append(uint64(i * 2))
	}
	blob := bw.finish()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if _, _, err := parseIndexBlock(blob); err != nil {
			b.Fatalf("parseIndexBlock failed: %v", err)
		}
	}
}