internal/build: add IPFS CID computation

Compute the CIDv1 (raw codec) and the base58-encoded multihash of a file using only the Go standard library.
This enables content-addressed verification of release archives.
This commit is contained in:
Hydepwns 2026-01-23 14:51:40 +01:00
parent 9a8e14e77e
commit 533f51746c
2 changed files with 325 additions and 0 deletions

127
internal/build/cid.go Normal file
View file

@ -0,0 +1,127 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package build
import (
"crypto/sha256"
"encoding/base32"
"io"
"math/big"
"os"
"strings"
)
// CID represents an IPFS Content Identifier for raw file content.
//
// Both fields are textual encodings of the same SHA-256 multihash of the
// content; they differ only in the wrapping and the alphabet used.
type CID struct {
// V1 is the CIDv1 with raw codec: bafkrei... (base32lower, 59 chars)
// It is the multibase prefix 'b' followed by the lowercase, unpadded base32
// encoding of: version byte 0x01, raw codec 0x55, then the multihash.
// This is the canonical format for raw binary content.
V1 string
// Multihash is the raw SHA256 multihash (base58btc encoded): Qm... (46 chars)
// The encoded bytes are 0x12 (SHA-256), 0x20 (digest length), then the digest.
//
// Note: This is NOT a valid CIDv0 for raw content (CIDv0 requires dag-pb codec).
// However, it's included for compatibility with tools that expect Qm... format.
// To address the actual content, use the V1 CID.
// NOTE(review): converting this string with `ipfs cid format -v 1 <multihash>`
// presumably yields a dag-pb CIDv1, not the raw-codec value stored in V1 —
// confirm before relying on that conversion.
Multihash string
}
// ComputeFileCID opens the file at path and returns the IPFS CID of its raw
// content, as computed by ComputeCID.
//
// The identifier is derived from the SHA256 digest of the file's exact bytes
// combined with the raw multicodec (0x55); the content is neither wrapped nor
// chunked before hashing.
//
// The result carries the CIDv1 (bafkrei...) as the primary identifier and the
// base58-encoded multihash for legacy tooling. An error from opening or
// reading the file is returned unchanged, together with a nil *CID.
//
// Verify with: ipfs add --only-hash --raw-leaves -Q <file>
// NOTE(review): that command presumably matches only while the file fits in a
// single IPFS chunk; larger files are likely reported under a chunked root
// CID instead — confirm for release-archive sized inputs.
func ComputeFileCID(path string) (*CID, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return nil, openErr
	}
	// Read-only handle: the Close error carries no useful information here.
	defer file.Close()

	return ComputeCID(file)
}
// ComputeCID reads r until EOF and returns the IPFS CID of the bytes read.
//
// The content is hashed with SHA256 and wrapped as a multihash. That multihash
// is returned base58-encoded (Qm...) and, prefixed with the CIDv1 version and
// raw codec bytes, as a lowercase base32 CIDv1 string (bafkrei...).
//
// If reading from r fails, the read error is returned with a nil *CID.
func ComputeCID(r io.Reader) (*CID, error) {
	const (
		cidVersion1 = 0x01 // CID version 1
		codecRaw    = 0x55 // raw multicodec
		mhSHA256    = 0x12 // SHA256 multicodec
		multibaseB  = "b"  // multibase prefix used for base32lower
	)

	hasher := sha256.New()
	if _, err := io.Copy(hasher, r); err != nil {
		return nil, err
	}

	// Multihash layout: hash code, digest length (32), digest. Sum appends the
	// digest to the two-byte header passed in.
	mh := hasher.Sum([]byte{mhSHA256, sha256.Size})

	// Binary CIDv1 layout: version, codec, multihash.
	binaryCID := append([]byte{cidVersion1, codecRaw}, mh...)

	// The standard alphabet is upper case, so the text is lowered afterwards.
	b32 := base32.StdEncoding.WithPadding(base32.NoPadding).EncodeToString(binaryCID)

	return &CID{
		V1:        multibaseB + strings.ToLower(b32),
		Multihash: base58Encode(mh), // Qm... format, for legacy compatibility
	}, nil
}
// base58Encode returns data encoded with Bitcoin's base58 alphabet.
//
// The input is treated as a big-endian unsigned integer and written in base
// 58. Every leading zero byte of data is represented by one leading '1' in the
// output. Empty input produces the empty string.
//
// ComputeCID uses it to render the multihash in the Qm... form.
func base58Encode(data []byte) string {
	const alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"

	// Leading zero bytes vanish in the integer conversion below, so they are
	// counted up front and re-added as '1' characters.
	leading := 0
	for leading < len(data) && data[leading] == 0 {
		leading++
	}

	var (
		value     = new(big.Int).SetBytes(data)
		radix     = big.NewInt(58)
		remainder = new(big.Int)
	)

	// One byte needs log(256)/log(58) ≈ 1.366 base58 digits, so 138% of the
	// input length plus one is always enough room. Digits come out least
	// significant first and are therefore written from the end of the buffer.
	out := make([]byte, len(data)*138/100+1)
	pos := len(out)
	for value.Sign() > 0 {
		value.DivMod(value, radix, remainder)
		pos--
		out[pos] = alphabet[remainder.Int64()]
	}
	for i := 0; i < leading; i++ {
		pos--
		out[pos] = '1'
	}
	return string(out[pos:])
}

198
internal/build/cid_test.go Normal file
View file

@ -0,0 +1,198 @@
// Copyright 2024 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package build
import (
"bytes"
"os"
"strings"
"testing"
)
// TestBase58Encode checks base58Encode against fixed vectors: empty input,
// inputs made only of zero bytes, and an ASCII string.
func TestBase58Encode(t *testing.T) {
	cases := []struct {
		in   []byte
		want string
	}{
		{in: []byte{}, want: ""},
		{in: []byte{0}, want: "1"},
		{in: []byte{0, 0, 0}, want: "111"},
		{in: []byte("Hello World!"), want: "2NEpo7TZRRrLZSi2U"},
	}

	for _, tc := range cases {
		if got := base58Encode(tc.in); got != tc.want {
			t.Errorf("base58Encode(%v) = %q, want %q", tc.in, got, tc.want)
		}
	}
}
// TestComputeCID checks the shape of the identifiers ComputeCID produces for a
// few inputs: the expected prefix and length of both encodings, and that the
// CIDv1 string is entirely lowercase.
func TestComputeCID(t *testing.T) {
	// Every input is expected to produce identifiers of the same shape.
	const (
		v1Prefix = "bafkrei"
		mhPrefix = "Qm"
		v1Len    = 59
		mhLen    = 46
	)

	inputs := []struct {
		name    string
		content []byte
	}{
		{name: "empty content", content: []byte{}},
		{name: "hello world", content: []byte("hello world")},
		{name: "binary content", content: []byte{0x00, 0x01, 0x02, 0x03, 0xff, 0xfe, 0xfd}},
	}

	for _, in := range inputs {
		t.Run(in.name, func(t *testing.T) {
			got, err := ComputeCID(bytes.NewReader(in.content))
			if err != nil {
				t.Fatalf("ComputeCID() error = %v", err)
			}

			// CIDv1 format
			if !strings.HasPrefix(got.V1, v1Prefix) {
				t.Errorf("V1 = %q, want prefix %q", got.V1, v1Prefix)
			}
			if n := len(got.V1); n != v1Len {
				t.Errorf("V1 length = %d, want %d", n, v1Len)
			}

			// Multihash format
			if !strings.HasPrefix(got.Multihash, mhPrefix) {
				t.Errorf("Multihash = %q, want prefix %q", got.Multihash, mhPrefix)
			}
			if n := len(got.Multihash); n != mhLen {
				t.Errorf("Multihash length = %d, want %d", n, mhLen)
			}

			// CIDv1 must not contain upper-case characters
			if strings.ToLower(got.V1) != got.V1 {
				t.Errorf("V1 should be lowercase: %q", got.V1)
			}
		})
	}
}
func TestComputeCIDDeterministic(t *testing.T) {
content := []byte("deterministic test content")
cid1, err := ComputeCID(bytes.NewReader(content))
if err != nil {
t.Fatalf("ComputeCID() error = %v", err)
}
cid2, err := ComputeCID(bytes.NewReader(content))
if err != nil {
t.Fatalf("ComputeCID() error = %v", err)
}
if cid1.V1 != cid2.V1 {
t.Errorf("V1 not deterministic: %q != %q", cid1.V1, cid2.V1)
}
if cid1.Multihash != cid2.Multihash {
t.Errorf("Multihash not deterministic: %q != %q", cid1.Multihash, cid2.Multihash)
}
}
// TestKnownCID compares the CIDv1 computed for the bytes "hello" with a known
// IPFS CID.
//
// The reference value is the output of:
//
//	echo -n "hello" | ipfs add --only-hash --raw-leaves -Q
func TestKnownCID(t *testing.T) {
	// CIDv1 for the raw bytes "hello".
	const wantV1 = "bafkreibm6jg3ux5qumhcn2b3flc3tyu6dmlb4xa7u5bf44yegnrjhc4yeq"

	got, err := ComputeCID(bytes.NewReader([]byte("hello")))
	if err != nil {
		t.Fatalf("ComputeCID() error = %v", err)
	}

	if got.V1 != wantV1 {
		t.Errorf("V1 for 'hello' = %q, want %q", got.V1, wantV1)
	}

	// Both encodings are logged for manual inspection; only V1 is asserted.
	t.Logf("V1 (CIDv1): %s", got.V1)
	t.Logf("Multihash: %s", got.Multihash)
}
// TestEmptyContent compares the CIDv1 computed for zero bytes of input with a
// known IPFS CID.
//
// The reference value is the output of:
//
//	echo -n "" | ipfs add --only-hash --raw-leaves -Q
func TestEmptyContent(t *testing.T) {
	// CIDv1 for empty content (SHA256 of nothing).
	const wantV1 = "bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku"

	got, err := ComputeCID(bytes.NewReader([]byte{}))
	if err != nil {
		t.Fatalf("ComputeCID() error = %v", err)
	}

	if got.V1 != wantV1 {
		t.Errorf("V1 for empty = %q, want %q", got.V1, wantV1)
	}

	// Both encodings are logged for manual inspection; only V1 is asserted.
	t.Logf("V1 (CIDv1): %s", got.V1)
	t.Logf("Multihash: %s", got.Multihash)
}
// TestReadmeFile runs ComputeFileCID on a real file, the repository README,
// and checks only that the results have the expected prefixes. The test is
// skipped when the file does not exist.
//
// To obtain a reference CID for manual comparison, run:
//
//	ipfs add --only-hash --raw-leaves -Q ../../README.md
func TestReadmeFile(t *testing.T) {
	// Path relative to this package's directory.
	const readme = "../../README.md"

	_, statErr := os.Stat(readme)
	if os.IsNotExist(statErr) {
		t.Skip("README.md not found, skipping file test")
	}

	got, err := ComputeFileCID(readme)
	if err != nil {
		t.Fatalf("ComputeFileCID() error = %v", err)
	}

	// No fixed expected value: the README changes over time, so only the
	// shape of the identifiers is asserted.
	if hasV1Prefix := strings.HasPrefix(got.V1, "bafkrei"); !hasV1Prefix {
		t.Errorf("V1 should start with bafkrei: %s", got.V1)
	}
	if hasMHPrefix := strings.HasPrefix(got.Multihash, "Qm"); !hasMHPrefix {
		t.Errorf("Multihash should start with Qm: %s", got.Multihash)
	}

	t.Logf("README.md CIDv1: %s", got.V1)
	t.Logf("To verify: ipfs add --only-hash --raw-leaves -Q ../../README.md")
}