// CID holds the textual forms of an IPFS content identifier computed over the
// exact bytes of a single blob, treated as one raw block.
type CID struct {
	// V1 is the CIDv1 using the raw codec, written as multibase base32
	// (lowercase, unpadded, 'b' prefix): bafkrei... (59 characters).
	// This is the identifier to use when addressing the content.
	V1 string

	// Multihash is the bare SHA2-256 multihash in base58btc: Qm... (46
	// characters). It has the same textual shape as a CIDv0 but is NOT a
	// CIDv0 of this content, because CIDv0 always implies the dag-pb codec.
	// It is kept only for tools that expect the Qm... form. Upgrading it with
	// `ipfs cid format -v 1` keeps the dag-pb codec and therefore does not
	// produce V1.
	Multihash string
}

// ComputeFileCID opens the file at path and returns the CID of its raw
// content. See ComputeCID for how the identifier is constructed.
//
// The result equals the output of
//
//	ipfs add --only-hash --raw-leaves -Q <file>
//
// only while the file fits into a single chunk (256 KiB with Kubo's default
// chunker). Larger files are split by `ipfs add` and receive the CID of a DAG
// root instead, so for those the value returned here will not match: it
// identifies the whole file as one unchunked raw block.
//
// Errors from opening or reading the file are returned unchanged.
func ComputeFileCID(path string) (*CID, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	return ComputeCID(f)
}

// ComputeCID consumes r until EOF and returns the CID of everything read.
//
// The content is hashed with SHA2-256 and wrapped as
//
//	<cid-version><codec><hash-code><digest-length><digest>
//
// using CID version 1 and the raw codec, i.e. the digest covers the exact
// input bytes with no UnixFS wrapping and no chunking. A read error from r is
// returned unchanged and no CID is produced.
func ComputeCID(r io.Reader) (*CID, error) {
	const (
		cidV1      = 0x01 // CID version 1
		codecRaw   = 0x55 // multicodec "raw": payload is uninterpreted bytes
		hashSHA256 = 0x12 // multihash function code for sha2-256
	)

	h := sha256.New()
	if _, err := io.Copy(h, r); err != nil {
		return nil, err
	}

	// Lay out the binary CIDv1 once. The multihash is simply its tail, so no
	// second buffer or copy is needed.
	buf := make([]byte, 0, 4+sha256.Size)
	buf = append(buf, cidV1, codecRaw, hashSHA256, sha256.Size)
	buf = h.Sum(buf)
	multihash := buf[2:]

	// Multibase prefix 'b' announces lowercase, unpadded RFC 4648 base32.
	encoded := base32.StdEncoding.WithPadding(base32.NoPadding).EncodeToString(buf)

	return &CID{
		V1:        "b" + strings.ToLower(encoded),
		Multihash: base58Encode(multihash),
	}, nil
}

// base58Encode encodes data with the Bitcoin base58 alphabet (base58btc).
// ComputeCID uses it to render the bare multihash in its Qm... form.
//
// Every leading zero byte of data is emitted as a leading '1'; empty input
// yields the empty string.
func base58Encode(data []byte) string {
	const alphabet = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"

	// Leading zero bytes vanish in the integer conversion below, so count
	// them up front and re-add them as the alphabet's zero digit.
	zeros := 0
	for zeros < len(data) && data[zeros] == 0 {
		zeros++
	}

	// log(256)/log(58) is about 1.366, so 138/100 digits per input byte is a
	// safe upper bound for the output length.
	out := make([]byte, 0, len(data)*138/100+1)

	var (
		num   = new(big.Int).SetBytes(data)
		radix = big.NewInt(58)
		rem   = new(big.Int)
	)
	// Repeated division yields the digits least-significant first.
	for num.Sign() > 0 {
		num.DivMod(num, radix, rem)
		out = append(out, alphabet[rem.Int64()])
	}
	for i := 0; i < zeros; i++ {
		out = append(out, '1')
	}

	// Flip into most-significant-first order.
	for i, j := 0, len(out)-1; i < j; i, j = i+1, j-1 {
		out[i], out[j] = out[j], out[i]
	}
	return string(out)
}
+ +package build + +import ( + "bytes" + "os" + "strings" + "testing" +) + +func TestBase58Encode(t *testing.T) { + tests := []struct { + input []byte + expected string + }{ + {[]byte{}, ""}, + {[]byte{0}, "1"}, + {[]byte{0, 0, 0}, "111"}, + {[]byte("Hello World!"), "2NEpo7TZRRrLZSi2U"}, + } + + for _, tt := range tests { + result := base58Encode(tt.input) + if result != tt.expected { + t.Errorf("base58Encode(%v) = %q, want %q", tt.input, result, tt.expected) + } + } +} + +func TestComputeCID(t *testing.T) { + tests := []struct { + name string + content []byte + wantV1Start string + wantMHStart string + wantV1Len int + wantMHLen int + }{ + { + name: "empty content", + content: []byte{}, + wantV1Start: "bafkrei", + wantMHStart: "Qm", + wantV1Len: 59, + wantMHLen: 46, + }, + { + name: "hello world", + content: []byte("hello world"), + wantV1Start: "bafkrei", + wantMHStart: "Qm", + wantV1Len: 59, + wantMHLen: 46, + }, + { + name: "binary content", + content: []byte{0x00, 0x01, 0x02, 0x03, 0xff, 0xfe, 0xfd}, + wantV1Start: "bafkrei", + wantMHStart: "Qm", + wantV1Len: 59, + wantMHLen: 46, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cid, err := ComputeCID(bytes.NewReader(tt.content)) + if err != nil { + t.Fatalf("ComputeCID() error = %v", err) + } + + // Check CIDv1 format + if !strings.HasPrefix(cid.V1, tt.wantV1Start) { + t.Errorf("V1 = %q, want prefix %q", cid.V1, tt.wantV1Start) + } + if len(cid.V1) != tt.wantV1Len { + t.Errorf("V1 length = %d, want %d", len(cid.V1), tt.wantV1Len) + } + + // Check multihash format + if !strings.HasPrefix(cid.Multihash, tt.wantMHStart) { + t.Errorf("Multihash = %q, want prefix %q", cid.Multihash, tt.wantMHStart) + } + if len(cid.Multihash) != tt.wantMHLen { + t.Errorf("Multihash length = %d, want %d", len(cid.Multihash), tt.wantMHLen) + } + + // CIDv1 should be lowercase + if cid.V1 != strings.ToLower(cid.V1) { + t.Errorf("V1 should be lowercase: %q", cid.V1) + } + }) + } +} + +func 
TestComputeCIDDeterministic(t *testing.T) { + content := []byte("deterministic test content") + + cid1, err := ComputeCID(bytes.NewReader(content)) + if err != nil { + t.Fatalf("ComputeCID() error = %v", err) + } + + cid2, err := ComputeCID(bytes.NewReader(content)) + if err != nil { + t.Fatalf("ComputeCID() error = %v", err) + } + + if cid1.V1 != cid2.V1 { + t.Errorf("V1 not deterministic: %q != %q", cid1.V1, cid2.V1) + } + if cid1.Multihash != cid2.Multihash { + t.Errorf("Multihash not deterministic: %q != %q", cid1.Multihash, cid2.Multihash) + } +} + +// TestKnownCID verifies against a known IPFS CID. +// Verified with: echo -n "hello" | ipfs add --only-hash --raw-leaves -Q +// Output: bafkreibm6jg3ux5qumhcn2b3flc3tyu6dmlb4xa7u5bf44yegnrjhc4yeq +func TestKnownCID(t *testing.T) { + content := []byte("hello") + cid, err := ComputeCID(bytes.NewReader(content)) + if err != nil { + t.Fatalf("ComputeCID() error = %v", err) + } + + // This is the CIDv1 for raw "hello" bytes + // Verified with: echo -n "hello" | ipfs add --only-hash --raw-leaves -Q + expectedV1 := "bafkreibm6jg3ux5qumhcn2b3flc3tyu6dmlb4xa7u5bf44yegnrjhc4yeq" + if cid.V1 != expectedV1 { + t.Errorf("V1 for 'hello' = %q, want %q", cid.V1, expectedV1) + } + + t.Logf("V1 (CIDv1): %s", cid.V1) + t.Logf("Multihash: %s", cid.Multihash) +} + +// TestEmptyContent verifies the CID for empty content. 
+// Verified with: echo -n "" | ipfs add --only-hash --raw-leaves -Q +// Output: bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku +func TestEmptyContent(t *testing.T) { + content := []byte{} + cid, err := ComputeCID(bytes.NewReader(content)) + if err != nil { + t.Fatalf("ComputeCID() error = %v", err) + } + + // This is the CIDv1 for empty content (SHA256 of nothing) + expectedV1 := "bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku" + if cid.V1 != expectedV1 { + t.Errorf("V1 for empty = %q, want %q", cid.V1, expectedV1) + } + + t.Logf("V1 (CIDv1): %s", cid.V1) + t.Logf("Multihash: %s", cid.Multihash) +} + +// TestReadmeFile verifies CID computation on an actual file in the repo. +// Run: ipfs add --only-hash --raw-leaves -Q ../../README.md +// to get the expected CID for comparison. +func TestReadmeFile(t *testing.T) { + // This test only runs if the README.md exists (it should in the repo) + path := "../../README.md" + if _, err := os.Stat(path); os.IsNotExist(err) { + t.Skip("README.md not found, skipping file test") + } + + cid, err := ComputeFileCID(path) + if err != nil { + t.Fatalf("ComputeFileCID() error = %v", err) + } + + // Just verify it produces valid-looking CIDs + if !strings.HasPrefix(cid.V1, "bafkrei") { + t.Errorf("V1 should start with bafkrei: %s", cid.V1) + } + if !strings.HasPrefix(cid.Multihash, "Qm") { + t.Errorf("Multihash should start with Qm: %s", cid.Multihash) + } + + t.Logf("README.md CIDv1: %s", cid.V1) + t.Logf("To verify: ipfs add --only-hash --raw-leaves -Q ../../README.md") +}