diff --git a/accounts/accounts.go b/accounts/accounts.go index 6249beed0d..375470b7ae 100644 --- a/accounts/accounts.go +++ b/accounts/accounts.go @@ -196,7 +196,7 @@ func TextHash(data []byte) []byte { // This gives context to the signed message and prevents signing of transactions. func TextAndHash(data []byte) ([]byte, string) { msg := fmt.Sprintf("\x19Ethereum Signed Message:\n%d%s", len(data), data) - hasher := keccak.NewLegacyKeccak256() + hasher := keccak.NewFastKeccak() hasher.Write([]byte(msg)) return hasher.Sum(nil), msg } diff --git a/cmd/evm/internal/t8ntool/execution.go b/cmd/evm/internal/t8ntool/execution.go index 532d6e6b94..58c06ffa51 100644 --- a/cmd/evm/internal/t8ntool/execution.go +++ b/cmd/evm/internal/t8ntool/execution.go @@ -418,7 +418,7 @@ func MakePreState(db ethdb.Database, accounts types.GenesisAlloc, isBintrie bool } func rlpHash(x any) (h common.Hash) { - hw := keccak.NewLegacyKeccak256() + hw := keccak.NewFastKeccak() rlp.Encode(hw, x) hw.Sum(h[:0]) return h diff --git a/common/bitutil/bitutil.go b/common/bitutil/bitutil.go index 99a8c2ee18..578da1cf49 100644 --- a/common/bitutil/bitutil.go +++ b/common/bitutil/bitutil.go @@ -8,6 +8,7 @@ package bitutil import ( + "crypto/subtle" "runtime" "unsafe" ) @@ -15,6 +16,19 @@ import ( const wordSize = int(unsafe.Sizeof(uintptr(0))) const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" +// XORBytes xors the bytes in a and b. The destination is assumed to have enough +// space. Returns the number of bytes xor'd. +// +// If dst does not have length at least n, +// XORBytes panics without writing anything to dst. +// +// dst and x or y may overlap exactly or not at all, +// otherwise XORBytes may panic. +// +// Deprecated: use crypto/subtle.XORBytes +func XORBytes(dst, a, b []byte) int { + return subtle.XORBytes(dst, a, b) +} // ANDBytes ands the bytes in a and b. 
The destination is assumed to have enough // space. Returns the number of bytes and'd. diff --git a/common/bitutil/bitutil_test.go b/common/bitutil/bitutil_test.go index 0cad12bda7..1748029794 100644 --- a/common/bitutil/bitutil_test.go +++ b/common/bitutil/bitutil_test.go @@ -11,6 +11,45 @@ import ( "testing" ) +// Tests that bitwise XOR works for various alignments. +func TestXOR(t *testing.T) { + for alignP := 0; alignP < 2; alignP++ { + for alignQ := 0; alignQ < 2; alignQ++ { + for alignD := 0; alignD < 2; alignD++ { + p := make([]byte, 1023)[alignP:] + q := make([]byte, 1023)[alignQ:] + + for i := 0; i < len(p); i++ { + p[i] = byte(i) + } + for i := 0; i < len(q); i++ { + q[i] = byte(len(q) - i) + } + d1 := make([]byte, 1023+alignD)[alignD:] + d2 := make([]byte, 1023+alignD)[alignD:] + + XORBytes(d1, p, q) + naiveXOR(d2, p, q) + if !bytes.Equal(d1, d2) { + t.Error("not equal", d1, d2) + } + } + } + } +} + +// naiveXOR xors bytes one by one. +func naiveXOR(dst, a, b []byte) int { + n := len(a) + if len(b) < n { + n = len(b) + } + for i := 0; i < n; i++ { + dst[i] = a[i] ^ b[i] + } + return n +} + // Tests that bitwise AND works for various alignments. func TestAND(t *testing.T) { for alignP := 0; alignP < 2; alignP++ { @@ -85,6 +124,32 @@ func TestTest(t *testing.T) { } } +// Benchmarks the potentially optimized XOR performance. +func BenchmarkFastXOR1KB(b *testing.B) { benchmarkFastXOR(b, 1024) } +func BenchmarkFastXOR2KB(b *testing.B) { benchmarkFastXOR(b, 2048) } +func BenchmarkFastXOR4KB(b *testing.B) { benchmarkFastXOR(b, 4096) } + +func benchmarkFastXOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + XORBytes(p, p, q) + } +} + +// Benchmarks the baseline XOR performance. 
+func BenchmarkBaseXOR1KB(b *testing.B) { benchmarkBaseXOR(b, 1024) } +func BenchmarkBaseXOR2KB(b *testing.B) { benchmarkBaseXOR(b, 2048) } +func BenchmarkBaseXOR4KB(b *testing.B) { benchmarkBaseXOR(b, 4096) } + +func benchmarkBaseXOR(b *testing.B, size int) { + p, q := make([]byte, size), make([]byte, size) + + for i := 0; i < b.N; i++ { + naiveXOR(p, p, q) + } +} + // Benchmarks the potentially optimized AND performance. func BenchmarkFastAND1KB(b *testing.B) { benchmarkFastAND(b, 1024) } func BenchmarkFastAND2KB(b *testing.B) { benchmarkFastAND(b, 2048) } diff --git a/common/types.go b/common/types.go index 308b8ed879..6200a82ab2 100644 --- a/common/types.go +++ b/common/types.go @@ -271,7 +271,7 @@ func (a *Address) checksumHex() []byte { buf := a.hex() // compute checksum - sha := keccak.NewLegacyKeccak256() + sha := keccak.NewFastKeccak() sha.Write(buf[2:]) hash := sha.Sum(nil) for i := 2; i < len(buf); i++ { diff --git a/consensus/clique/clique.go b/consensus/clique/clique.go index 87cd407a71..570161fc95 100644 --- a/consensus/clique/clique.go +++ b/consensus/clique/clique.go @@ -37,7 +37,6 @@ import ( "github.com/ethereum/go-ethereum/core/types" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" @@ -642,9 +641,11 @@ func (c *Clique) Close() error { // SealHash returns the hash of a block prior to it being sealed. 
func SealHash(header *types.Header) (hash common.Hash) { - hasher := keccak.NewLegacyKeccak256() + hasher := crypto.NewKeccakState() + defer crypto.ReturnToPool(hasher) + encodeSigHeader(hasher, header) - hasher.(crypto.KeccakState).Read(hash[:]) + hasher.Sum(hash[:0]) return hash } diff --git a/consensus/ethash/consensus.go b/consensus/ethash/consensus.go index f90001fc1a..737d2079f3 100644 --- a/consensus/ethash/consensus.go +++ b/consensus/ethash/consensus.go @@ -527,7 +527,7 @@ func (ethash *Ethash) FinalizeAndAssemble(chain consensus.ChainHeaderReader, hea // SealHash returns the hash of a block prior to it being sealed. func (ethash *Ethash) SealHash(header *types.Header) (hash common.Hash) { - hasher := keccak.NewLegacyKeccak256() + hasher := keccak.NewFastKeccak() enc := []interface{}{ header.ParentHash, diff --git a/core/rawdb/accessors_chain_test.go b/core/rawdb/accessors_chain_test.go index 280fc21e8f..751b7e353a 100644 --- a/core/rawdb/accessors_chain_test.go +++ b/core/rawdb/accessors_chain_test.go @@ -69,7 +69,7 @@ func TestBodyStorage(t *testing.T) { // Create a test body to move around the database and make sure it's really new body := &types.Body{Uncles: []*types.Header{{Extra: []byte("test header")}}} - hasher := keccak.NewLegacyKeccak256() + hasher := keccak.NewFastKeccak() rlp.Encode(hasher, body) hash := common.BytesToHash(hasher.Sum(nil)) diff --git a/core/rlp_test.go b/core/rlp_test.go index f3655bf533..5cf774044a 100644 --- a/core/rlp_test.go +++ b/core/rlp_test.go @@ -147,7 +147,7 @@ func BenchmarkHashing(b *testing.B) { blockRlp, _ = rlp.EncodeToBytes(block) } var got common.Hash - var hasher = keccak.NewLegacyKeccak256() + var hasher = keccak.NewFastKeccak() b.Run("iteratorhashing", func(b *testing.B) { for b.Loop() { var hash common.Hash diff --git a/core/state_processor_test.go b/core/state_processor_test.go index 3bf372800b..bcf2ece73d 100644 --- a/core/state_processor_test.go +++ b/core/state_processor_test.go @@ -398,7 +398,7 @@ func 
GenerateBadBlock(parent *types.Block, engine consensus.Engine, txs types.Tr var receipts []*types.Receipt // The post-state result doesn't need to be correct (this is a bad block), but we do need something there // Preferably something unique. So let's use a combo of blocknum + txhash - hasher := keccak.NewLegacyKeccak256() + hasher := keccak.NewFastKeccak() hasher.Write(header.Number.Bytes()) var cumulativeGas uint64 var nBlobs int diff --git a/core/types/bloom9.go b/core/types/bloom9.go index 1d57e8e4bc..7473426414 100644 --- a/core/types/bloom9.go +++ b/core/types/bloom9.go @@ -23,7 +23,7 @@ import ( "github.com/ethereum/go-ethereum/common/bitutil" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/crypto/keccak" ) type bytesBacked interface { @@ -141,7 +141,7 @@ func Bloom9(data []byte) []byte { // bloomValues returns the bytes (index-value pairs) to set for the given data func bloomValues(data []byte, hashbuf *[6]byte) (uint, byte, uint, byte, uint, byte) { - sha := hasherPool.Get().(crypto.KeccakState) + sha := hasherPool.Get().(keccak.KeccakState) sha.Reset() sha.Write(data) sha.Read(hashbuf[:]) diff --git a/core/types/hashing.go b/core/types/hashing.go index 98fe64e15a..2b50cf92b2 100644 --- a/core/types/hashing.go +++ b/core/types/hashing.go @@ -24,6 +24,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/rlp" ) @@ -55,7 +56,7 @@ func getPooledBuffer(size uint64) ([]byte, *bytes.Buffer, error) { // rlpHash encodes x and hashes the encoded bytes. 
func rlpHash(x interface{}) (h common.Hash) { - sha := hasherPool.Get().(crypto.KeccakState) + sha := hasherPool.Get().(keccak.KeccakState) defer hasherPool.Put(sha) sha.Reset() rlp.Encode(sha, x) @@ -66,7 +67,7 @@ func rlpHash(x interface{}) (h common.Hash) { // prefixedRlpHash writes the prefix into the hasher before rlp-encoding x. // It's used for typed transactions. func prefixedRlpHash(prefix byte, x interface{}) (h common.Hash) { - sha := hasherPool.Get().(crypto.KeccakState) + sha := hasherPool.Get().(keccak.KeccakState) defer hasherPool.Put(sha) sha.Reset() sha.Write([]byte{prefix}) diff --git a/crypto/crypto.go b/crypto/crypto.go index db6b6ee071..138630fe00 100644 --- a/crypto/crypto.go +++ b/crypto/crypto.go @@ -24,13 +24,13 @@ import ( "encoding/hex" "errors" "fmt" - "hash" "io" "math/big" "os" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/math" + "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/rlp" ) @@ -59,16 +59,8 @@ type EllipticCurve interface { Unmarshal(data []byte) (x, y *big.Int) } -// KeccakState wraps sha3.state. In addition to the usual hash methods, it also supports -// Read to get a variable amount of data from the hash state. Read is faster than Sum -// because it doesn't copy the internal state, but also modifies the internal state. 
-type KeccakState interface { - hash.Hash - Read([]byte) (int, error) -} - // HashData hashes the provided data using the KeccakState and returns a 32 byte hash -func HashData(kh KeccakState, data []byte) (h common.Hash) { +func HashData(kh keccak.KeccakState, data []byte) (h common.Hash) { kh.Reset() kh.Write(data) kh.Read(h[:]) diff --git a/crypto/keccak.go b/crypto/keccak.go index 3fafddc92e..97aefa4ede 100644 --- a/crypto/keccak.go +++ b/crypto/keccak.go @@ -26,38 +26,40 @@ import ( ) // NewKeccakState creates a new KeccakState -func NewKeccakState() KeccakState { - return keccak.NewLegacyKeccak256().(KeccakState) +func NewKeccakState() keccak.KeccakState { + h := hasherPool.Get().(keccak.KeccakState) + h.Reset() + return h } +func ReturnToPool(h keccak.KeccakState) { hasherPool.Put(h) } + var hasherPool = sync.Pool{ New: func() any { - return keccak.NewLegacyKeccak256().(KeccakState) + return keccak.NewFastKeccak() }, } // Keccak256 calculates and returns the Keccak256 hash of the input data. func Keccak256(data ...[]byte) []byte { b := make([]byte, 32) - d := hasherPool.Get().(KeccakState) - d.Reset() + d := NewKeccakState() for _, b := range data { d.Write(b) } d.Read(b) - hasherPool.Put(d) + ReturnToPool(d) return b } // Keccak256Hash calculates and returns the Keccak256 hash of the input data, // converting it to an internal Hash data structure. func Keccak256Hash(data ...[]byte) (h common.Hash) { - d := hasherPool.Get().(KeccakState) - d.Reset() + d := NewKeccakState() for _, b := range data { d.Write(b) } - d.Read(h[:]) - hasherPool.Put(d) + d.Read(h[:]) //nolint:errcheck + ReturnToPool(d) return h } diff --git a/crypto/keccak/hashes.go b/crypto/keccak/hashes.go deleted file mode 100644 index c78c5fe992..0000000000 --- a/crypto/keccak/hashes.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package keccak - -// This file provides functions for creating instances of the SHA-3 -// and SHAKE hash functions, as well as utility functions for hashing -// bytes. - -import ( - "hash" -) - -const ( - dsbyteSHA3 = 0b00000110 - dsbyteKeccak = 0b00000001 - dsbyteShake = 0b00011111 - dsbyteCShake = 0b00000100 - - // rateK[c] is the rate in bytes for Keccak[c] where c is the capacity in - // bits. Given the sponge size is 1600 bits, the rate is 1600 - c bits. - rateK256 = (1600 - 256) / 8 - rateK448 = (1600 - 448) / 8 - rateK512 = (1600 - 512) / 8 - rateK768 = (1600 - 768) / 8 - rateK1024 = (1600 - 1024) / 8 -) - -// NewLegacyKeccak256 creates a new Keccak-256 hash. -// -// Only use this function if you require compatibility with an existing cryptosystem -// that uses non-standard padding. All other users should use New256 instead. -func NewLegacyKeccak256() hash.Hash { - return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteKeccak} -} - -// NewLegacyKeccak512 creates a new Keccak-512 hash. -// -// Only use this function if you require compatibility with an existing cryptosystem -// that uses non-standard padding. All other users should use New512 instead. -func NewLegacyKeccak512() hash.Hash { - return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteKeccak} -} diff --git a/crypto/keccak/keccaf_arm64.s b/crypto/keccak/keccaf_arm64.s new file mode 100644 index 0000000000..21af4540f9 --- /dev/null +++ b/crypto/keccak/keccaf_arm64.s @@ -0,0 +1,338 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +//go:build !purego + +#include "textflag.h" + +// func keccakF1600(a *[200]byte) +TEXT ·keccakF1600(SB), $200-8 + MOVD a+0(FP), R0 + MOVD $round_consts<>(SB), R1 + MOVD $24, R2 // counter for loop + + VLD1.P 16(R0), [V0.D1, V1.D1] + VLD1.P 16(R0), [V2.D1, V3.D1] + VLD1.P 16(R0), [V4.D1, V5.D1] + VLD1.P 16(R0), [V6.D1, V7.D1] + VLD1.P 16(R0), [V8.D1, V9.D1] + VLD1.P 16(R0), [V10.D1, V11.D1] + VLD1.P 16(R0), [V12.D1, V13.D1] + VLD1.P 16(R0), [V14.D1, V15.D1] + VLD1.P 16(R0), [V16.D1, V17.D1] + VLD1.P 16(R0), [V18.D1, V19.D1] + VLD1.P 16(R0), [V20.D1, V21.D1] + VLD1.P 16(R0), [V22.D1, V23.D1] + VLD1 (R0), [V24.D1] + + SUB $192, R0, R0 + +loop: + // theta + VEOR3 V20.B16, V15.B16, V10.B16, V25.B16 + VEOR3 V21.B16, V16.B16, V11.B16, V26.B16 + VEOR3 V22.B16, V17.B16, V12.B16, V27.B16 + VEOR3 V23.B16, V18.B16, V13.B16, V28.B16 + VEOR3 V24.B16, V19.B16, V14.B16, V29.B16 + VEOR3 V25.B16, V5.B16, V0.B16, V25.B16 + VEOR3 V26.B16, V6.B16, V1.B16, V26.B16 + VEOR3 V27.B16, V7.B16, V2.B16, V27.B16 + VEOR3 V28.B16, V8.B16, V3.B16, V28.B16 + VEOR3 V29.B16, V9.B16, V4.B16, V29.B16 + + VRAX1 V27.D2, V25.D2, V30.D2 + VRAX1 V28.D2, V26.D2, V31.D2 + VRAX1 V29.D2, V27.D2, V27.D2 + VRAX1 V25.D2, V28.D2, V28.D2 + VRAX1 V26.D2, V29.D2, V29.D2 + + // theta and rho and Pi + VEOR V29.B16, V0.B16, V0.B16 + + VXAR $63, V30.D2, V1.D2, V25.D2 + + VXAR $20, V30.D2, V6.D2, V1.D2 + VXAR $44, V28.D2, V9.D2, V6.D2 + VXAR $3, V31.D2, V22.D2, V9.D2 + VXAR $25, V28.D2, V14.D2, V22.D2 + VXAR $46, V29.D2, V20.D2, V14.D2 + + VXAR $2, V31.D2, V2.D2, V26.D2 + + VXAR $21, V31.D2, V12.D2, V2.D2 + VXAR $39, V27.D2, V13.D2, V12.D2 + VXAR $56, V28.D2, V19.D2, V13.D2 + VXAR $8, V27.D2, V23.D2, V19.D2 + VXAR $23, V29.D2, V15.D2, V23.D2 + + VXAR $37, V28.D2, V4.D2, V15.D2 + + VXAR $50, V28.D2, V24.D2, V28.D2 + VXAR $62, V30.D2, V21.D2, V24.D2 + VXAR $9, V27.D2, V8.D2, V8.D2 + VXAR $19, V30.D2, V16.D2, V4.D2 + VXAR $28, V29.D2, V5.D2, V16.D2 + + VXAR $36, V27.D2, V3.D2, V5.D2 + + VXAR $43, V27.D2, V18.D2, V27.D2 + 
VXAR $49, V31.D2, V17.D2, V3.D2 + VXAR $54, V30.D2, V11.D2, V30.D2 + VXAR $58, V31.D2, V7.D2, V31.D2 + VXAR $61, V29.D2, V10.D2, V29.D2 + + // chi and iota + VBCAX V8.B16, V22.B16, V26.B16, V20.B16 + VBCAX V22.B16, V23.B16, V8.B16, V21.B16 + VBCAX V23.B16, V24.B16, V22.B16, V22.B16 + VBCAX V24.B16, V26.B16, V23.B16, V23.B16 + VBCAX V26.B16, V8.B16, V24.B16, V24.B16 + + VLD1R.P 8(R1), [V26.D2] + + VBCAX V3.B16, V19.B16, V30.B16, V17.B16 + VBCAX V19.B16, V15.B16, V3.B16, V18.B16 + VBCAX V15.B16, V16.B16, V19.B16, V19.B16 + VBCAX V16.B16, V30.B16, V15.B16, V15.B16 + VBCAX V30.B16, V3.B16, V16.B16, V16.B16 + + VBCAX V31.B16, V12.B16, V25.B16, V10.B16 + VBCAX V12.B16, V13.B16, V31.B16, V11.B16 + VBCAX V13.B16, V14.B16, V12.B16, V12.B16 + VBCAX V14.B16, V25.B16, V13.B16, V13.B16 + VBCAX V25.B16, V31.B16, V14.B16, V14.B16 + + VBCAX V4.B16, V9.B16, V29.B16, V7.B16 + VBCAX V9.B16, V5.B16, V4.B16, V8.B16 + VBCAX V5.B16, V6.B16, V9.B16, V9.B16 + VBCAX V6.B16, V29.B16, V5.B16, V5.B16 + VBCAX V29.B16, V4.B16, V6.B16, V6.B16 + + VBCAX V28.B16, V0.B16, V27.B16, V3.B16 + VBCAX V0.B16, V1.B16, V28.B16, V4.B16 + + VBCAX V1.B16, V2.B16, V0.B16, V0.B16 // iota (chi part) + + VBCAX V2.B16, V27.B16, V1.B16, V1.B16 + VBCAX V27.B16, V28.B16, V2.B16, V2.B16 + + VEOR V26.B16, V0.B16, V0.B16 // iota + + SUB $1, R2, R2 + CBNZ R2, loop + + VST1.P [V0.D1, V1.D1], 16(R0) + VST1.P [V2.D1, V3.D1], 16(R0) + VST1.P [V4.D1, V5.D1], 16(R0) + VST1.P [V6.D1, V7.D1], 16(R0) + VST1.P [V8.D1, V9.D1], 16(R0) + VST1.P [V10.D1, V11.D1], 16(R0) + VST1.P [V12.D1, V13.D1], 16(R0) + VST1.P [V14.D1, V15.D1], 16(R0) + VST1.P [V16.D1, V17.D1], 16(R0) + VST1.P [V18.D1, V19.D1], 16(R0) + VST1.P [V20.D1, V21.D1], 16(R0) + VST1.P [V22.D1, V23.D1], 16(R0) + VST1 [V24.D1], (R0) + + RET + +// func xorAndPermute(state *[200]byte, buf *byte) +// Loads state, XORs a full rate (136 bytes = 17 lanes) of data, then runs keccakF1600. +// Eliminates one state store+load cycle per block vs separate xorIn + keccakF1600. 
+TEXT ·xorAndPermute(SB), $200-16 + MOVD state+0(FP), R0 + MOVD buf+8(FP), R3 + MOVD $round_consts<>(SB), R1 + MOVD $24, R2 + + // Load state and XOR data for lanes 0-15 (8 pairs × 16 bytes = 128 bytes) + VLD1.P 16(R0), [V0.D1, V1.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V0.B16, V0.B16 + VEOR V26.B16, V1.B16, V1.B16 + + VLD1.P 16(R0), [V2.D1, V3.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V2.B16, V2.B16 + VEOR V26.B16, V3.B16, V3.B16 + + VLD1.P 16(R0), [V4.D1, V5.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V4.B16, V4.B16 + VEOR V26.B16, V5.B16, V5.B16 + + VLD1.P 16(R0), [V6.D1, V7.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V6.B16, V6.B16 + VEOR V26.B16, V7.B16, V7.B16 + + VLD1.P 16(R0), [V8.D1, V9.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V8.B16, V8.B16 + VEOR V26.B16, V9.B16, V9.B16 + + VLD1.P 16(R0), [V10.D1, V11.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V10.B16, V10.B16 + VEOR V26.B16, V11.B16, V11.B16 + + VLD1.P 16(R0), [V12.D1, V13.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V12.B16, V12.B16 + VEOR V26.B16, V13.B16, V13.B16 + + VLD1.P 16(R0), [V14.D1, V15.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V14.B16, V14.B16 + VEOR V26.B16, V15.B16, V15.B16 + + // Lane 16-17: XOR only lane 16 (last data lane, 8 bytes at data offset 128) + VLD1.P 16(R0), [V16.D1, V17.D1] + VLD1 (R3), [V25.D1] + VEOR V25.B16, V16.B16, V16.B16 + + // Remaining state lanes 18-24 (no data to XOR) + VLD1.P 16(R0), [V18.D1, V19.D1] + VLD1.P 16(R0), [V20.D1, V21.D1] + VLD1.P 16(R0), [V22.D1, V23.D1] + VLD1 (R0), [V24.D1] + + SUB $192, R0, R0 + +loop_xp: + // theta + VEOR3 V20.B16, V15.B16, V10.B16, V25.B16 + VEOR3 V21.B16, V16.B16, V11.B16, V26.B16 + VEOR3 V22.B16, V17.B16, V12.B16, V27.B16 + VEOR3 V23.B16, V18.B16, V13.B16, V28.B16 + VEOR3 V24.B16, V19.B16, V14.B16, V29.B16 + VEOR3 V25.B16, V5.B16, V0.B16, V25.B16 + VEOR3 V26.B16, V6.B16, V1.B16, V26.B16 + VEOR3 V27.B16, V7.B16, V2.B16, V27.B16 + VEOR3 
V28.B16, V8.B16, V3.B16, V28.B16 + VEOR3 V29.B16, V9.B16, V4.B16, V29.B16 + + VRAX1 V27.D2, V25.D2, V30.D2 + VRAX1 V28.D2, V26.D2, V31.D2 + VRAX1 V29.D2, V27.D2, V27.D2 + VRAX1 V25.D2, V28.D2, V28.D2 + VRAX1 V26.D2, V29.D2, V29.D2 + + // theta and rho and Pi + VEOR V29.B16, V0.B16, V0.B16 + + VXAR $63, V30.D2, V1.D2, V25.D2 + + VXAR $20, V30.D2, V6.D2, V1.D2 + VXAR $44, V28.D2, V9.D2, V6.D2 + VXAR $3, V31.D2, V22.D2, V9.D2 + VXAR $25, V28.D2, V14.D2, V22.D2 + VXAR $46, V29.D2, V20.D2, V14.D2 + + VXAR $2, V31.D2, V2.D2, V26.D2 + + VXAR $21, V31.D2, V12.D2, V2.D2 + VXAR $39, V27.D2, V13.D2, V12.D2 + VXAR $56, V28.D2, V19.D2, V13.D2 + VXAR $8, V27.D2, V23.D2, V19.D2 + VXAR $23, V29.D2, V15.D2, V23.D2 + + VXAR $37, V28.D2, V4.D2, V15.D2 + + VXAR $50, V28.D2, V24.D2, V28.D2 + VXAR $62, V30.D2, V21.D2, V24.D2 + VXAR $9, V27.D2, V8.D2, V8.D2 + VXAR $19, V30.D2, V16.D2, V4.D2 + VXAR $28, V29.D2, V5.D2, V16.D2 + + VXAR $36, V27.D2, V3.D2, V5.D2 + + VXAR $43, V27.D2, V18.D2, V27.D2 + VXAR $49, V31.D2, V17.D2, V3.D2 + VXAR $54, V30.D2, V11.D2, V30.D2 + VXAR $58, V31.D2, V7.D2, V31.D2 + VXAR $61, V29.D2, V10.D2, V29.D2 + + // chi and iota + VBCAX V8.B16, V22.B16, V26.B16, V20.B16 + VBCAX V22.B16, V23.B16, V8.B16, V21.B16 + VBCAX V23.B16, V24.B16, V22.B16, V22.B16 + VBCAX V24.B16, V26.B16, V23.B16, V23.B16 + VBCAX V26.B16, V8.B16, V24.B16, V24.B16 + + VLD1R.P 8(R1), [V26.D2] + + VBCAX V3.B16, V19.B16, V30.B16, V17.B16 + VBCAX V19.B16, V15.B16, V3.B16, V18.B16 + VBCAX V15.B16, V16.B16, V19.B16, V19.B16 + VBCAX V16.B16, V30.B16, V15.B16, V15.B16 + VBCAX V30.B16, V3.B16, V16.B16, V16.B16 + + VBCAX V31.B16, V12.B16, V25.B16, V10.B16 + VBCAX V12.B16, V13.B16, V31.B16, V11.B16 + VBCAX V13.B16, V14.B16, V12.B16, V12.B16 + VBCAX V14.B16, V25.B16, V13.B16, V13.B16 + VBCAX V25.B16, V31.B16, V14.B16, V14.B16 + + VBCAX V4.B16, V9.B16, V29.B16, V7.B16 + VBCAX V9.B16, V5.B16, V4.B16, V8.B16 + VBCAX V5.B16, V6.B16, V9.B16, V9.B16 + VBCAX V6.B16, V29.B16, V5.B16, V5.B16 + VBCAX V29.B16, 
V4.B16, V6.B16, V6.B16 + + VBCAX V28.B16, V0.B16, V27.B16, V3.B16 + VBCAX V0.B16, V1.B16, V28.B16, V4.B16 + + VBCAX V1.B16, V2.B16, V0.B16, V0.B16 // iota (chi part) + + VBCAX V2.B16, V27.B16, V1.B16, V1.B16 + VBCAX V27.B16, V28.B16, V2.B16, V2.B16 + + VEOR V26.B16, V0.B16, V0.B16 // iota + + SUB $1, R2, R2 + CBNZ R2, loop_xp + + VST1.P [V0.D1, V1.D1], 16(R0) + VST1.P [V2.D1, V3.D1], 16(R0) + VST1.P [V4.D1, V5.D1], 16(R0) + VST1.P [V6.D1, V7.D1], 16(R0) + VST1.P [V8.D1, V9.D1], 16(R0) + VST1.P [V10.D1, V11.D1], 16(R0) + VST1.P [V12.D1, V13.D1], 16(R0) + VST1.P [V14.D1, V15.D1], 16(R0) + VST1.P [V16.D1, V17.D1], 16(R0) + VST1.P [V18.D1, V19.D1], 16(R0) + VST1.P [V20.D1, V21.D1], 16(R0) + VST1.P [V22.D1, V23.D1], 16(R0) + VST1 [V24.D1], (R0) + + RET + +DATA round_consts<>+0x00(SB)/8, $0x0000000000000001 +DATA round_consts<>+0x08(SB)/8, $0x0000000000008082 +DATA round_consts<>+0x10(SB)/8, $0x800000000000808a +DATA round_consts<>+0x18(SB)/8, $0x8000000080008000 +DATA round_consts<>+0x20(SB)/8, $0x000000000000808b +DATA round_consts<>+0x28(SB)/8, $0x0000000080000001 +DATA round_consts<>+0x30(SB)/8, $0x8000000080008081 +DATA round_consts<>+0x38(SB)/8, $0x8000000000008009 +DATA round_consts<>+0x40(SB)/8, $0x000000000000008a +DATA round_consts<>+0x48(SB)/8, $0x0000000000000088 +DATA round_consts<>+0x50(SB)/8, $0x0000000080008009 +DATA round_consts<>+0x58(SB)/8, $0x000000008000000a +DATA round_consts<>+0x60(SB)/8, $0x000000008000808b +DATA round_consts<>+0x68(SB)/8, $0x800000000000008b +DATA round_consts<>+0x70(SB)/8, $0x8000000000008089 +DATA round_consts<>+0x78(SB)/8, $0x8000000000008003 +DATA round_consts<>+0x80(SB)/8, $0x8000000000008002 +DATA round_consts<>+0x88(SB)/8, $0x8000000000000080 +DATA round_consts<>+0x90(SB)/8, $0x000000000000800a +DATA round_consts<>+0x98(SB)/8, $0x800000008000000a +DATA round_consts<>+0xA0(SB)/8, $0x8000000080008081 +DATA round_consts<>+0xA8(SB)/8, $0x8000000000008080 +DATA round_consts<>+0xB0(SB)/8, $0x0000000080000001 +DATA 
round_consts<>+0xB8(SB)/8, $0x8000000080008008 +GLOBL round_consts<>(SB), NOPTR|RODATA, $192 diff --git a/crypto/keccak/keccak.go b/crypto/keccak/keccak.go new file mode 100644 index 0000000000..79487509fb --- /dev/null +++ b/crypto/keccak/keccak.go @@ -0,0 +1,20 @@ +// Package keccak provides Keccak-256 hashing with platform-specific acceleration. +package keccak + +import "hash" + +// KeccakState wraps the keccak hasher. In addition to the usual hash methods, it also supports +// Read to get a variable amount of data from the hash state. Read is faster than Sum +// because it doesn't copy the internal state, but also modifies the internal state. +type KeccakState interface { + hash.Hash + Read([]byte) (int, error) +} + +const rate = 136 // sponge rate for Keccak-256: (1600 - 2*256) / 8 + +var _ KeccakState = (*Hasher)(nil) + +func NewFastKeccak() *Hasher { + return &Hasher{} +} \ No newline at end of file diff --git a/crypto/keccak/keccak_arm64.go b/crypto/keccak/keccak_arm64.go new file mode 100644 index 0000000000..aa3d1ea2db --- /dev/null +++ b/crypto/keccak/keccak_arm64.go @@ -0,0 +1,109 @@ +//go:build arm64 && !purego + +package keccak + +import ( + "runtime" + + "golang.org/x/crypto/sha3" + "golang.org/x/sys/cpu" +) + +// Apple Silicon always has Armv8.2-A SHA3 extensions (VEOR3, VRAX1, VXAR, VBCAX). +// On other ARM64 platforms, detect at runtime via CPU feature flags. +// When SHA3 is unavailable, falls back to x/crypto/sha3. +var useSHA3 = runtime.GOOS == "darwin" || runtime.GOOS == "ios" || cpu.ARM64.HasSHA3 + +//go:noescape +func keccakF1600(a *[200]byte) + +//go:noescape +func xorAndPermute(state *[200]byte, buf *byte) + +// Sum256 computes the Keccak-256 hash of data. Zero heap allocations when SHA3 is available. 
+func Sum256(data []byte) [32]byte { + if !useSHA3 { + return sum256XCrypto(data) + } + return sum256Sponge(data) +} + +func sum256XCrypto(data []byte) [32]byte { + h := sha3.NewLegacyKeccak256() + h.Write(data) + var out [32]byte + h.Sum(out[:0]) + return out +} + +// Hasher is a streaming Keccak-256 hasher. +// Uses NEON SHA3 assembly when available, x/crypto/sha3 otherwise. +type Hasher struct { + sponge + xc KeccakState // x/crypto fallback +} + +// Reset resets the hasher to its initial state. +func (h *Hasher) Reset() { + if useSHA3 { + h.sponge.Reset() + } else { + if h.xc == nil { + h.xc = sha3.NewLegacyKeccak256().(KeccakState) + } else { + h.xc.Reset() + } + } +} + +// Write absorbs data into the hasher. +// Panics if called after Read. +func (h *Hasher) Write(p []byte) (int, error) { + if !useSHA3 { + if h.xc == nil { + h.xc = sha3.NewLegacyKeccak256().(KeccakState) + } + return h.xc.Write(p) + } + return h.sponge.Write(p) +} + +// Sum256 finalizes and returns the 32-byte Keccak-256 digest. +// Does not modify the hasher state. +func (h *Hasher) Sum256() [32]byte { + if !useSHA3 { + if h.xc == nil { + return Sum256(nil) + } + var out [32]byte + h.xc.Sum(out[:0]) + return out + } + return h.sponge.Sum256() +} + +// Sum appends the current Keccak-256 digest to b and returns the resulting slice. +// Does not modify the hasher state. +func (h *Hasher) Sum(b []byte) []byte { + if !useSHA3 { + if h.xc == nil { + d := Sum256(nil) + return append(b, d[:]...) + } + return h.xc.Sum(b) + } + return h.sponge.Sum(b) +} + +// Read squeezes an arbitrary number of bytes from the sponge. +// On the first call, it pads and permutes, transitioning from absorbing to squeezing. +// Subsequent calls to Write will panic. It never returns an error. 
+func (h *Hasher) Read(out []byte) (int, error) { + if !useSHA3 { + if h.xc == nil { + h.xc = sha3.NewLegacyKeccak256().(KeccakState) + } + return h.xc.Read(out) + } + return h.sponge.Read(out) +} \ No newline at end of file diff --git a/crypto/keccak/keccak_asm.go b/crypto/keccak/keccak_asm.go new file mode 100644 index 0000000000..4cf52233e2 --- /dev/null +++ b/crypto/keccak/keccak_asm.go @@ -0,0 +1,133 @@ +//go:build (amd64 || arm64) && !purego + +package keccak + +import "unsafe" + +// sponge is the core Keccak-256 sponge state used by native (asm) implementations. +type sponge struct { + state [200]byte + buf [rate]byte + absorbed int + squeezing bool + readIdx int // index into state for next Read byte +} + +// Reset resets the sponge to its initial state. +func (s *sponge) Reset() { + s.state = [200]byte{} + s.absorbed = 0 + s.squeezing = false + s.readIdx = 0 +} + +// Write absorbs data into the sponge. +// Panics if called after Read. +func (s *sponge) Write(p []byte) (int, error) { + if s.squeezing { + panic("keccak: Write after Read") + } + n := len(p) + if s.absorbed > 0 { + x := copy(s.buf[s.absorbed:rate], p) + s.absorbed += x + p = p[x:] + if s.absorbed == rate { + xorAndPermute(&s.state, &s.buf[0]) + s.absorbed = 0 + } + } + + for len(p) >= rate { + xorAndPermute(&s.state, &p[0]) + p = p[rate:] + } + + if len(p) > 0 { + s.absorbed = copy(s.buf[:], p) + } + return n, nil +} + +// Sum256 finalizes and returns the 32-byte Keccak-256 digest. +// Does not modify the sponge state. +func (s *sponge) Sum256() [32]byte { + state := s.state + xorIn(&state, s.buf[:s.absorbed]) + state[s.absorbed] ^= 0x01 + state[rate-1] ^= 0x80 + keccakF1600(&state) + return [32]byte(state[:32]) +} + +// Sum appends the current Keccak-256 digest to b and returns the resulting slice. +// Does not modify the sponge state. +func (s *sponge) Sum(b []byte) []byte { + d := s.Sum256() + return append(b, d[:]...) +} + +// Size returns the number of bytes Sum will produce (32). 
+func (s *sponge) Size() int { return 32 } + +// BlockSize returns the sponge rate in bytes (136). +func (s *sponge) BlockSize() int { return rate } + +// Read squeezes an arbitrary number of bytes from the sponge. +// On the first call, it pads and permutes, transitioning from absorbing to squeezing. +// Subsequent calls to Write will panic. It never returns an error. +func (s *sponge) Read(out []byte) (int, error) { + if !s.squeezing { + s.padAndSqueeze() + } + + n := len(out) + for len(out) > 0 { + x := copy(out, s.state[s.readIdx:rate]) + s.readIdx += x + out = out[x:] + if s.readIdx == rate { + keccakF1600(&s.state) + s.readIdx = 0 + } + } + return n, nil +} + +func (s *sponge) padAndSqueeze() { + xorIn(&s.state, s.buf[:s.absorbed]) + s.state[s.absorbed] ^= 0x01 + s.state[rate-1] ^= 0x80 + keccakF1600(&s.state) + s.squeezing = true + s.readIdx = 0 +} + +// sum256Sponge computes Keccak-256 in one shot using the assembly permutation. +func sum256Sponge(data []byte) [32]byte { + var state [200]byte + + for len(data) >= rate { + xorAndPermute(&state, &data[0]) + data = data[rate:] + } + + xorIn(&state, data) + state[len(data)] ^= 0x01 + state[rate-1] ^= 0x80 + keccakF1600(&state) + + return [32]byte(state[:32]) +} + +func xorIn(state *[200]byte, data []byte) { + stateU64 := (*[25]uint64)(unsafe.Pointer(state)) + n := len(data) >> 3 + p := unsafe.Pointer(unsafe.SliceData(data)) + for i := range n { + stateU64[i] ^= *(*uint64)(unsafe.Add(p, uintptr(i)<<3)) + } + for i := n << 3; i < len(data); i++ { + state[i] ^= data[i] + } +} \ No newline at end of file diff --git a/crypto/keccak/keccak_default.go b/crypto/keccak/keccak_default.go new file mode 100644 index 0000000000..387fc4298a --- /dev/null +++ b/crypto/keccak/keccak_default.go @@ -0,0 +1,71 @@ +//go:build (!arm64 && !amd64) || purego + +package keccak + +import ( + "golang.org/x/crypto/sha3" +) + +// Sum256 computes the Keccak-256 hash of data. 
+// On non-arm64 platforms, delegates to x/crypto/sha3.NewLegacyKeccak256(). +func Sum256(data []byte) [32]byte { + h := sha3.NewLegacyKeccak256() + h.Write(data) + var out [32]byte + h.Sum(out[:0]) + return out +} + +// Hasher is a streaming Keccak-256 hasher wrapping x/crypto/sha3. +type Hasher struct { + h KeccakState +} + +func (h *Hasher) init() { + if h.h == nil { + h.h = sha3.NewLegacyKeccak256().(KeccakState) + } +} + +// Reset resets the hasher to its initial state. +func (h *Hasher) Reset() { + h.init() + h.h.Reset() +} + +// Write absorbs data into the hasher. +// Panics if called after Read. +func (h *Hasher) Write(p []byte) (int, error) { + h.init() + return h.h.Write(p) +} + +// Sum256 finalizes and returns the 32-byte Keccak-256 digest. +// Does not modify the hasher state. +func (h *Hasher) Sum256() [32]byte { + h.init() + var out [32]byte + h.h.Sum(out[:0]) + return out +} + +// Sum appends the current Keccak-256 digest to b and returns the resulting slice. +// Does not modify the hasher state. +func (h *Hasher) Sum(b []byte) []byte { + h.init() + return h.h.Sum(b) +} + +// Size returns the number of bytes Sum will produce (32). +func (h *Hasher) Size() int { return 32 } + +// BlockSize returns the sponge rate in bytes (136). +func (h *Hasher) BlockSize() int { return rate } + +// Read squeezes an arbitrary number of bytes from the sponge. +// On the first call, it pads and permutes, transitioning from absorbing to squeezing. +// Subsequent calls to Write will panic. It never returns an error. 
+func (h *Hasher) Read(out []byte) (int, error) { + h.init() + return h.h.Read(out) +} \ No newline at end of file diff --git a/crypto/keccak/keccak_test.go b/crypto/keccak/keccak_test.go new file mode 100644 index 0000000000..15932c3979 --- /dev/null +++ b/crypto/keccak/keccak_test.go @@ -0,0 +1,339 @@ +package keccak + +import ( + "bytes" + "encoding/hex" + "fmt" + "testing" + + "golang.org/x/crypto/sha3" +) + +func TestSum256Empty(t *testing.T) { + got := Sum256(nil) + // Known Keccak-256 of empty string. + want, _ := hex.DecodeString("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470") + if !bytes.Equal(got[:], want) { + t.Fatalf("Sum256(nil) = %x, want %x", got, want) + } +} + +func TestSum256Hello(t *testing.T) { + got := Sum256([]byte("hello")) + want, _ := hex.DecodeString("1c8aff950685c2ed4bc3174f3472287b56d9517b9c948127319a09a7a36deac8") + if !bytes.Equal(got[:], want) { + t.Fatalf("Sum256(hello) = %x, want %x", got, want) + } +} + +func TestSum256LargeData(t *testing.T) { + // Test with data larger than one block (rate=136 bytes). + data := make([]byte, 500) + for i := range data { + data[i] = byte(i) + } + got := Sum256(data) + // Verify against streaming Hasher. + var h Hasher + h.Write(data) + want := h.Sum256() + if got != want { + t.Fatalf("Sum256 vs Hasher mismatch: %x vs %x", got, want) + } +} + +func TestHasherStreaming(t *testing.T) { + data := []byte("hello world, this is a longer test string for streaming keccak") + // All at once. + want := Sum256(data) + // Byte by byte. + var h Hasher + for _, b := range data { + h.Write([]byte{b}) + } + got := h.Sum256() + if got != want { + t.Fatalf("streaming byte-by-byte: %x vs %x", got, want) + } +} + +func TestHasherMultiBlock(t *testing.T) { + // Test with exactly 2 blocks + partial. + data := make([]byte, rate*2+50) + for i := range data { + data[i] = byte(i * 7) + } + want := Sum256(data) + // Write in chunks of 37 (not aligned to rate). 
+ var h Hasher + for i := 0; i < len(data); i += 37 { + end := i + 37 + if end > len(data) { + end = len(data) + } + h.Write(data[i:end]) + } + got := h.Sum256() + if got != want { + t.Fatalf("multi-block streaming: %x vs %x", got, want) + } +} + +func TestReadMatchesSum256(t *testing.T) { + // Read of 32 bytes should produce the same result as Sum256. + data := []byte("hello") + var h Hasher + h.Write(data) + var got [32]byte + h.Read(got[:]) + want := Sum256(data) + if got != want { + t.Fatalf("Read(32) = %x, want %x", got, want) + } +} + +func TestReadMatchesXCrypto(t *testing.T) { + // Compare Read output against x/crypto/sha3 for various lengths. + for _, readLen := range []int{32, 64, 136, 200, 500} { + data := []byte("test data for read comparison") + ref := sha3.NewLegacyKeccak256() + ref.Write(data) + want := make([]byte, readLen) + ref.(KeccakState).Read(want) + + var h Hasher + h.Write(data) + got := make([]byte, readLen) + h.Read(got) + if !bytes.Equal(got, want) { + t.Fatalf("Read(%d) mismatch:\ngot: %x\nwant: %x", readLen, got, want) + } + } +} + +func TestReadMultipleCalls(t *testing.T) { + // Multiple Read calls should produce the same output as one large Read. + data := []byte("streaming read test") + + // One large read. + var h1 Hasher + h1.Write(data) + all := make([]byte, 300) + h1.Read(all) + + // Multiple small reads. + var h2 Hasher + h2.Write(data) + var parts []byte + for i := 0; i < 300; { + chunk := 37 + if i+chunk > 300 { + chunk = 300 - i + } + buf := make([]byte, chunk) + h2.Read(buf) + parts = append(parts, buf...) + i += chunk + } + if !bytes.Equal(all, parts) { + t.Fatalf("multi-read mismatch:\ngot: %x\nwant: %x", parts, all) + } +} + +func TestReadEmpty(t *testing.T) { + // Read from hasher with no data written. 
+ ref := sha3.NewLegacyKeccak256() + want := make([]byte, 32) + ref.(KeccakState).Read(want) + + var h Hasher + got := make([]byte, 32) + h.Read(got) + if !bytes.Equal(got, want) { + t.Fatalf("Read empty mismatch:\ngot: %x\nwant: %x", got, want) + } +} + +func TestReadAfterReset(t *testing.T) { + var h Hasher + h.Write([]byte("first")) + h.Read(make([]byte, 32)) + + // Reset should allow Write again. + h.Reset() + h.Write([]byte("second")) + got := make([]byte, 32) + h.Read(got) + + want := Sum256([]byte("second")) + if !bytes.Equal(got, want[:]) { + t.Fatalf("Read after Reset mismatch:\ngot: %x\nwant: %x", got, want) + } +} + +func TestWriteAfterReadPanics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic on Write after Read") + } + }() + var h Hasher + h.Write([]byte("data")) + h.Read(make([]byte, 32)) + h.Write([]byte("more")) // should panic +} + +func FuzzSum256(f *testing.F) { + f.Add([]byte(nil)) + f.Add([]byte("hello")) + f.Add([]byte("hello world, this is a longer test string for streaming keccak")) + f.Add(make([]byte, rate)) + f.Add(make([]byte, rate+1)) + f.Add(make([]byte, rate*3+50)) + + f.Fuzz(func(t *testing.T, data []byte) { + // Reference: x/crypto NewLegacyKeccak256. + ref := sha3.NewLegacyKeccak256() + ref.Write(data) + want := ref.Sum(nil) + + // Test Sum256. + got := Sum256(data) + if !bytes.Equal(got[:], want) { + t.Fatalf("Sum256 mismatch for len=%d\ngot: %x\nwant: %x", len(data), got, want) + } + + // Test streaming Hasher (write all at once). + var h Hasher + h.Write(data) + gotH := h.Sum256() + if !bytes.Equal(gotH[:], want) { + t.Fatalf("Hasher mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotH, want) + } + + // Test streaming Hasher (byte-by-byte). 
+ h.Reset() + for _, b := range data { + h.Write([]byte{b}) + } + gotS := h.Sum256() + if !bytes.Equal(gotS[:], want) { + t.Fatalf("Hasher byte-by-byte mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotS, want) + } + + // Test Read (32 bytes) matches Sum256. + h.Reset() + h.Write(data) + gotRead := make([]byte, 32) + h.Read(gotRead) + if !bytes.Equal(gotRead, want) { + t.Fatalf("Read(32) mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotRead, want) + } + + // Test Read (extended output) matches x/crypto. + ref.Reset() + ref.Write(data) + wantExt := make([]byte, 200) + ref.(KeccakState).Read(wantExt) + + h.Reset() + h.Write(data) + gotExt := make([]byte, 200) + h.Read(gotExt) + if !bytes.Equal(gotExt, wantExt) { + t.Fatalf("Read(200) mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotExt, wantExt) + } + }) +} + +func BenchmarkSum256_500K(b *testing.B) { + data := make([]byte, 500*1024) + b.SetBytes(int64(len(data))) + b.ReportAllocs() + for b.Loop() { + Sum256(data) + } +} + +// Comparison benchmarks: faster_keccak vs golang.org/x/crypto/sha3. 
+var benchSizes = []int{32, 128, 256, 1024, 4096, 500 * 1024} + +func benchName(size int) string { + switch { + case size >= 1024: + return fmt.Sprintf("%dK", size/1024) + default: + return fmt.Sprintf("%dB", size) + } +} + +func BenchmarkFasterKeccak(b *testing.B) { + for _, size := range benchSizes { + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + b.Run(benchName(size), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ReportAllocs() + for b.Loop() { + Sum256(data) + } + }) + } +} + +func BenchmarkXCrypto(b *testing.B) { + for _, size := range benchSizes { + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + b.Run(benchName(size), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ReportAllocs() + h := sha3.NewLegacyKeccak256() + for b.Loop() { + h.Reset() + h.Write(data) + h.Sum(nil) + } + }) + } +} + +func BenchmarkFasterKeccakHasher(b *testing.B) { + for _, size := range benchSizes { + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + b.Run(benchName(size), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ReportAllocs() + var h Hasher + for b.Loop() { + h.Reset() + h.Write(data) + h.Sum256() + } + }) + } +} + +// BenchmarkKeccakStreaming_Sha3 benchmarks the standard sha3 streaming hasher (Reset+Write+Read). +func BenchmarkKeccakStreaming_Sha3(b *testing.B) { + data := make([]byte, 32) + for i := range data { + data[i] = byte(i) + } + h := sha3.NewLegacyKeccak256().(KeccakState) + var buf [32]byte + b.SetBytes(int64(len(data))) + b.ReportAllocs() + for b.Loop() { + h.Reset() + h.Write(data) + h.Read(buf[:]) + } +} \ No newline at end of file diff --git a/crypto/keccak/keccakf.go b/crypto/keccak/keccakf.go deleted file mode 100644 index 82694fa4a3..0000000000 --- a/crypto/keccak/keccakf.go +++ /dev/null @@ -1,414 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !amd64 || purego || !gc - -package keccak - -import "math/bits" - -// rc stores the round constants for use in the ι step. -var rc = [24]uint64{ - 0x0000000000000001, - 0x0000000000008082, - 0x800000000000808A, - 0x8000000080008000, - 0x000000000000808B, - 0x0000000080000001, - 0x8000000080008081, - 0x8000000000008009, - 0x000000000000008A, - 0x0000000000000088, - 0x0000000080008009, - 0x000000008000000A, - 0x000000008000808B, - 0x800000000000008B, - 0x8000000000008089, - 0x8000000000008003, - 0x8000000000008002, - 0x8000000000000080, - 0x000000000000800A, - 0x800000008000000A, - 0x8000000080008081, - 0x8000000000008080, - 0x0000000080000001, - 0x8000000080008008, -} - -// keccakF1600 applies the Keccak permutation to a 1600b-wide -// state represented as a slice of 25 uint64s. -func keccakF1600(a *[25]uint64) { - // Implementation translated from Keccak-inplace.c - // in the keccak reference code. - var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64 - - for i := 0; i < 24; i += 4 { - // Combines the 5 steps in each round into 2 steps. - // Unrolls 4 rounds per loop and spreads some steps across rounds. 
- - // Round 1 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[6] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[12] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[18] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[24] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i] - a[6] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[16] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[22] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[3] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[10] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[1] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[7] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[19] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[20] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[11] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[23] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[4] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[5] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = 
a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[2] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[8] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[14] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[15] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - // Round 2 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[16] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[7] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[23] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[14] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1] - a[16] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[11] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[2] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[18] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[20] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[6] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[22] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[4] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[15] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[1] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ 
d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[8] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[24] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[10] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[12] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[3] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[19] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[5] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - // Round 3 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[11] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[22] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[8] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[19] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2] - a[11] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[1] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[12] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[23] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[15] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[16] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[2] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 
= bits.RotateLeft64(t, 25) - t = a[24] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[5] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[6] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[3] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[14] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[20] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[7] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[18] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[4] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[10] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - // Round 4 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[1] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[2] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[3] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[4] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3] - a[1] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[6] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[7] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[8] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = 
bits.RotateLeft64(t, 20) - a[5] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[11] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[12] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[14] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[10] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[16] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[18] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[19] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[15] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[22] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[23] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[24] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[20] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - } -} diff --git a/crypto/keccak/keccakf_amd64.go b/crypto/keccak/keccakf_amd64.go index cb6eca44c3..c34e3acd65 100644 --- a/crypto/keccak/keccakf_amd64.go +++ b/crypto/keccak/keccakf_amd64.go @@ -1,13 +1,19 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build amd64 && !purego && gc +//go:build amd64 && !purego package keccak -// This function is implemented in keccakf_amd64.s. 
+import "unsafe" //go:noescape +func keccakF1600(a *[200]byte) -func keccakF1600(a *[25]uint64) +// Sum256 computes the Keccak-256 hash of data. Zero heap allocations. +func Sum256(data []byte) [32]byte { return sum256Sponge(data) } + +// Hasher is a streaming Keccak-256 hasher. Designed for stack allocation. +type Hasher struct{ sponge } + +func xorAndPermute(state *[200]byte, buf *byte) { + xorIn(state, unsafe.Slice(buf, rate)) + keccakF1600(state) +} \ No newline at end of file diff --git a/crypto/keccak/keccakf_amd64.s b/crypto/keccak/keccakf_amd64.s index 99e2f16e97..85df986ef7 100644 --- a/crypto/keccak/keccakf_amd64.s +++ b/crypto/keccak/keccakf_amd64.s @@ -1,8 +1,8 @@ -// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg sha3. DO NOT EDIT. +// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg keccak. DO NOT EDIT. -//go:build amd64 && !purego && gc +//go:build !purego -// func keccakF1600(a *[25]uint64) +// func keccakF1600(a *[200]byte) TEXT ·keccakF1600(SB), $200-8 MOVQ a+0(FP), DI @@ -5417,3 +5417,4 @@ TEXT ·keccakF1600(SB), $200-8 NOTQ 136(DI) NOTQ 160(DI) RET + \ No newline at end of file diff --git a/crypto/keccak/sha3.go b/crypto/keccak/sha3.go deleted file mode 100644 index a554323244..0000000000 --- a/crypto/keccak/sha3.go +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package keccak - -import ( - "crypto/subtle" - "encoding/binary" - "errors" - "unsafe" - - "golang.org/x/sys/cpu" -) - -// spongeDirection indicates the direction bytes are flowing through the sponge. -type spongeDirection int - -const ( - // spongeAbsorbing indicates that the sponge is absorbing input. - spongeAbsorbing spongeDirection = iota - // spongeSqueezing indicates that the sponge is being squeezed.
- spongeSqueezing -) - -type state struct { - a [1600 / 8]byte // main state of the hash - - // a[n:rate] is the buffer. If absorbing, it's the remaining space to XOR - // into before running the permutation. If squeezing, it's the remaining - // output to produce before running the permutation. - n, rate int - - // dsbyte contains the "domain separation" bits and the first bit of - // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the - // SHA-3 and SHAKE functions by appending bitstrings to the message. - // Using a little-endian bit-ordering convention, these are "01" for SHA-3 - // and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the - // padding rule from section 5.1 is applied to pad the message to a multiple - // of the rate, which involves adding a "1" bit, zero or more "0" bits, and - // a final "1" bit. We merge the first "1" bit from the padding into dsbyte, - // giving 00000110b (0x06) and 00011111b (0x1f). - // [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf - // "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and - // Extendable-Output Functions (May 2014)" - dsbyte byte - - outputLen int // the default output size in bytes - state spongeDirection // whether the sponge is absorbing or squeezing -} - -// BlockSize returns the rate of sponge underlying this hash function. -func (d *state) BlockSize() int { return d.rate } - -// Size returns the output size of the hash function in bytes. -func (d *state) Size() int { return d.outputLen } - -// Reset clears the internal state by zeroing the sponge state and -// the buffer indexes, and setting Sponge.state to absorbing. -func (d *state) Reset() { - // Zero the permutation's state. - for i := range d.a { - d.a[i] = 0 - } - d.state = spongeAbsorbing - d.n = 0 -} - -func (d *state) clone() *state { - ret := *d - return &ret -} - -// permute applies the KeccakF-1600 permutation. 
-func (d *state) permute() { - var a *[25]uint64 - if cpu.IsBigEndian { - a = new([25]uint64) - for i := range a { - a[i] = binary.LittleEndian.Uint64(d.a[i*8:]) - } - } else { - a = (*[25]uint64)(unsafe.Pointer(&d.a)) - } - - keccakF1600(a) - d.n = 0 - - if cpu.IsBigEndian { - for i := range a { - binary.LittleEndian.PutUint64(d.a[i*8:], a[i]) - } - } -} - -// pads appends the domain separation bits in dsbyte, applies -// the multi-bitrate 10..1 padding rule, and permutes the state. -func (d *state) padAndPermute() { - // Pad with this instance's domain-separator bits. We know that there's - // at least one byte of space in the sponge because, if it were full, - // permute would have been called to empty it. dsbyte also contains the - // first one bit for the padding. See the comment in the state struct. - d.a[d.n] ^= d.dsbyte - // This adds the final one bit for the padding. Because of the way that - // bits are numbered from the LSB upwards, the final bit is the MSB of - // the last byte. - d.a[d.rate-1] ^= 0x80 - // Apply the permutation - d.permute() - d.state = spongeSqueezing -} - -// Write absorbs more data into the hash's state. It panics if any -// output has already been read. -func (d *state) Write(p []byte) (n int, err error) { - if d.state != spongeAbsorbing { - panic("sha3: Write after Read") - } - - n = len(p) - - for len(p) > 0 { - x := subtle.XORBytes(d.a[d.n:d.rate], d.a[d.n:d.rate], p) - d.n += x - p = p[x:] - - // If the sponge is full, apply the permutation. - if d.n == d.rate { - d.permute() - } - } - - return -} - -// Read squeezes an arbitrary number of bytes from the sponge. -func (d *state) Read(out []byte) (n int, err error) { - // If we're still absorbing, pad and apply the permutation. - if d.state == spongeAbsorbing { - d.padAndPermute() - } - - n = len(out) - - // Now, do the squeezing. - for len(out) > 0 { - // Apply the permutation if we've squeezed the sponge dry. 
- if d.n == d.rate { - d.permute() - } - - x := copy(out, d.a[d.n:d.rate]) - d.n += x - out = out[x:] - } - - return -} - -// Sum applies padding to the hash state and then squeezes out the desired -// number of output bytes. It panics if any output has already been read. -func (d *state) Sum(in []byte) []byte { - if d.state != spongeAbsorbing { - panic("sha3: Sum after Read") - } - - // Make a copy of the original hash so that caller can keep writing - // and summing. - dup := d.clone() - hash := make([]byte, dup.outputLen, 64) // explicit cap to allow stack allocation - dup.Read(hash) - return append(in, hash...) -} - -const ( - magicSHA3 = "sha\x08" - magicShake = "sha\x09" - magicCShake = "sha\x0a" - magicKeccak = "sha\x0b" - // magic || rate || main state || n || sponge direction - marshaledSize = len(magicSHA3) + 1 + 200 + 1 + 1 -) - -func (d *state) MarshalBinary() ([]byte, error) { - return d.AppendBinary(make([]byte, 0, marshaledSize)) -} - -func (d *state) AppendBinary(b []byte) ([]byte, error) { - switch d.dsbyte { - case dsbyteSHA3: - b = append(b, magicSHA3...) - case dsbyteShake: - b = append(b, magicShake...) - case dsbyteCShake: - b = append(b, magicCShake...) - case dsbyteKeccak: - b = append(b, magicKeccak...) - default: - panic("unknown dsbyte") - } - // rate is at most 168, and n is at most rate. - b = append(b, byte(d.rate)) - b = append(b, d.a[:]...) 
- b = append(b, byte(d.n), byte(d.state)) - return b, nil -} - -func (d *state) UnmarshalBinary(b []byte) error { - if len(b) != marshaledSize { - return errors.New("sha3: invalid hash state") - } - - magic := string(b[:len(magicSHA3)]) - b = b[len(magicSHA3):] - switch { - case magic == magicSHA3 && d.dsbyte == dsbyteSHA3: - case magic == magicShake && d.dsbyte == dsbyteShake: - case magic == magicCShake && d.dsbyte == dsbyteCShake: - case magic == magicKeccak && d.dsbyte == dsbyteKeccak: - default: - return errors.New("sha3: invalid hash state identifier") - } - - rate := int(b[0]) - b = b[1:] - if rate != d.rate { - return errors.New("sha3: invalid hash state function") - } - - copy(d.a[:], b) - b = b[len(d.a):] - - n, state := int(b[0]), spongeDirection(b[1]) - if n > d.rate { - return errors.New("sha3: invalid hash state") - } - d.n = n - if state != spongeAbsorbing && state != spongeSqueezing { - return errors.New("sha3: invalid hash state") - } - d.state = state - - return nil -} diff --git a/crypto/keccak/sha3_test.go b/crypto/keccak/sha3_test.go deleted file mode 100644 index 28a20ec72d..0000000000 --- a/crypto/keccak/sha3_test.go +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package keccak - -// Tests include all the ShortMsgKATs provided by the Keccak team at -// https://github.com/gvanas/KeccakCodePackage -// -// They only include the zero-bit case of the bitwise testvectors -// published by NIST in the draft of FIPS-202. 
- -import ( - "bytes" - "compress/flate" - "encoding" - "encoding/hex" - "encoding/json" - "hash" - "math/rand" - "os" - "strings" - "testing" -) - -const ( - testString = "brekeccakkeccak koax koax" - katFilename = "testdata/keccakKats.json.deflate" -) - -// testDigests contains functions returning hash.Hash instances -// with output-length equal to the KAT length for SHA-3, Keccak -// and SHAKE instances. -var testDigests = map[string]func() hash.Hash{ - "Keccak-256": NewLegacyKeccak256, - "Keccak-512": NewLegacyKeccak512, -} - -// decodeHex converts a hex-encoded string into a raw byte string. -func decodeHex(s string) []byte { - b, err := hex.DecodeString(s) - if err != nil { - panic(err) - } - return b -} - -// structs used to marshal JSON test-cases. -type KeccakKats struct { - Kats map[string][]struct { - Digest string `json:"digest"` - Length int64 `json:"length"` - Message string `json:"message"` - - // Defined only for cSHAKE - N string `json:"N"` - S string `json:"S"` - } -} - -// TestKeccakKats tests the SHA-3 and Shake implementations against all the -// ShortMsgKATs from https://github.com/gvanas/KeccakCodePackage -// (The testvectors are stored in keccakKats.json.deflate due to their length.) -func TestKeccakKats(t *testing.T) { - // Read the KATs. 
- deflated, err := os.Open(katFilename) - if err != nil { - t.Errorf("error opening %s: %s", katFilename, err) - } - file := flate.NewReader(deflated) - dec := json.NewDecoder(file) - var katSet KeccakKats - err = dec.Decode(&katSet) - if err != nil { - t.Errorf("error decoding KATs: %s", err) - } - - for algo, function := range testDigests { - d := function() - for _, kat := range katSet.Kats[algo] { - d.Reset() - in, err := hex.DecodeString(kat.Message) - if err != nil { - t.Errorf("error decoding KAT: %s", err) - } - d.Write(in[:kat.Length/8]) - got := strings.ToUpper(hex.EncodeToString(d.Sum(nil))) - if got != kat.Digest { - t.Errorf("function=%s, length=%d\nmessage:\n %s\ngot:\n %s\nwanted:\n %s", - algo, kat.Length, kat.Message, got, kat.Digest) - t.Logf("wanted %+v", kat) - t.FailNow() - } - continue - } - } -} - -// TestKeccak does a basic test of the non-standardized Keccak hash functions. -func TestKeccak(t *testing.T) { - tests := []struct { - fn func() hash.Hash - data []byte - want string - }{ - { - NewLegacyKeccak256, - []byte("abc"), - "4e03657aea45a94fc7d47ba826c8d667c0d1e6e33a64a036ec44f58fa12d6c45", - }, - { - NewLegacyKeccak512, - []byte("abc"), - "18587dc2ea106b9a1563e32b3312421ca164c7f1f07bc922a9c83d77cea3a1e5d0c69910739025372dc14ac9642629379540c17e2a65b19d77aa511a9d00bb96", - }, - } - - for _, u := range tests { - h := u.fn() - h.Write(u.data) - got := h.Sum(nil) - want := decodeHex(u.want) - if !bytes.Equal(got, want) { - t.Errorf("unexpected hash for size %d: got '%x' want '%s'", h.Size()*8, got, u.want) - } - } -} - -// TestUnalignedWrite tests that writing data in an arbitrary pattern with -// small input buffers. -func TestUnalignedWrite(t *testing.T) { - buf := sequentialBytes(0x10000) - for alg, df := range testDigests { - d := df() - d.Reset() - d.Write(buf) - want := d.Sum(nil) - d.Reset() - for i := 0; i < len(buf); { - // Cycle through offsets which make a 137 byte sequence. 
- // Because 137 is prime this sequence should exercise all corner cases. - offsets := [17]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1} - for _, j := range offsets { - if v := len(buf) - i; v < j { - j = v - } - d.Write(buf[i : i+j]) - i += j - } - } - got := d.Sum(nil) - if !bytes.Equal(got, want) { - t.Errorf("Unaligned writes, alg=%s\ngot %q, want %q", alg, got, want) - } - } -} - -// sequentialBytes produces a buffer of size consecutive bytes 0x00, 0x01, ..., used for testing. -// -// The alignment of each slice is intentionally randomized to detect alignment -// issues in the implementation. See https://golang.org/issue/37644. -// Ideally, the compiler should fuzz the alignment itself. -// (See https://golang.org/issue/35128.) -func sequentialBytes(size int) []byte { - alignmentOffset := rand.Intn(8) - result := make([]byte, size+alignmentOffset)[alignmentOffset:] - for i := range result { - result[i] = byte(i) - } - return result -} - -func TestMarshalUnmarshal(t *testing.T) { - t.Run("Keccak-256", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak256()) }) - t.Run("Keccak-512", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak512()) }) -} - -// TODO(filippo): move this to crypto/internal/cryptotest. -func testMarshalUnmarshal(t *testing.T, h hash.Hash) { - buf := make([]byte, 200) - rand.Read(buf) - n := rand.Intn(200) - h.Write(buf) - want := h.Sum(nil) - h.Reset() - h.Write(buf[:n]) - b, err := h.(encoding.BinaryMarshaler).MarshalBinary() - if err != nil { - t.Errorf("MarshalBinary: %v", err) - } - h.Write(bytes.Repeat([]byte{0}, 200)) - if err := h.(encoding.BinaryUnmarshaler).UnmarshalBinary(b); err != nil { - t.Errorf("UnmarshalBinary: %v", err) - } - h.Write(buf[n:]) - got := h.Sum(nil) - if !bytes.Equal(got, want) { - t.Errorf("got %x, want %x", got, want) - } -} - -// BenchmarkPermutationFunction measures the speed of the permutation function -// with no input data. 
-func BenchmarkPermutationFunction(b *testing.B) { - b.SetBytes(int64(200)) - var lanes [25]uint64 - for i := 0; i < b.N; i++ { - keccakF1600(&lanes) - } -} diff --git a/crypto/keccak/testdata/keccakKats.json.deflate b/crypto/keccak/testdata/keccakKats.json.deflate deleted file mode 100644 index 7a94c2f8bc..0000000000 Binary files a/crypto/keccak/testdata/keccakKats.json.deflate and /dev/null differ diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index b11ad4e78a..4fdc681b00 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -55,7 +55,7 @@ func TestHashing(t *testing.T) { } var want, got string var old = func() { - hasher := keccak.NewLegacyKeccak256() + hasher := keccak.NewFastKeccak() for i := 0; i < len(bytecodes); i++ { hasher.Reset() hasher.Write(bytecodes[i]) @@ -88,7 +88,7 @@ func BenchmarkHashing(b *testing.B) { bytecodes[i] = buf } var old = func() { - hasher := keccak.NewLegacyKeccak256() + hasher := keccak.NewFastKeccak() for i := 0; i < len(bytecodes); i++ { hasher.Reset() hasher.Write(bytecodes[i]) diff --git a/internal/blocktest/test_hash.go b/internal/blocktest/test_hash.go index 5e5b1202de..e3f04f35fe 100644 --- a/internal/blocktest/test_hash.go +++ b/internal/blocktest/test_hash.go @@ -39,7 +39,7 @@ type testHasher struct { // NewHasher returns a new testHasher instance. func NewHasher() *testHasher { - return &testHasher{hasher: keccak.NewLegacyKeccak256()} + return &testHasher{hasher: keccak.NewFastKeccak()} } // Reset resets the hash state. 
diff --git a/p2p/dnsdisc/tree.go b/p2p/dnsdisc/tree.go index a629d6291f..70e32b841d 100644 --- a/p2p/dnsdisc/tree.go +++ b/p2p/dnsdisc/tree.go @@ -262,7 +262,7 @@ const ( ) func subdomain(e entry) string { - h := keccak.NewLegacyKeccak256() + h := keccak.NewFastKeccak() io.WriteString(h, e.String()) return b32format.EncodeToString(h.Sum(nil)[:16]) } @@ -272,7 +272,7 @@ func (e *rootEntry) String() string { } func (e *rootEntry) sigHash() []byte { - h := keccak.NewLegacyKeccak256() + h := keccak.NewFastKeccak() fmt.Fprintf(h, rootPrefix+" e=%s l=%s seq=%d", e.eroot, e.lroot, e.seq) return h.Sum(nil) } diff --git a/p2p/enode/idscheme.go b/p2p/enode/idscheme.go index 313815c465..5e33f7d1ac 100644 --- a/p2p/enode/idscheme.go +++ b/p2p/enode/idscheme.go @@ -49,7 +49,7 @@ func SignV4(r *enr.Record, privkey *ecdsa.PrivateKey) error { cpy.Set(enr.ID("v4")) cpy.Set(Secp256k1(privkey.PublicKey)) - h := keccak.NewLegacyKeccak256() + h := keccak.NewFastKeccak() rlp.Encode(h, cpy.AppendElements(nil)) sig, err := crypto.Sign(h.Sum(nil), privkey) if err != nil { @@ -70,7 +70,7 @@ func (V4ID) Verify(r *enr.Record, sig []byte) error { return errors.New("invalid public key") } - h := keccak.NewLegacyKeccak256() + h := keccak.NewFastKeccak() rlp.Encode(h, r.AppendElements(nil)) if !crypto.VerifySignature(entry, h.Sum(nil), sig) { return enr.ErrInvalidSig diff --git a/p2p/rlpx/rlpx.go b/p2p/rlpx/rlpx.go index 40a5c38fcb..8da40cdcbe 100644 --- a/p2p/rlpx/rlpx.go +++ b/p2p/rlpx/rlpx.go @@ -486,10 +486,10 @@ func (h *handshakeState) secrets(auth, authResp []byte) (Secrets, error) { } // setup sha3 instances for the MACs - mac1 := keccak.NewLegacyKeccak256() + mac1 := keccak.NewFastKeccak() mac1.Write(xor(s.MAC, h.respNonce)) mac1.Write(auth) - mac2 := keccak.NewLegacyKeccak256() + mac2 := keccak.NewFastKeccak() mac2.Write(xor(s.MAC, h.initNonce)) mac2.Write(authResp) if h.initiator { diff --git a/tests/state_test_util.go b/tests/state_test_util.go index 7525081f84..ea89a23f29 100644 --- 
a/tests/state_test_util.go +++ b/tests/state_test_util.go @@ -496,7 +496,7 @@ func (tx *stTransaction) toMessage(ps stPostState, baseFee *big.Int) (*core.Mess } func rlpHash(x interface{}) (h common.Hash) { - hw := keccak.NewLegacyKeccak256() + hw := keccak.NewFastKeccak() rlp.Encode(hw, x) hw.Sum(h[:0]) return h diff --git a/trie/hasher.go b/trie/hasher.go index a2a1f5b662..9f84cd960d 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -22,13 +22,14 @@ import ( "sync" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/rlp" ) // hasher is a type used for the trie Hash operation. A hasher has some // internal preallocated temp space type hasher struct { - sha crypto.KeccakState + sha keccak.KeccakState tmp []byte encbuf rlp.EncoderBuffer parallel bool // Whether to use parallel threads when hashing diff --git a/trie/trie_test.go b/trie/trie_test.go index 3423cde59c..a18b351d13 100644 --- a/trie/trie_test.go +++ b/trie/trie_test.go @@ -968,7 +968,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { prng := rand.New(rand.NewSource(int64(count))) // This spongeDb is used to check the sequence of disk-db-writes s := &spongeDb{ - sponge: keccak.NewLegacyKeccak256(), + sponge: keccak.NewFastKeccak(), id: "a", values: make(map[string]string), } @@ -977,7 +977,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { // Another sponge is used for the stacktrie commits stackTrieSponge := &spongeDb{ - sponge: keccak.NewLegacyKeccak256(), + sponge: keccak.NewFastKeccak(), id: "b", values: make(map[string]string), } @@ -1040,7 +1040,7 @@ func TestCommitSequenceStackTrie(t *testing.T) { // not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do. 
func TestCommitSequenceSmallRoot(t *testing.T) { s := &spongeDb{ - sponge: keccak.NewLegacyKeccak256(), + sponge: keccak.NewFastKeccak(), id: "a", values: make(map[string]string), } @@ -1049,7 +1049,7 @@ func TestCommitSequenceSmallRoot(t *testing.T) { // Another sponge is used for the stacktrie commits stackTrieSponge := &spongeDb{ - sponge: keccak.NewLegacyKeccak256(), + sponge: keccak.NewFastKeccak(), id: "b", values: make(map[string]string), }