This commit is contained in:
Sahil Sojitra 2026-05-21 21:55:10 -07:00 committed by GitHub
commit d8a8e7d0a1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 5762 additions and 6368 deletions

View file

@ -37,7 +37,6 @@ import (
"github.com/ethereum/go-ethereum/core/types/bal"
"github.com/ethereum/go-ethereum/core/vm"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/crypto/keccak"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/params"
@ -627,9 +626,11 @@ func (c *Clique) Close() error {
// SealHash returns the hash of a block prior to it being sealed.
func SealHash(header *types.Header) (hash common.Hash) {
hasher := keccak.NewLegacyKeccak256()
hasher := crypto.NewKeccakState()
defer crypto.ReturnToPool(hasher)
encodeSigHeader(hasher, header)
hasher.(crypto.KeccakState).Read(hash[:])
hasher.Sum(hash[:0])
return hash
}

View file

@ -23,7 +23,7 @@ import (
"github.com/ethereum/go-ethereum/common/bitutil"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/crypto/keccak"
)
type bytesBacked interface {
@ -141,7 +141,7 @@ func Bloom9(data []byte) []byte {
// bloomValues returns the bytes (index-value pairs) to set for the given data
func bloomValues(data []byte, hashbuf *[6]byte) (uint, byte, uint, byte, uint, byte) {
sha := hasherPool.Get().(crypto.KeccakState)
sha := hasherPool.Get().(keccak.KeccakState)
sha.Reset()
sha.Write(data)
sha.Read(hashbuf[:])

View file

@ -24,6 +24,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/crypto/keccak"
"github.com/ethereum/go-ethereum/rlp"
)
@ -55,7 +56,7 @@ func getPooledBuffer(size uint64) ([]byte, *bytes.Buffer, error) {
// rlpHash encodes x and hashes the encoded bytes.
func rlpHash(x interface{}) (h common.Hash) {
sha := hasherPool.Get().(crypto.KeccakState)
sha := hasherPool.Get().(keccak.KeccakState)
defer hasherPool.Put(sha)
sha.Reset()
rlp.Encode(sha, x)
@ -66,7 +67,7 @@ func rlpHash(x interface{}) (h common.Hash) {
// prefixedRlpHash writes the prefix into the hasher before rlp-encoding x.
// It's used for typed transactions.
func prefixedRlpHash(prefix byte, x interface{}) (h common.Hash) {
sha := hasherPool.Get().(crypto.KeccakState)
sha := hasherPool.Get().(keccak.KeccakState)
defer hasherPool.Put(sha)
sha.Reset()
sha.Write([]byte{prefix})

View file

@ -24,13 +24,13 @@ import (
"encoding/hex"
"errors"
"fmt"
"hash"
"io"
"math/big"
"os"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/math"
"github.com/ethereum/go-ethereum/crypto/keccak"
"github.com/ethereum/go-ethereum/rlp"
)
@ -59,16 +59,8 @@ type EllipticCurve interface {
Unmarshal(data []byte) (x, y *big.Int)
}
// KeccakState wraps sha3.state. In addition to the usual hash methods, it also supports
// Read to get a variable amount of data from the hash state. Read is faster than Sum
// because it doesn't copy the internal state, but also modifies the internal state.
type KeccakState interface {
hash.Hash
Read([]byte) (int, error)
}
// HashData hashes the provided data using the KeccakState and returns a 32 byte hash
func HashData(kh KeccakState, data []byte) (h common.Hash) {
func HashData(kh keccak.KeccakState, data []byte) (h common.Hash) {
kh.Reset()
kh.Write(data)
kh.Read(h[:])

View file

@ -26,38 +26,40 @@ import (
)
// NewKeccakState creates a new KeccakState
func NewKeccakState() KeccakState {
return keccak.NewLegacyKeccak256().(KeccakState)
func NewKeccakState() keccak.KeccakState {
h := hasherPool.Get().(keccak.KeccakState)
h.Reset()
return h
}
func ReturnToPool(h keccak.KeccakState) { hasherPool.Put(h) }
var hasherPool = sync.Pool{
New: func() any {
return keccak.NewLegacyKeccak256().(KeccakState)
return keccak.NewLegacyKeccak256()
},
}
// Keccak256 calculates and returns the Keccak256 hash of the input data.
func Keccak256(data ...[]byte) []byte {
b := make([]byte, 32)
d := hasherPool.Get().(KeccakState)
d.Reset()
d := NewKeccakState()
for _, b := range data {
d.Write(b)
}
d.Read(b)
hasherPool.Put(d)
ReturnToPool(d)
return b
}
// Keccak256Hash calculates and returns the Keccak256 hash of the input data,
// converting it to an internal Hash data structure.
func Keccak256Hash(data ...[]byte) (h common.Hash) {
d := hasherPool.Get().(KeccakState)
d.Reset()
d := NewKeccakState()
for _, b := range data {
d.Write(b)
}
d.Read(h[:])
hasherPool.Put(d)
d.Read(h[:]) //nolint:errcheck
ReturnToPool(d)
return h
}

View file

@ -1,44 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package keccak
// This file provides functions for creating instances of the SHA-3
// and SHAKE hash functions, as well as utility functions for hashing
// bytes.
import (
"hash"
)
const (
dsbyteSHA3 = 0b00000110
dsbyteKeccak = 0b00000001
dsbyteShake = 0b00011111
dsbyteCShake = 0b00000100
// rateK[c] is the rate in bytes for Keccak[c] where c is the capacity in
// bits. Given the sponge size is 1600 bits, the rate is 1600 - c bits.
rateK256 = (1600 - 256) / 8
rateK448 = (1600 - 448) / 8
rateK512 = (1600 - 512) / 8
rateK768 = (1600 - 768) / 8
rateK1024 = (1600 - 1024) / 8
)
// NewLegacyKeccak256 creates a new Keccak-256 hash.
//
// Only use this function if you require compatibility with an existing cryptosystem
// that uses non-standard padding. All other users should use New256 instead.
func NewLegacyKeccak256() hash.Hash {
return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteKeccak}
}
// NewLegacyKeccak512 creates a new Keccak-512 hash.
//
// Only use this function if you require compatibility with an existing cryptosystem
// that uses non-standard padding. All other users should use New512 instead.
func NewLegacyKeccak512() hash.Hash {
return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteKeccak}
}

View file

@ -0,0 +1,226 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !purego
#include "textflag.h"
// func keccakF1600Sha3(a *[200]byte, buf *byte)
// When buf != nil, XORs rate bytes into state before permuting.
// When buf == nil, just permutes.
//
// Register use in this routine:
//   R0      pointer to the 200-byte state (a)
//   R3      pointer to the input block (buf), may be nil
//   R1      cursor into the round_consts table
//   R2      remaining round count (starts at 24)
//   V0-V24  the 25 state lanes, one 64-bit lane per vector register
//   V25-V31 scratch
//
// NOTE(review): a 200-byte frame is declared, but no stack slot is referenced
// anywhere in this routine.
TEXT ·keccakF1600Sha3(SB), $200-16
MOVD a+0(FP), R0
MOVD buf+8(FP), R3
MOVD $round_consts<>(SB), R1
MOVD $24, R2 // counter for loop
// buf == nil: skip the XOR path and only load the state.
CBZ R3, load_state
// XOR path: load state and XOR with buf (17 lanes = 136 bytes)
// Each pair of post-incrementing loads below fetches two state lanes and the
// matching two input lanes (16 bytes each), then XORs the input into the state.
VLD1.P 16(R0), [V0.D1, V1.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V0.B16, V0.B16
VEOR V26.B16, V1.B16, V1.B16
VLD1.P 16(R0), [V2.D1, V3.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V2.B16, V2.B16
VEOR V26.B16, V3.B16, V3.B16
VLD1.P 16(R0), [V4.D1, V5.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V4.B16, V4.B16
VEOR V26.B16, V5.B16, V5.B16
VLD1.P 16(R0), [V6.D1, V7.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V6.B16, V6.B16
VEOR V26.B16, V7.B16, V7.B16
VLD1.P 16(R0), [V8.D1, V9.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V8.B16, V8.B16
VEOR V26.B16, V9.B16, V9.B16
VLD1.P 16(R0), [V10.D1, V11.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V10.B16, V10.B16
VEOR V26.B16, V11.B16, V11.B16
VLD1.P 16(R0), [V12.D1, V13.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V12.B16, V12.B16
VEOR V26.B16, V13.B16, V13.B16
VLD1.P 16(R0), [V14.D1, V15.D1]
VLD1.P 16(R3), [V25.D1, V26.D1]
VEOR V25.B16, V14.B16, V14.B16
VEOR V26.B16, V15.B16, V15.B16
// Lane 16: last data lane (8 bytes at buf offset 128)
// Lane 17 is loaded alongside it but receives no input data.
VLD1.P 16(R0), [V16.D1, V17.D1]
VLD1 (R3), [V25.D1]
VEOR V25.B16, V16.B16, V16.B16
// Remaining state lanes 18-24 (no data to XOR)
VLD1.P 16(R0), [V18.D1, V19.D1]
VLD1.P 16(R0), [V20.D1, V21.D1]
VLD1.P 16(R0), [V22.D1, V23.D1]
VLD1 (R0), [V24.D1]
// Twelve 16-byte post-increments moved R0 by 192; rewind it to the start of
// the state so the final stores write back in place.
SUB $192, R0, R0
B rounds
load_state:
// Permute-only path: load all 25 lanes without touching buf.
VLD1.P 16(R0), [V0.D1, V1.D1]
VLD1.P 16(R0), [V2.D1, V3.D1]
VLD1.P 16(R0), [V4.D1, V5.D1]
VLD1.P 16(R0), [V6.D1, V7.D1]
VLD1.P 16(R0), [V8.D1, V9.D1]
VLD1.P 16(R0), [V10.D1, V11.D1]
VLD1.P 16(R0), [V12.D1, V13.D1]
VLD1.P 16(R0), [V14.D1, V15.D1]
VLD1.P 16(R0), [V16.D1, V17.D1]
VLD1.P 16(R0), [V18.D1, V19.D1]
VLD1.P 16(R0), [V20.D1, V21.D1]
VLD1.P 16(R0), [V22.D1, V23.D1]
VLD1 (R0), [V24.D1]
// Rewind R0 to the start of the state (same 192-byte advance as above).
SUB $192, R0, R0
rounds:
// One iteration of this loop is one Keccak round; R2 counts the 24 rounds down.
// theta
VEOR3 V20.B16, V15.B16, V10.B16, V25.B16
VEOR3 V21.B16, V16.B16, V11.B16, V26.B16
VEOR3 V22.B16, V17.B16, V12.B16, V27.B16
VEOR3 V23.B16, V18.B16, V13.B16, V28.B16
VEOR3 V24.B16, V19.B16, V14.B16, V29.B16
VEOR3 V25.B16, V5.B16, V0.B16, V25.B16
VEOR3 V26.B16, V6.B16, V1.B16, V26.B16
VEOR3 V27.B16, V7.B16, V2.B16, V27.B16
VEOR3 V28.B16, V8.B16, V3.B16, V28.B16
VEOR3 V29.B16, V9.B16, V4.B16, V29.B16
VRAX1 V27.D2, V25.D2, V30.D2
VRAX1 V28.D2, V26.D2, V31.D2
VRAX1 V29.D2, V27.D2, V27.D2
VRAX1 V25.D2, V28.D2, V28.D2
VRAX1 V26.D2, V29.D2, V29.D2
// theta and rho and Pi
VEOR V29.B16, V0.B16, V0.B16
VXAR $63, V30.D2, V1.D2, V25.D2
VXAR $20, V30.D2, V6.D2, V1.D2
VXAR $44, V28.D2, V9.D2, V6.D2
VXAR $3, V31.D2, V22.D2, V9.D2
VXAR $25, V28.D2, V14.D2, V22.D2
VXAR $46, V29.D2, V20.D2, V14.D2
VXAR $2, V31.D2, V2.D2, V26.D2
VXAR $21, V31.D2, V12.D2, V2.D2
VXAR $39, V27.D2, V13.D2, V12.D2
VXAR $56, V28.D2, V19.D2, V13.D2
VXAR $8, V27.D2, V23.D2, V19.D2
VXAR $23, V29.D2, V15.D2, V23.D2
VXAR $37, V28.D2, V4.D2, V15.D2
VXAR $50, V28.D2, V24.D2, V28.D2
VXAR $62, V30.D2, V21.D2, V24.D2
VXAR $9, V27.D2, V8.D2, V8.D2
VXAR $19, V30.D2, V16.D2, V4.D2
VXAR $28, V29.D2, V5.D2, V16.D2
VXAR $36, V27.D2, V3.D2, V5.D2
VXAR $43, V27.D2, V18.D2, V27.D2
VXAR $49, V31.D2, V17.D2, V3.D2
VXAR $54, V30.D2, V11.D2, V30.D2
VXAR $58, V31.D2, V7.D2, V31.D2
VXAR $61, V29.D2, V10.D2, V29.D2
// chi and iota
VBCAX V8.B16, V22.B16, V26.B16, V20.B16
VBCAX V22.B16, V23.B16, V8.B16, V21.B16
VBCAX V23.B16, V24.B16, V22.B16, V22.B16
VBCAX V24.B16, V26.B16, V23.B16, V23.B16
VBCAX V26.B16, V8.B16, V24.B16, V24.B16
// V26 is free from here on: fetch this round's constant into it and advance
// the table cursor R1 by 8 bytes. It is XORed into lane 0 at the end of the round.
VLD1R.P 8(R1), [V26.D2]
VBCAX V3.B16, V19.B16, V30.B16, V17.B16
VBCAX V19.B16, V15.B16, V3.B16, V18.B16
VBCAX V15.B16, V16.B16, V19.B16, V19.B16
VBCAX V16.B16, V30.B16, V15.B16, V15.B16
VBCAX V30.B16, V3.B16, V16.B16, V16.B16
VBCAX V31.B16, V12.B16, V25.B16, V10.B16
VBCAX V12.B16, V13.B16, V31.B16, V11.B16
VBCAX V13.B16, V14.B16, V12.B16, V12.B16
VBCAX V14.B16, V25.B16, V13.B16, V13.B16
VBCAX V25.B16, V31.B16, V14.B16, V14.B16
VBCAX V4.B16, V9.B16, V29.B16, V7.B16
VBCAX V9.B16, V5.B16, V4.B16, V8.B16
VBCAX V5.B16, V6.B16, V9.B16, V9.B16
VBCAX V6.B16, V29.B16, V5.B16, V5.B16
VBCAX V29.B16, V4.B16, V6.B16, V6.B16
VBCAX V28.B16, V0.B16, V27.B16, V3.B16
VBCAX V0.B16, V1.B16, V28.B16, V4.B16
VBCAX V1.B16, V2.B16, V0.B16, V0.B16 // iota (chi part)
VBCAX V2.B16, V27.B16, V1.B16, V1.B16
VBCAX V27.B16, V28.B16, V2.B16, V2.B16
VEOR V26.B16, V0.B16, V0.B16 // iota
SUB $1, R2, R2
CBNZ R2, rounds
// All rounds done: store the 25 lanes back over the caller's state.
VST1.P [V0.D1, V1.D1], 16(R0)
VST1.P [V2.D1, V3.D1], 16(R0)
VST1.P [V4.D1, V5.D1], 16(R0)
VST1.P [V6.D1, V7.D1], 16(R0)
VST1.P [V8.D1, V9.D1], 16(R0)
VST1.P [V10.D1, V11.D1], 16(R0)
VST1.P [V12.D1, V13.D1], 16(R0)
VST1.P [V14.D1, V15.D1], 16(R0)
VST1.P [V16.D1, V17.D1], 16(R0)
VST1.P [V18.D1, V19.D1], 16(R0)
VST1.P [V20.D1, V21.D1], 16(R0)
VST1.P [V22.D1, V23.D1], 16(R0)
VST1 [V24.D1], (R0)
RET
DATA round_consts<>+0x00(SB)/8, $0x0000000000000001
DATA round_consts<>+0x08(SB)/8, $0x0000000000008082
DATA round_consts<>+0x10(SB)/8, $0x800000000000808a
DATA round_consts<>+0x18(SB)/8, $0x8000000080008000
DATA round_consts<>+0x20(SB)/8, $0x000000000000808b
DATA round_consts<>+0x28(SB)/8, $0x0000000080000001
DATA round_consts<>+0x30(SB)/8, $0x8000000080008081
DATA round_consts<>+0x38(SB)/8, $0x8000000000008009
DATA round_consts<>+0x40(SB)/8, $0x000000000000008a
DATA round_consts<>+0x48(SB)/8, $0x0000000000000088
DATA round_consts<>+0x50(SB)/8, $0x0000000080008009
DATA round_consts<>+0x58(SB)/8, $0x000000008000000a
DATA round_consts<>+0x60(SB)/8, $0x000000008000808b
DATA round_consts<>+0x68(SB)/8, $0x800000000000008b
DATA round_consts<>+0x70(SB)/8, $0x8000000000008089
DATA round_consts<>+0x78(SB)/8, $0x8000000000008003
DATA round_consts<>+0x80(SB)/8, $0x8000000000008002
DATA round_consts<>+0x88(SB)/8, $0x8000000000000080
DATA round_consts<>+0x90(SB)/8, $0x000000000000800a
DATA round_consts<>+0x98(SB)/8, $0x800000008000000a
DATA round_consts<>+0xA0(SB)/8, $0x8000000080008081
DATA round_consts<>+0xA8(SB)/8, $0x8000000000008080
DATA round_consts<>+0xB0(SB)/8, $0x0000000080000001
DATA round_consts<>+0xB8(SB)/8, $0x8000000080008008
GLOBL round_consts<>(SB), NOPTR|RODATA, $192

20
crypto/keccak/keccak.go Normal file
View file

@ -0,0 +1,20 @@
// Package keccak provides Keccak-256 hashing with platform-specific acceleration.
package keccak
import "hash"
// KeccakState is the interface of a Keccak hasher. In addition to the usual hash
// methods, it also supports Read to get a variable amount of data from the hash
// state. Read is faster than Sum because it doesn't copy the internal state, but
// also modifies the internal state.
//
// *Hasher is asserted to implement this interface in this package.
type KeccakState interface {
hash.Hash
// Read fills the given slice with output squeezed from the hash state.
Read([]byte) (int, error)
}
const rate = 136 // sponge rate for Keccak-256: (1600 - 2*256) / 8
var _ KeccakState = (*Hasher)(nil)
// NewLegacyKeccak256 returns a pointer to a freshly allocated, zero-valued
// Hasher. The zero Hasher is ready for Write, Sum and Read without any
// further initialization.
func NewLegacyKeccak256() *Hasher {
	return new(Hasher)
}

View file

@ -0,0 +1,30 @@
//go:build arm64 && !purego
package keccak
import (
"runtime"
"golang.org/x/sys/cpu"
)
// init decides once, at package load, whether the SHA3-instruction assembly
// (VEOR3, VRAX1, VXAR, VBCAX) may be used.
//
// darwin and ios builds enable it unconditionally; on every other OS it is
// enabled only when golang.org/x/sys/cpu reports the ARM64 SHA3 feature.
// When useASM stays false, hashing falls back to x/crypto/sha3.
//
// NOTE(review): the unconditional "ios" case presumes every iOS device this
// binary can run on implements the Armv8.2-A SHA3 extension — confirm.
func init() {
	switch runtime.GOOS {
	case "darwin", "ios":
		useASM = true
	default:
		useASM = cpu.ARM64.HasSHA3
	}
}
// keccakF1600Sha3 permutes state. When buf != nil, it first XORs rate bytes
// of buf into state, saving one full memory pass.
//
//go:noescape
func keccakF1600Sha3(a *[200]byte, buf *byte)
// keccakF1600 applies the permutation to a without absorbing any input:
// it calls keccakF1600Sha3 with a nil buf.
func keccakF1600(a *[200]byte) {
keccakF1600Sha3(a, nil)
}
// xorAndPermute absorbs one input block and permutes: it forwards state and
// buf to keccakF1600Sha3, which XORs the block at buf into state before
// permuting when buf is non-nil.
func xorAndPermute(state *[200]byte, buf *byte) {
keccakF1600Sha3(state, buf)
}

233
crypto/keccak/keccak_asm.go Normal file
View file

@ -0,0 +1,233 @@
//go:build (amd64 || arm64) && !purego
package keccak
import (
"encoding/binary"
"golang.org/x/crypto/sha3"
)
// useASM is set by platform-specific init to indicate hardware acceleration is available.
// When false, Sum256 and Hasher fall back to x/crypto/sha3.
var useASM bool
// sponge is the core Keccak-256 sponge state used by native (asm) implementations.
type sponge struct {
// state is the 200-byte permutation state.
state [200]byte
// buf holds input bytes that do not yet make up a full rate-sized block.
buf [rate]byte
// absorbed is the number of valid bytes currently held in buf.
absorbed int
// squeezing is set once padding has been applied; Write and Sum256 panic after that.
squeezing bool
readIdx int // index into state for next Read byte
}
// Reset returns the sponge to the empty absorbing state: the permutation state
// is zeroed and the buffered-byte count, read cursor and squeezing flag are
// cleared. The contents of buf are left alone; they are ignored while
// absorbed is zero.
func (s *sponge) Reset() {
	clear(s.state[:])
	s.absorbed, s.readIdx = 0, 0
	s.squeezing = false
}
// Write absorbs p into the sponge and always reports len(p), nil.
// It panics if the sponge has already switched to squeezing (after Read).
func (s *sponge) Write(p []byte) (int, error) {
	if s.squeezing {
		panic("keccak: Write after Read")
	}
	total := len(p)

	// Top up a partially filled buffer first; absorb it once it holds a full block.
	if s.absorbed > 0 {
		filled := copy(s.buf[s.absorbed:rate], p)
		s.absorbed += filled
		p = p[filled:]
		if s.absorbed == rate {
			xorAndPermute(&s.state, &s.buf[0])
			s.absorbed = 0
		}
	}

	// Absorb whole blocks straight from the caller's slice, bypassing buf.
	for ; len(p) >= rate; p = p[rate:] {
		xorAndPermute(&s.state, &p[0])
	}

	// Keep any tail shorter than a block for the next Write or the final padding.
	if len(p) > 0 {
		s.absorbed = copy(s.buf[:], p)
	}
	return total, nil
}
// Sum256 returns the 32-byte Keccak-256 digest of everything written so far.
// Padding and the final permutation are applied to a copy of the state, so the
// sponge itself is unchanged and more data may be written afterwards.
// It panics if called after Read.
func (s *sponge) Sum256() [32]byte {
	if s.squeezing {
		panic("keccak: Sum after Read")
	}
	scratch := s.state // array assignment copies; s.state stays untouched
	xorIn(&scratch, s.buf[:s.absorbed])
	// Keccak padding: 0x01 right after the data, 0x80 in the last byte of the rate.
	scratch[s.absorbed] ^= 0x01
	scratch[rate-1] ^= 0x80
	keccakF1600(&scratch)

	var digest [32]byte
	copy(digest[:], scratch[:32])
	return digest
}
// Sum computes the current Keccak-256 digest via Sum256 and returns b with the
// 32 digest bytes appended. The sponge state is not modified.
func (s *sponge) Sum(b []byte) []byte {
	digest := s.Sum256()
	b = append(b, digest[:]...)
	return b
}
// Size returns the number of bytes Sum will produce (32).
// The value is a constant and does not depend on the sponge's state.
func (s *sponge) Size() int { return 32 }
// BlockSize returns the sponge rate in bytes (136).
// This is the package-level rate constant, the block size Write absorbs at a time.
func (s *sponge) BlockSize() int { return rate }
// Read fills out with squeezed output and always reports len(out), nil.
// The first call pads and permutes, which moves the sponge from absorbing to
// squeezing; any later Write panics. Output is taken from the first rate bytes
// of the state, permuting again each time those bytes are used up.
func (s *sponge) Read(out []byte) (int, error) {
	if !s.squeezing {
		s.padAndSqueeze()
	}
	requested := len(out)
	for remaining := out; len(remaining) > 0; {
		copied := copy(remaining, s.state[s.readIdx:rate])
		remaining = remaining[copied:]
		s.readIdx += copied
		if s.readIdx == rate {
			// The current block is exhausted: produce the next one.
			keccakF1600(&s.state)
			s.readIdx = 0
		}
	}
	return requested, nil
}
// padAndSqueeze finalizes absorption in place: it folds the buffered tail into
// the state, applies the Keccak padding, permutes once, and marks the sponge
// as squeezing with the read cursor at the start of the state.
func (s *sponge) padAndSqueeze() {
	pending := s.buf[:s.absorbed]
	xorIn(&s.state, pending)
	// Keccak padding: 0x01 right after the data, 0x80 in the last byte of the rate.
	s.state[len(pending)] ^= 0x01
	s.state[rate-1] ^= 0x80
	keccakF1600(&s.state)
	s.squeezing, s.readIdx = true, 0
}
// sum256Sponge hashes data in one shot with the assembly permutation, using
// only a local 200-byte state: full blocks are absorbed directly from data,
// the remaining tail is XORed in, padded and permuted, and the first 32 bytes
// of the state are returned as the digest.
func sum256Sponge(data []byte) [32]byte {
	var st [200]byte
	for ; len(data) >= rate; data = data[rate:] {
		xorAndPermute(&st, &data[0])
	}
	// data now holds fewer than rate bytes.
	xorIn(&st, data)
	st[len(data)] ^= 0x01
	st[rate-1] ^= 0x80
	keccakF1600(&st)

	var digest [32]byte
	copy(digest[:], st[:32])
	return digest
}
// Sum256 computes the Keccak-256 hash of data. Zero heap allocations when hardware
// acceleration is available; without it (useASM false) the work is delegated to
// x/crypto/sha3.
func Sum256(data []byte) [32]byte {
	if useASM {
		return sum256Sponge(data)
	}
	return sum256XCrypto(data)
}
// sum256XCrypto computes the Keccak-256 hash of data with a freshly created
// x/crypto/sha3 legacy Keccak-256 hasher.
func sum256XCrypto(data []byte) [32]byte {
	var digest [32]byte
	hasher := sha3.NewLegacyKeccak256()
	hasher.Write(data)
	// Summing into digest[:0] lets the hasher write directly into the array.
	hasher.Sum(digest[:0])
	return digest
}
// Hasher is a streaming Keccak-256 hasher.
// Uses platform assembly when available, x/crypto/sha3 otherwise.
//
// Size and BlockSize are provided by the embedded sponge. The remaining methods
// consult useASM on every call and use either the sponge or xc accordingly.
type Hasher struct {
// sponge is the native state, used when useASM is true.
sponge
// xc is created lazily by Reset, Write or Read when useASM is false.
xc KeccakState // x/crypto fallback
}
// Reset returns the hasher to its initial state. With hardware acceleration the
// embedded sponge is reset; otherwise the x/crypto fallback is reset, or
// created if it does not exist yet (a new hasher needs no reset).
func (h *Hasher) Reset() {
	switch {
	case useASM:
		h.sponge.Reset()
	case h.xc == nil:
		h.xc = sha3.NewLegacyKeccak256().(KeccakState)
	default:
		h.xc.Reset()
	}
}
// Write absorbs p into the hasher.
// Panics if called after Read.
// Without hardware acceleration the x/crypto fallback is created on first use.
func (h *Hasher) Write(p []byte) (int, error) {
	if useASM {
		return h.sponge.Write(p)
	}
	if h.xc == nil {
		h.xc = sha3.NewLegacyKeccak256().(KeccakState)
	}
	return h.xc.Write(p)
}
// Sum256 returns the 32-byte Keccak-256 digest of the data written so far.
// Does not modify the hasher state.
// On the fallback path a hasher that was never written to has no x/crypto
// state yet; in that case the digest of empty input is returned.
func (h *Hasher) Sum256() [32]byte {
	switch {
	case useASM:
		return h.sponge.Sum256()
	case h.xc == nil:
		return Sum256(nil)
	}
	var digest [32]byte
	h.xc.Sum(digest[:0])
	return digest
}
// Sum returns b with the current Keccak-256 digest appended.
// Does not modify the hasher state.
// On the fallback path, a hasher without x/crypto state appends the digest of
// empty input.
func (h *Hasher) Sum(b []byte) []byte {
	if useASM {
		return h.sponge.Sum(b)
	}
	if h.xc != nil {
		return h.xc.Sum(b)
	}
	digest := Sum256(nil)
	return append(b, digest[:]...)
}
// Read squeezes len(out) bytes of output into out.
// On the first call, it pads and permutes, transitioning from absorbing to squeezing.
// Subsequent calls to Write will panic. It never returns an error.
// Without hardware acceleration the x/crypto fallback is created on first use.
func (h *Hasher) Read(out []byte) (int, error) {
	if useASM {
		return h.sponge.Read(out)
	}
	if h.xc == nil {
		h.xc = sha3.NewLegacyKeccak256().(KeccakState)
	}
	return h.xc.Read(out)
}
// xorIn XORs data into the first len(data) bytes of state. Complete 8-byte
// groups are combined as little-endian uint64 values; the 0-7 trailing bytes
// are handled one at a time.
func xorIn(state *[200]byte, data []byte) {
	whole := len(data) &^ 7 // length rounded down to a multiple of 8
	for off := 0; off < whole; off += 8 {
		lane := state[off : off+8]
		mixed := binary.LittleEndian.Uint64(lane) ^ binary.LittleEndian.Uint64(data[off:off+8])
		binary.LittleEndian.PutUint64(lane, mixed)
	}
	for off := whole; off < len(data); off++ {
		state[off] ^= data[off]
	}
}

View file

@ -0,0 +1,71 @@
//go:build (!arm64 && !amd64) || purego
package keccak
import (
"golang.org/x/crypto/sha3"
)
// Sum256 computes the Keccak-256 hash of data.
// This build (neither arm64 nor amd64, or the purego tag) has no assembly
// path, so it always delegates to x/crypto/sha3.NewLegacyKeccak256().
func Sum256(data []byte) [32]byte {
	var digest [32]byte
	hasher := sha3.NewLegacyKeccak256()
	hasher.Write(data)
	// Summing into digest[:0] lets the hasher write directly into the array.
	hasher.Sum(digest[:0])
	return digest
}
// Hasher is a streaming Keccak-256 hasher wrapping x/crypto/sha3.
// The zero value is usable: every method calls init first, which creates the
// wrapped hasher on demand.
type Hasher struct {
// h is the wrapped x/crypto hasher; nil until the first method call.
h KeccakState
}
// init creates the wrapped x/crypto hasher if it does not exist yet.
// It is a no-op on every call after the first.
func (h *Hasher) init() {
	if h.h != nil {
		return
	}
	h.h = sha3.NewLegacyKeccak256().(KeccakState)
}
// Reset resets the hasher to its initial state.
// The wrapped hasher is created first if needed, then reset.
func (h *Hasher) Reset() {
h.init()
h.h.Reset()
}
// Write absorbs data into the hasher.
// Panics if called after Read.
// The call is forwarded to the wrapped x/crypto hasher, which is created on
// first use; its return values are passed through unchanged.
func (h *Hasher) Write(p []byte) (int, error) {
h.init()
return h.h.Write(p)
}
// Sum256 returns the 32-byte Keccak-256 digest of the data written so far,
// obtained from the wrapped hasher's Sum.
// Does not modify the hasher state (apart from creating the wrapped hasher on
// first use).
func (h *Hasher) Sum256() [32]byte {
	var digest [32]byte
	h.init()
	// Summing into digest[:0] lets the hasher write directly into the array.
	h.h.Sum(digest[:0])
	return digest
}
// Sum appends the current Keccak-256 digest to b and returns the resulting slice.
// Does not modify the hasher state.
// The call is forwarded to the wrapped x/crypto hasher, which is created on first use.
func (h *Hasher) Sum(b []byte) []byte {
h.init()
return h.h.Sum(b)
}
// Size returns the number of bytes Sum will produce (32).
// The value is a constant; the wrapped hasher is not consulted.
func (h *Hasher) Size() int { return 32 }
// BlockSize returns the sponge rate in bytes (136).
// This is the package-level rate constant; the wrapped hasher is not consulted.
func (h *Hasher) BlockSize() int { return rate }
// Read squeezes an arbitrary number of bytes from the sponge.
// On the first call, it pads and permutes, transitioning from absorbing to squeezing.
// Subsequent calls to Write will panic. It never returns an error.
// The call is forwarded to the wrapped x/crypto hasher, which is created on first use.
func (h *Hasher) Read(out []byte) (int, error) {
h.init()
return h.h.Read(out)
}

View file

@ -0,0 +1,353 @@
package keccak
import (
"bytes"
"encoding/hex"
"fmt"
"testing"
"golang.org/x/crypto/sha3"
)
// TestSum256Empty checks Sum256 of nil input against the known Keccak-256
// digest of the empty string.
func TestSum256Empty(t *testing.T) {
	// Known Keccak-256 of empty string.
	const wantHex = "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"
	want, _ := hex.DecodeString(wantHex)

	if got := Sum256(nil); !bytes.Equal(got[:], want) {
		t.Fatalf("Sum256(nil) = %x, want %x", got, want)
	}
}
func TestSum256Hello(t *testing.T) {
got := Sum256([]byte("hello"))
want, _ := hex.DecodeString("1c8aff950685c2ed4bc3174f3472287b56d9517b9c948127319a09a7a36deac8")
if !bytes.Equal(got[:], want) {
t.Fatalf("Sum256(hello) = %x, want %x", got, want)
}
}
func TestSum256LargeData(t *testing.T) {
// Test with data larger than one block (rate=136 bytes).
data := make([]byte, 500)
for i := range data {
data[i] = byte(i)
}
got := Sum256(data)
// Verify against streaming Hasher.
var h Hasher
h.Write(data)
want := h.Sum256()
if got != want {
t.Fatalf("Sum256 vs Hasher mismatch: %x vs %x", got, want)
}
}
func TestHasherStreaming(t *testing.T) {
data := []byte("hello world, this is a longer test string for streaming keccak")
// All at once.
want := Sum256(data)
// Byte by byte.
var h Hasher
for _, b := range data {
h.Write([]byte{b})
}
got := h.Sum256()
if got != want {
t.Fatalf("streaming byte-by-byte: %x vs %x", got, want)
}
}
// TestHasherMultiBlock feeds two full blocks plus a 50-byte tail to a Hasher
// in 37-byte pieces and expects the same digest as the one-shot Sum256.
func TestHasherMultiBlock(t *testing.T) {
	data := make([]byte, rate*2+50)
	for i := range data {
		data[i] = byte(i * 7)
	}

	// 37 does not divide the rate, so writes straddle block boundaries.
	const chunk = 37
	var h Hasher
	for rest := data; len(rest) > 0; {
		n := min(chunk, len(rest))
		h.Write(rest[:n])
		rest = rest[n:]
	}

	if got, want := h.Sum256(), Sum256(data); got != want {
		t.Fatalf("multi-block streaming: %x vs %x", got, want)
	}
}
func TestReadMatchesSum256(t *testing.T) {
// Read of 32 bytes should produce the same result as Sum256.
data := []byte("hello")
var h Hasher
h.Write(data)
var got [32]byte
h.Read(got[:])
want := Sum256(data)
if got != want {
t.Fatalf("Read(32) = %x, want %x", got, want)
}
}
func TestReadMatchesXCrypto(t *testing.T) {
// Compare Read output against x/crypto/sha3 for various lengths.
for _, readLen := range []int{32, 64, 136, 200, 500} {
data := []byte("test data for read comparison")
ref := sha3.NewLegacyKeccak256()
ref.Write(data)
want := make([]byte, readLen)
ref.(KeccakState).Read(want)
var h Hasher
h.Write(data)
got := make([]byte, readLen)
h.Read(got)
if !bytes.Equal(got, want) {
t.Fatalf("Read(%d) mismatch:\ngot: %x\nwant: %x", readLen, got, want)
}
}
}
func TestReadMultipleCalls(t *testing.T) {
// Multiple Read calls should produce the same output as one large Read.
data := []byte("streaming read test")
// One large read.
var h1 Hasher
h1.Write(data)
all := make([]byte, 300)
h1.Read(all)
// Multiple small reads.
var h2 Hasher
h2.Write(data)
var parts []byte
for i := 0; i < 300; {
chunk := 37
if i+chunk > 300 {
chunk = 300 - i
}
buf := make([]byte, chunk)
h2.Read(buf)
parts = append(parts, buf...)
i += chunk
}
if !bytes.Equal(all, parts) {
t.Fatalf("multi-read mismatch:\ngot: %x\nwant: %x", parts, all)
}
}
func TestReadEmpty(t *testing.T) {
// Read from hasher with no data written.
ref := sha3.NewLegacyKeccak256()
want := make([]byte, 32)
ref.(KeccakState).Read(want)
var h Hasher
got := make([]byte, 32)
h.Read(got)
if !bytes.Equal(got, want) {
t.Fatalf("Read empty mismatch:\ngot: %x\nwant: %x", got, want)
}
}
func TestReadAfterReset(t *testing.T) {
var h Hasher
h.Write([]byte("first"))
h.Read(make([]byte, 32))
// Reset should allow Write again.
h.Reset()
h.Write([]byte("second"))
got := make([]byte, 32)
h.Read(got)
want := Sum256([]byte("second"))
if !bytes.Equal(got, want[:]) {
t.Fatalf("Read after Reset mismatch:\ngot: %x\nwant: %x", got, want)
}
}
// TestWriteAfterReadPanics verifies that writing to a Hasher that has already
// been read from panics; the deferred check fails the test if no panic occurs.
func TestWriteAfterReadPanics(t *testing.T) {
	var h Hasher
	defer func() {
		if recover() == nil {
			t.Fatal("expected panic on Write after Read")
		}
	}()

	h.Write([]byte("data"))
	h.Read(make([]byte, 32))
	h.Write([]byte("more")) // should panic
}
func FuzzSum256(f *testing.F) {
f.Add([]byte(nil))
f.Add([]byte("hello"))
f.Add([]byte("hello world, this is a longer test string for streaming keccak"))
f.Add(make([]byte, rate))
f.Add(make([]byte, rate+1))
f.Add(make([]byte, rate*3+50))
f.Fuzz(func(t *testing.T, data []byte) {
// Reference: x/crypto NewLegacyKeccak256.
ref := sha3.NewLegacyKeccak256()
ref.Write(data)
want := ref.Sum(nil)
// Test Sum256.
got := Sum256(data)
if !bytes.Equal(got[:], want) {
t.Fatalf("Sum256 mismatch for len=%d\ngot: %x\nwant: %x", len(data), got, want)
}
// Test streaming Hasher (write all at once).
var h Hasher
h.Write(data)
gotH := h.Sum256()
if !bytes.Equal(gotH[:], want) {
t.Fatalf("Hasher mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotH, want)
}
// Test streaming Hasher (byte-by-byte).
h.Reset()
for _, b := range data {
h.Write([]byte{b})
}
gotS := h.Sum256()
if !bytes.Equal(gotS[:], want) {
t.Fatalf("Hasher byte-by-byte mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotS, want)
}
// Test Read (32 bytes) matches Sum256.
h.Reset()
h.Write(data)
gotRead := make([]byte, 32)
h.Read(gotRead)
if !bytes.Equal(gotRead, want) {
t.Fatalf("Read(32) mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotRead, want)
}
// Test Read (extended output) matches x/crypto.
ref.Reset()
ref.Write(data)
wantExt := make([]byte, 200)
ref.(KeccakState).Read(wantExt)
h.Reset()
h.Write(data)
gotExt := make([]byte, 200)
h.Read(gotExt)
if !bytes.Equal(gotExt, wantExt) {
t.Fatalf("Read(200) mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotExt, wantExt)
}
})
}
// Comparison benchmarks: faster_keccak vs golang.org/x/crypto/sha3.
var benchSizes = []int{32, 128, 256, 1024, 4096, 500 * 1024}
// benchName renders a benchmark input size as a short label: sizes of 1024
// bytes or more are shown in whole KiB with a "K" suffix (rounded down),
// smaller sizes in bytes with a "B" suffix.
func benchName(size int) string {
	value, unit := size, "B"
	if size >= 1024 {
		value, unit = size/1024, "K"
	}
	return fmt.Sprintf("%d%s", value, unit)
}
// BenchmarkKeccak256Sum tests Sum256 with local faster_keccak implementation.
func BenchmarkKeccak256Sum(b *testing.B) {
for _, size := range benchSizes {
data := make([]byte, size)
for i := range data {
data[i] = byte(i)
}
b.Run("FasterKeccak/"+benchName(size), func(b *testing.B) {
b.SetBytes(int64(size))
b.ReportAllocs()
for b.Loop() {
Sum256(data)
}
})
}
}
// BenchmarkKeccak256Stdlib benchmarks Reset+Write+Sum on the golang.org/x/crypto/sha3 hasher as a baseline (x/crypto is not part of the Go standard library, despite the name).
func BenchmarkKeccak256Stdlib(b *testing.B) {
for _, size := range benchSizes {
data := make([]byte, size)
for i := range data {
data[i] = byte(i)
}
b.Run("StdLib/"+benchName(size), func(b *testing.B) {
b.SetBytes(int64(size))
b.ReportAllocs()
h := sha3.NewLegacyKeccak256()
for b.Loop() {
h.Reset()
h.Write(data)
h.Sum(nil)
}
})
}
}
// BenchmarkKeccak256Hasher tests Hasher.Sum256() with local faster_keccak implementation.
func BenchmarkKeccak256Hasher(b *testing.B) {
for _, size := range benchSizes {
data := make([]byte, size)
for i := range data {
data[i] = byte(i)
}
b.Run("FasterKeccak/"+benchName(size), func(b *testing.B) {
b.SetBytes(int64(size))
b.ReportAllocs()
var h Hasher
for b.Loop() {
h.Reset()
h.Write(data)
h.Sum256()
}
})
}
}
// BenchmarkKeccak256HasherStdlib benchmarks Reset+Write+Read on the golang.org/x/crypto/sha3 hasher as a baseline (x/crypto is not part of the Go standard library, despite the name).
func BenchmarkKeccak256HasherStdlib(b *testing.B) {
for _, size := range benchSizes {
data := make([]byte, size)
for i := range data {
data[i] = byte(i)
}
b.Run("StdLib/"+benchName(size), func(b *testing.B) {
b.SetBytes(int64(size))
b.ReportAllocs()
h := sha3.NewLegacyKeccak256().(KeccakState)
var buf [32]byte
for b.Loop() {
h.Reset()
h.Write(data)
h.Read(buf[:])
}
})
}
}
// BenchmarkKeccakStreaming benchmarks the streaming hasher (Reset+Write+Read).
// Use with benchstat: go test -bench=BenchmarkKeccakStreaming -benchmem ./... | benchstat
func BenchmarkKeccakStreaming(b *testing.B) {
data := make([]byte, 32)
for i := range data {
data[i] = byte(i)
}
var h Hasher
var buf [32]byte
b.SetBytes(int64(len(data)))
b.ReportAllocs()
for b.Loop() {
h.Reset()
h.Write(data)
h.Read(buf[:])
}
}

View file

@ -1,414 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !amd64 || purego || !gc
package keccak
import "math/bits"
// rc stores the round constants for use in the ι step.
var rc = [24]uint64{
0x0000000000000001,
0x0000000000008082,
0x800000000000808A,
0x8000000080008000,
0x000000000000808B,
0x0000000080000001,
0x8000000080008081,
0x8000000000008009,
0x000000000000008A,
0x0000000000000088,
0x0000000080008009,
0x000000008000000A,
0x000000008000808B,
0x800000000000008B,
0x8000000000008089,
0x8000000000008003,
0x8000000000008002,
0x8000000000000080,
0x000000000000800A,
0x800000008000000A,
0x8000000080008081,
0x8000000000008080,
0x0000000080000001,
0x8000000080008008,
}
// keccakF1600 applies the Keccak permutation to a 1600b-wide
// state represented as a slice of 25 uint64s.
func keccakF1600(a *[25]uint64) {
// Implementation translated from Keccak-inplace.c
// in the keccak reference code.
var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64
for i := 0; i < 24; i += 4 {
// Combines the 5 steps in each round into 2 steps.
// Unrolls 4 rounds per loop and spreads some steps across rounds.
// Round 1
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[6] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[12] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[18] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[24] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i]
a[6] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[16] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[22] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[3] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[10] = bc0 ^ (bc2 &^ bc1)
a[16] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[1] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[7] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[19] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[20] = bc0 ^ (bc2 &^ bc1)
a[1] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[11] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[23] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[4] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[5] = bc0 ^ (bc2 &^ bc1)
a[11] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[2] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[8] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[14] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[15] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
// Round 2
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[16] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[7] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[23] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[14] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1]
a[16] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[11] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[2] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[18] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[20] = bc0 ^ (bc2 &^ bc1)
a[11] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[6] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[22] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[4] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[15] = bc0 ^ (bc2 &^ bc1)
a[6] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[1] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[8] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[24] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[10] = bc0 ^ (bc2 &^ bc1)
a[1] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[12] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[3] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[19] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[5] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
// Round 3
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[11] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[22] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[8] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[19] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2]
a[11] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[1] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[12] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[23] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[15] = bc0 ^ (bc2 &^ bc1)
a[1] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[16] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[2] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[24] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[5] = bc0 ^ (bc2 &^ bc1)
a[16] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[6] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[3] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[14] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[20] = bc0 ^ (bc2 &^ bc1)
a[6] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[7] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[18] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[4] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[10] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
// Round 4
bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20]
bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21]
bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22]
bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23]
bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24]
d0 = bc4 ^ (bc1<<1 | bc1>>63)
d1 = bc0 ^ (bc2<<1 | bc2>>63)
d2 = bc1 ^ (bc3<<1 | bc3>>63)
d3 = bc2 ^ (bc4<<1 | bc4>>63)
d4 = bc3 ^ (bc0<<1 | bc0>>63)
bc0 = a[0] ^ d0
t = a[1] ^ d1
bc1 = bits.RotateLeft64(t, 44)
t = a[2] ^ d2
bc2 = bits.RotateLeft64(t, 43)
t = a[3] ^ d3
bc3 = bits.RotateLeft64(t, 21)
t = a[4] ^ d4
bc4 = bits.RotateLeft64(t, 14)
a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3]
a[1] = bc1 ^ (bc3 &^ bc2)
a[2] = bc2 ^ (bc4 &^ bc3)
a[3] = bc3 ^ (bc0 &^ bc4)
a[4] = bc4 ^ (bc1 &^ bc0)
t = a[5] ^ d0
bc2 = bits.RotateLeft64(t, 3)
t = a[6] ^ d1
bc3 = bits.RotateLeft64(t, 45)
t = a[7] ^ d2
bc4 = bits.RotateLeft64(t, 61)
t = a[8] ^ d3
bc0 = bits.RotateLeft64(t, 28)
t = a[9] ^ d4
bc1 = bits.RotateLeft64(t, 20)
a[5] = bc0 ^ (bc2 &^ bc1)
a[6] = bc1 ^ (bc3 &^ bc2)
a[7] = bc2 ^ (bc4 &^ bc3)
a[8] = bc3 ^ (bc0 &^ bc4)
a[9] = bc4 ^ (bc1 &^ bc0)
t = a[10] ^ d0
bc4 = bits.RotateLeft64(t, 18)
t = a[11] ^ d1
bc0 = bits.RotateLeft64(t, 1)
t = a[12] ^ d2
bc1 = bits.RotateLeft64(t, 6)
t = a[13] ^ d3
bc2 = bits.RotateLeft64(t, 25)
t = a[14] ^ d4
bc3 = bits.RotateLeft64(t, 8)
a[10] = bc0 ^ (bc2 &^ bc1)
a[11] = bc1 ^ (bc3 &^ bc2)
a[12] = bc2 ^ (bc4 &^ bc3)
a[13] = bc3 ^ (bc0 &^ bc4)
a[14] = bc4 ^ (bc1 &^ bc0)
t = a[15] ^ d0
bc1 = bits.RotateLeft64(t, 36)
t = a[16] ^ d1
bc2 = bits.RotateLeft64(t, 10)
t = a[17] ^ d2
bc3 = bits.RotateLeft64(t, 15)
t = a[18] ^ d3
bc4 = bits.RotateLeft64(t, 56)
t = a[19] ^ d4
bc0 = bits.RotateLeft64(t, 27)
a[15] = bc0 ^ (bc2 &^ bc1)
a[16] = bc1 ^ (bc3 &^ bc2)
a[17] = bc2 ^ (bc4 &^ bc3)
a[18] = bc3 ^ (bc0 &^ bc4)
a[19] = bc4 ^ (bc1 &^ bc0)
t = a[20] ^ d0
bc3 = bits.RotateLeft64(t, 41)
t = a[21] ^ d1
bc4 = bits.RotateLeft64(t, 2)
t = a[22] ^ d2
bc0 = bits.RotateLeft64(t, 62)
t = a[23] ^ d3
bc1 = bits.RotateLeft64(t, 55)
t = a[24] ^ d4
bc2 = bits.RotateLeft64(t, 39)
a[20] = bc0 ^ (bc2 &^ bc1)
a[21] = bc1 ^ (bc3 &^ bc2)
a[22] = bc2 ^ (bc4 &^ bc3)
a[23] = bc3 ^ (bc0 &^ bc4)
a[24] = bc4 ^ (bc1 &^ bc0)
}
}

View file

@ -1,13 +1,21 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build amd64 && !purego && gc
//go:build amd64 && !purego
package keccak
// This function is implemented in keccakf_amd64.s.
import "golang.org/x/sys/cpu"
// init sets useASM to true only when the CPU reports both BMI1 and BMI2.
// NOTE(review): useASM presumably gates the BMI2 assembly permutation, which
// the generator describes as built on ANDNQ and RORXQ — confirm at the use site.
func init() { useASM = cpu.X86.HasBMI1 && cpu.X86.HasBMI2 }
// keccakF1600BMI2 permutes state. When buf != nil, it first XORs rate bytes
// of buf into state, saving one full memory pass. When buf == nil it only
// permutes.
//
// NOTE(review): gen_keccakf_bmi2.go emits the XOR for a fixed 17 lanes
// (136 bytes), so a non-nil buf presumably has to reference at least 136
// readable bytes — confirm against callers that use a different rate.
//
//go:noescape
func keccakF1600BMI2(a *[200]byte, buf *byte)
func keccakF1600(a *[25]uint64)
// keccakF1600 permutes the 200-byte state a by calling keccakF1600BMI2 with a
// nil buffer, i.e. without XORing any input into the state first.
func keccakF1600(a *[200]byte) {
	keccakF1600BMI2(a, nil)
}
// xorAndPermute forwards state and buf unchanged to keccakF1600BMI2, which
// XORs the bytes at buf into state (when buf is non-nil) and then permutes it.
func xorAndPermute(state *[200]byte, buf *byte) {
	keccakF1600BMI2(state, buf)
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,244 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package keccak
import (
"crypto/subtle"
"encoding/binary"
"errors"
"unsafe"
"golang.org/x/sys/cpu"
)
// spongeDirection indicates the direction bytes are flowing through the sponge.
type spongeDirection int

const (
	// spongeAbsorbing indicates that the sponge is absorbing input.
	// It is the zero value of spongeDirection.
	spongeAbsorbing spongeDirection = iota
	// spongeSqueezing indicates that the sponge is being squeezed.
	spongeSqueezing
)
// state is the sponge: the 1600-bit permutation state together with the
// bookkeeping needed to absorb input and squeeze output.
type state struct {
	a [1600 / 8]byte // main state of the hash

	// a[n:rate] is the buffer. If absorbing, it's the remaining space to XOR
	// into before running the permutation. If squeezing, it's the remaining
	// output to produce before running the permutation.
	n, rate int

	// dsbyte contains the "domain separation" bits and the first bit of
	// the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the
	// SHA-3 and SHAKE functions by appending bitstrings to the message.
	// Using a little-endian bit-ordering convention, these are "01" for SHA-3
	// and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the
	// padding rule from section 5.1 is applied to pad the message to a multiple
	// of the rate, which involves adding a "1" bit, zero or more "0" bits, and
	// a final "1" bit. We merge the first "1" bit from the padding into dsbyte,
	// giving 00000110b (0x06) and 00011111b (0x1f).
	// [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf
	// "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and
	// Extendable-Output Functions (May 2014)"
	dsbyte byte

	outputLen int             // the default output size in bytes
	state     spongeDirection // whether the sponge is absorbing or squeezing
}
// BlockSize returns the rate, in bytes, of the sponge underlying this hash
// function.
func (d *state) BlockSize() int { return d.rate }
// Size returns the default output size of the hash function in bytes, i.e.
// the number of bytes Sum appends.
func (d *state) Size() int { return d.outputLen }
// Reset returns the sponge to its initial condition: the permutation state is
// zeroed, the buffer index is rewound to 0 and the direction is set back to
// absorbing. rate, dsbyte and outputLen are left untouched.
func (d *state) Reset() {
	d.n = 0
	d.state = spongeAbsorbing
	// Zero the permutation's state in one assignment.
	d.a = [1600 / 8]byte{}
}
// clone returns a pointer to a shallow copy of d. Because state contains only
// value fields (arrays and scalars), the copy shares no memory with d.
func (d *state) clone() *state {
	ret := *d
	return &ret
}
// permute applies the KeccakF-1600 permutation to d.a and rewinds the buffer
// index d.n to 0.
func (d *state) permute() {
	var a *[25]uint64
	if cpu.IsBigEndian {
		// Big-endian host: decode the byte state into 25 little-endian lanes
		// held in a temporary array.
		a = new([25]uint64)
		for i := range a {
			a[i] = binary.LittleEndian.Uint64(d.a[i*8:])
		}
	} else {
		// Little-endian host: reinterpret the 200 state bytes as 25 lanes in
		// place, so no copy is needed.
		a = (*[25]uint64)(unsafe.Pointer(&d.a))
	}

	keccakF1600(a)
	d.n = 0

	if cpu.IsBigEndian {
		// Write the permuted lanes back into the byte state.
		for i := range a {
			binary.LittleEndian.PutUint64(d.a[i*8:], a[i])
		}
	}
}
// padAndPermute appends the domain separation bits in dsbyte, applies
// the multi-bitrate 10..1 padding rule, permutes the state and switches the
// sponge to the squeezing direction.
func (d *state) padAndPermute() {
	// Pad with this instance's domain-separator bits. We know that there's
	// at least one byte of space in the sponge because, if it were full,
	// permute would have been called to empty it. dsbyte also contains the
	// first one bit for the padding. See the comment in the state struct.
	d.a[d.n] ^= d.dsbyte
	// This adds the final one bit for the padding. Because of the way that
	// bits are numbered from the LSB upwards, the final bit is the MSB of
	// the last byte.
	d.a[d.rate-1] ^= 0x80
	// Apply the permutation
	d.permute()
	d.state = spongeSqueezing
}
// Write XORs p into the sponge, running the permutation every time the rate
// portion of the state fills up. It always reports len(p) bytes written and a
// nil error. It panics if any output has already been read.
func (d *state) Write(p []byte) (n int, err error) {
	if d.state != spongeAbsorbing {
		panic("sha3: Write after Read")
	}

	total := len(p)
	for pending := p; len(pending) > 0; {
		// Absorb as much as fits into the free part of the rate.
		absorbed := subtle.XORBytes(d.a[d.n:d.rate], d.a[d.n:d.rate], pending)
		d.n += absorbed
		pending = pending[absorbed:]

		// A full sponge has to be permuted before more input fits.
		if d.n == d.rate {
			d.permute()
		}
	}
	return total, nil
}
// Read fills out with squeezed output, however long out is. The first call
// after absorbing pads and permutes the state; after that the permutation is
// re-run whenever the rate portion has been fully consumed. It always reports
// len(out) bytes read and a nil error.
func (d *state) Read(out []byte) (n int, err error) {
	// Still absorbing: finish the message before producing output.
	if d.state == spongeAbsorbing {
		d.padAndPermute()
	}

	requested := len(out)
	for dst := out; len(dst) > 0; {
		// The sponge has been squeezed dry; refill it.
		if d.n == d.rate {
			d.permute()
		}
		copied := copy(dst, d.a[d.n:d.rate])
		d.n += copied
		dst = dst[copied:]
	}
	return requested, nil
}
// Sum applies padding to a copy of the hash state, squeezes outputLen bytes
// from that copy and returns them appended to in. The receiver itself is not
// modified. It panics if any output has already been read.
func (d *state) Sum(in []byte) []byte {
	if d.state != spongeAbsorbing {
		panic("sha3: Sum after Read")
	}

	// Make a copy of the original hash so that caller can keep writing
	// and summing.
	dup := d.clone()
	hash := make([]byte, dup.outputLen, 64) // explicit cap to allow stack allocation
	dup.Read(hash)
	return append(in, hash...)
}
// Magic prefixes written at the start of a marshaled state. AppendBinary
// selects one per dsbyte value and UnmarshalBinary checks that the prefix
// matches the receiver's dsbyte.
const (
	magicSHA3   = "sha\x08"
	magicShake  = "sha\x09"
	magicCShake = "sha\x0a"
	magicKeccak = "sha\x0b"
	// marshaledSize is the exact length of a marshaled state:
	// magic || rate || main state || n || sponge direction
	marshaledSize = len(magicSHA3) + 1 + 200 + 1 + 1
)
// MarshalBinary returns the serialized sponge state. It delegates to
// AppendBinary with an empty buffer whose capacity is marshaledSize.
func (d *state) MarshalBinary() ([]byte, error) {
	return d.AppendBinary(make([]byte, 0, marshaledSize))
}
// AppendBinary appends the serialized sponge state to b and returns the
// extended slice. The layout is: magic || rate || main state || n || sponge
// direction. The returned error is always nil; an unrecognized dsbyte panics.
func (d *state) AppendBinary(b []byte) ([]byte, error) {
	// Pick the magic prefix that identifies the hash variant.
	var magic string
	switch d.dsbyte {
	case dsbyteSHA3:
		magic = magicSHA3
	case dsbyteShake:
		magic = magicShake
	case dsbyteCShake:
		magic = magicCShake
	case dsbyteKeccak:
		magic = magicKeccak
	default:
		panic("unknown dsbyte")
	}

	out := append(b, magic...)
	// rate is at most 168, and n is at most rate.
	out = append(out, byte(d.rate))
	out = append(out, d.a[:]...)
	return append(out, byte(d.n), byte(d.state)), nil
}
// UnmarshalBinary restores a sponge state previously produced by
// MarshalBinary/AppendBinary.
//
// The input must be exactly marshaledSize bytes, carry the magic prefix that
// matches the receiver's dsbyte, and have the same rate as the receiver.
// All fields are validated before the receiver is touched, so a rejected
// input leaves d exactly as it was.
func (d *state) UnmarshalBinary(b []byte) error {
	if len(b) != marshaledSize {
		return errors.New("sha3: invalid hash state")
	}

	magic := string(b[:len(magicSHA3)])
	b = b[len(magicSHA3):]
	switch {
	case magic == magicSHA3 && d.dsbyte == dsbyteSHA3:
	case magic == magicShake && d.dsbyte == dsbyteShake:
	case magic == magicCShake && d.dsbyte == dsbyteCShake:
	case magic == magicKeccak && d.dsbyte == dsbyteKeccak:
	default:
		return errors.New("sha3: invalid hash state identifier")
	}

	rate := int(b[0])
	b = b[1:]
	if rate != d.rate {
		return errors.New("sha3: invalid hash state function")
	}

	// Split the remainder into the permutation state and the two trailing
	// bookkeeping bytes, and validate the latter before mutating d.
	mainState, trailer := b[:len(d.a)], b[len(d.a):]
	n, direction := int(trailer[0]), spongeDirection(trailer[1])
	if n > d.rate {
		return errors.New("sha3: invalid hash state")
	}
	if direction != spongeAbsorbing && direction != spongeSqueezing {
		return errors.New("sha3: invalid hash state")
	}

	copy(d.a[:], mainState)
	d.n = n
	d.state = direction
	return nil
}

View file

@ -1,210 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package keccak
// Tests include all the ShortMsgKATs provided by the Keccak team at
// https://github.com/gvanas/KeccakCodePackage
//
// They only include the zero-bit case of the bitwise testvectors
// published by NIST in the draft of FIPS-202.
import (
"bytes"
"compress/flate"
"encoding"
"encoding/hex"
"encoding/json"
"hash"
"math/rand"
"os"
"strings"
"testing"
)
const (
	testString = "brekeccakkeccak koax koax"
	// katFilename is the deflate-compressed JSON file of known-answer tests
	// read by TestKeccakKats.
	katFilename = "testdata/keccakKats.json.deflate"
)
// testDigests contains functions returning hash.Hash instances
// with output-length equal to the KAT length for the legacy Keccak-256
// and Keccak-512 instances. The map keys are used to look up the matching
// vectors in the KAT file.
var testDigests = map[string]func() hash.Hash{
	"Keccak-256": NewLegacyKeccak256,
	"Keccak-512": NewLegacyKeccak512,
}
// decodeHex turns a hex-encoded string into the raw bytes it represents.
// It panics if s is not valid hex, which is acceptable for fixed test inputs.
func decodeHex(s string) []byte {
	raw, err := hex.DecodeString(s)
	if err == nil {
		return raw
	}
	panic(err)
}
// KeccakKats mirrors the JSON layout of the known-answer test file. Kats maps
// a name (TestKeccakKats indexes it with the keys of testDigests) to the list
// of vectors for that function.
type KeccakKats struct {
	Kats map[string][]struct {
		// Digest is the expected output as hex; TestKeccakKats compares it
		// against an upper-cased hex encoding.
		Digest string `json:"digest"`
		// Length is the message length in bits; TestKeccakKats hashes the
		// first Length/8 bytes of Message.
		Length int64 `json:"length"`
		// Message is the hex-encoded input.
		Message string `json:"message"`

		// Defined only for cSHAKE
		N string `json:"N"`
		S string `json:"S"`
	}
}
// TestKeccakKats tests the implementations listed in testDigests against the
// ShortMsgKATs from https://github.com/gvanas/KeccakCodePackage
// (The testvectors are stored in keccakKats.json.deflate due to their length.)
//
// Failing to open or decode the KAT file aborts the test immediately: carrying
// on would read from a nil file or compare against missing vectors.
func TestKeccakKats(t *testing.T) {
	// Read the KATs.
	deflated, err := os.Open(katFilename)
	if err != nil {
		t.Fatalf("error opening %s: %s", katFilename, err)
	}
	defer deflated.Close()

	file := flate.NewReader(deflated)
	defer file.Close()

	var katSet KeccakKats
	if err := json.NewDecoder(file).Decode(&katSet); err != nil {
		t.Fatalf("error decoding KATs: %s", err)
	}

	for algo, function := range testDigests {
		d := function()
		for _, kat := range katSet.Kats[algo] {
			d.Reset()
			in, err := hex.DecodeString(kat.Message)
			if err != nil {
				// A partially decoded message would make the slice below
				// meaningless (or out of range), so stop here.
				t.Fatalf("error decoding KAT: %s", err)
			}
			// kat.Length is in bits; only whole-byte prefixes are hashed.
			d.Write(in[:kat.Length/8])
			got := strings.ToUpper(hex.EncodeToString(d.Sum(nil)))
			if got != kat.Digest {
				t.Errorf("function=%s, length=%d\nmessage:\n %s\ngot:\n %s\nwanted:\n %s",
					algo, kat.Length, kat.Message, got, kat.Digest)
				t.Logf("wanted %+v", kat)
				t.FailNow()
			}
		}
	}
}
// TestKeccak checks the non-standardized Keccak hash functions against one
// fixed vector each: the digest of "abc".
func TestKeccak(t *testing.T) {
	type vector struct {
		newHash func() hash.Hash
		input   []byte
		digest  string
	}
	vectors := []vector{
		{
			newHash: NewLegacyKeccak256,
			input:   []byte("abc"),
			digest:  "4e03657aea45a94fc7d47ba826c8d667c0d1e6e33a64a036ec44f58fa12d6c45",
		},
		{
			newHash: NewLegacyKeccak512,
			input:   []byte("abc"),
			digest:  "18587dc2ea106b9a1563e32b3312421ca164c7f1f07bc922a9c83d77cea3a1e5d0c69910739025372dc14ac9642629379540c17e2a65b19d77aa511a9d00bb96",
		},
	}

	for i := range vectors {
		v := &vectors[i]
		h := v.newHash()
		h.Write(v.input)
		sum := h.Sum(nil)
		if expected := decodeHex(v.digest); !bytes.Equal(sum, expected) {
			t.Errorf("unexpected hash for size %d: got '%x' want '%s'", h.Size()*8, sum, v.digest)
		}
	}
}
// TestUnalignedWrite checks that feeding the same data through many small,
// irregularly sized writes yields the same digest as a single large write.
func TestUnalignedWrite(t *testing.T) {
	// Cycle through offsets which make a 137 byte sequence.
	// Because 137 is prime this sequence should exercise all corner cases.
	chunkSizes := [17]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1}

	data := sequentialBytes(0x10000)
	for alg, newDigest := range testDigests {
		d := newDigest()

		// Reference digest: one write of the whole buffer.
		d.Reset()
		d.Write(data)
		want := d.Sum(nil)

		// Same data, written in small pieces.
		d.Reset()
		for pos := 0; pos < len(data); {
			for _, step := range chunkSizes {
				// Clamp the final chunk to whatever is left.
				if remaining := len(data) - pos; remaining < step {
					step = remaining
				}
				d.Write(data[pos : pos+step])
				pos += step
			}
		}

		if got := d.Sum(nil); !bytes.Equal(got, want) {
			t.Errorf("Unaligned writes, alg=%s\ngot %q, want %q", alg, got, want)
		}
	}
}
// sequentialBytes returns a buffer of the given size filled with the
// consecutive bytes 0x00, 0x01, ... (wrapping at 256), for use in tests.
//
// The slice starts at a random offset of 0-7 bytes into its backing array so
// that alignment problems in the implementation get a chance to surface.
// See https://golang.org/issue/37644.
// Ideally, the compiler should fuzz the alignment itself.
// (See https://golang.org/issue/35128.)
func sequentialBytes(size int) []byte {
	pad := rand.Intn(8)
	backing := make([]byte, size+pad)
	out := backing[pad:]
	for i := 0; i < len(out); i++ {
		out[i] = byte(i)
	}
	return out
}
// TestMarshalUnmarshal runs the marshal/unmarshal round-trip check as a
// subtest for each legacy Keccak variant.
func TestMarshalUnmarshal(t *testing.T) {
	t.Run("Keccak-256", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak256()) })
	t.Run("Keccak-512", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak512()) })
}
// testMarshalUnmarshal checks that a hash state survives a MarshalBinary /
// UnmarshalBinary round trip: hashing buf[:n], snapshotting, clobbering the
// state with unrelated writes, restoring the snapshot and hashing buf[n:] must
// give the same digest as hashing buf in one go.
//
// TODO(filippo): move this to crypto/internal/cryptotest.
func testMarshalUnmarshal(t *testing.T, h hash.Hash) {
	buf := make([]byte, 200)
	rand.Read(buf)
	n := rand.Intn(200)

	// Reference digest over the whole buffer.
	h.Write(buf)
	want := h.Sum(nil)

	// Hash a random-length prefix and snapshot the state.
	h.Reset()
	h.Write(buf[:n])
	b, err := h.(encoding.BinaryMarshaler).MarshalBinary()
	if err != nil {
		t.Errorf("MarshalBinary: %v", err)
	}

	// Disturb the live state so that a successful restore is observable.
	h.Write(bytes.Repeat([]byte{0}, 200))
	if err := h.(encoding.BinaryUnmarshaler).UnmarshalBinary(b); err != nil {
		t.Errorf("UnmarshalBinary: %v", err)
	}

	// Finish hashing from the restored snapshot.
	h.Write(buf[n:])
	got := h.Sum(nil)
	if !bytes.Equal(got, want) {
		t.Errorf("got %x, want %x", got, want)
	}
}
// BenchmarkPermutationFunction measures the speed of the permutation function
// with no input data. Throughput is reported against the 200-byte state size,
// and the same lanes array is permuted repeatedly across iterations.
func BenchmarkPermutationFunction(b *testing.B) {
	b.SetBytes(int64(200))
	var lanes [25]uint64
	for i := 0; i < b.N; i++ {
		keccakF1600(&lanes)
	}
}

Binary file not shown.

View file

@ -0,0 +1,175 @@
//go:build ignore
// gen_keccakf_bmi2.go generates keccakf_amd64_bmi2.s — a BMI2-optimized
// Keccak-f[1600] permutation using RORXQ and ANDNQ.
// Fully unrolled (all 24 rounds).
//
// Key optimizations:
// - D values kept in registers (R14, R15, BP, SI, DX), not on stack
// - State alternates between the original array (DI) and a 200-byte stack
// buffer, avoiding a second 200-byte copy
// - Frame is only 200 bytes (25 × 8 for temp state)
// - Optional XOR-and-permute: when buf != nil, XORs rate bytes into state
// before permuting, eliminating one full memory pass
//
// Usage: go run gen_keccakf_bmi2.go
package main
import (
"fmt"
"os"
)
// rc holds the 24 round constants, one per round. emitRound loads rc[round]
// into R13, and emitChi XORs it into the first output lane of chi group 0.
var rc = [24]uint64{
	0x0000000000000001, 0x0000000000008082,
	0x800000000000808a, 0x8000000080008000,
	0x000000000000808b, 0x0000000080000001,
	0x8000000080008081, 0x8000000000008009,
	0x000000000000008a, 0x0000000000000088,
	0x0000000080008009, 0x000000008000000a,
	0x000000008000808b, 0x800000000000008b,
	0x8000000000008089, 0x8000000000008003,
	0x8000000000008002, 0x8000000000000080,
	0x000000000000800a, 0x800000008000000a,
	0x8000000080008081, 0x8000000000008080,
	0x0000000080000001, 0x8000000080008008,
}
// lane describes one input of a chi group: which state lane to load and how
// far to rotate it left after the theta XOR.
type lane struct {
	idx int // state lane index (0-24)
	rot int // left-rotation amount
}
// Chi groups: each group reads 5 lanes (after theta+rho+pi)
// and produces 5 consecutive output lanes. Group g writes lanes
// g*5 through g*5+4 of the destination.
var groups = [5][5]lane{
	{{0, 0}, {6, 44}, {12, 43}, {18, 21}, {24, 14}}, // → lanes 0-4
	{{3, 28}, {9, 20}, {10, 3}, {16, 45}, {22, 61}}, // → lanes 5-9
	{{1, 1}, {7, 6}, {13, 25}, {19, 8}, {20, 18}}, // → lanes 10-14
	{{4, 27}, {5, 36}, {11, 10}, {17, 15}, {23, 56}}, // → lanes 15-19
	{{2, 62}, {8, 55}, {14, 39}, {15, 41}, {21, 2}}, // → lanes 20-24
}
// D-value registers, indexed by lane%5. emitRound leaves D[0]..D[4] in these
// registers and emitChi XORs the matching one into every lane it loads.
var dReg = [5]string{"R14", "R15", "BP", "SI", "DX"}

const (
	// fsize is the frame size declared in the emitted TEXT directive: room
	// for one 25-lane temporary copy of the state on the stack.
	fsize     = 200
	rateLanes = 17 // rate / 8 = 136 / 8 = 17 lanes
)

// p writes one formatted line of output followed by a newline. It is assigned
// in main once the output file has been created.
var p func(string, ...any)
// main generates keccakf_amd64_bmi2.s in the current directory.
//
// The emitted function first XORs rateLanes lanes of buf into the state when
// buf is non-nil, then runs all 24 rounds fully unrolled. Even rounds read the
// state array and write the stack copy, odd rounds do the reverse, so the
// result ends up back in the array after the last round.
//
// Any failure to create, write or close the output file panics, so a
// truncated assembly file is never left behind silently.
func main() {
	f, err := os.Create("keccakf_amd64_bmi2.s")
	if err != nil {
		panic(err)
	}

	// Remember the first write failure; later lines are skipped once the
	// output is known to be broken.
	var writeErr error
	p = func(format string, args ...any) {
		if writeErr != nil {
			return
		}
		_, writeErr = fmt.Fprintf(f, format+"\n", args...)
	}

	p("// Code generated by gen_keccakf_bmi2.go. DO NOT EDIT.")
	p("")
	p("//go:build amd64 && !purego")
	p("")
	p("#include \"textflag.h\"")
	p("")

	// Single function: keccakF1600BMI2(a *[200]byte, buf *byte)
	// When buf != nil, XORs rate bytes into state before permuting.
	// When buf == nil, just permutes.
	p("// func keccakF1600BMI2(a *[200]byte, buf *byte)")
	p("TEXT ·keccakF1600BMI2(SB), NOSPLIT, $%d-16", fsize)
	p("\tMOVQ a+0(FP), DI")
	p("\tMOVQ buf+8(FP), BX")
	p("\tTESTQ BX, BX")
	p("\tJZ rounds")
	p("")
	p("\t// XOR %d lanes (%d bytes) of buf into state.", rateLanes, rateLanes*8)
	for i := 0; i < rateLanes; i++ {
		p("\tMOVQ %d(BX), AX", i*8)
		p("\tXORQ AX, %d(DI)", i*8)
	}
	p("")
	p("rounds:")

	for round := 0; round < 24; round++ {
		p("")
		p("\t// Round %d", round)
		// Even rounds read from the array, odd rounds from the stack copy.
		srcArray := (round % 2) == 0
		emitRound(srcArray, round)
	}
	p("\tRET")

	// Close explicitly so that a failed flush is reported as well.
	closeErr := f.Close()
	if writeErr != nil {
		panic(writeErr)
	}
	if closeErr != nil {
		panic(closeErr)
	}
}
// emitRound emits the instructions for one round using round constant
// rc[round].
//
// srcArray: true = source is array (DI), dest is stack (SP)
//
//	false = source is stack (SP), dest is array (DI)
func emitRound(srcArray bool, round int) {
	// Load round constant into R13.
	p("\tMOVQ $0x%016x, R13", rc[round])

	// Theta: 5 column parities → AX, BX, CX, DX, SI.
	colR := [5]string{"AX", "BX", "CX", "DX", "SI"}
	for c := 0; c < 5; c++ {
		p("\tMOVQ %s, %s", off(c, srcArray), colR[c])
		for r := 1; r < 5; r++ {
			p("\tXORQ %s, %s", off(r*5+c, srcArray), colR[c])
		}
	}

	// D values: D[x] = C[(x+4)%5] ^ rol(C[(x+1)%5], 1).
	// The rotate-left by 1 is emitted as RORXQ $63 (rotate right by 63).
	// D[0..2] go directly into R14, R15, BP (no conflicts).
	for _, x := range []int{0, 1, 2} {
		p("\tRORXQ $63, %s, %s", colR[(x+1)%5], dReg[x])
		p("\tXORQ %s, %s", colR[(x+4)%5], dReg[x])
	}
	// D[3] and D[4] target SI and DX, which still hold column parities
	// C[4] and C[3] needed as inputs, so compute via temps first.
	p("\tRORXQ $63, SI, R8")
	p("\tXORQ CX, R8")
	p("\tRORXQ $63, AX, R9")
	p("\tXORQ DX, R9")
	p("\tMOVQ R8, SI") // SI = D[3]
	p("\tMOVQ R9, DX") // DX = D[4]

	// Five chi groups. Only group 0 mixes in the round constant.
	for g := 0; g < 5; g++ {
		emitChi(g, srcArray, g == 0)
	}
}
// emitChi emits the code for chi group g. It loads the group's five source
// lanes into R8-R12, XORs each with its D register and rotates it, then
// computes the five chi outputs through AX and stores them to lanes g*5
// through g*5+4 of the destination (the opposite buffer from srcArray).
// When first is true, the round constant in R13 is XORed into output lane 0.
func emitChi(g int, srcArray, first bool) {
	B := [5]string{"R8", "R9", "R10", "R11", "R12"}

	// Load lane, XOR with D (register!), rotate.
	for i := 0; i < 5; i++ {
		l := groups[g][i]
		p("\tMOVQ %s, %s", off(l.idx, srcArray), B[i])
		p("\tXORQ %s, %s", dReg[l.idx%5], B[i])
		if l.rot != 0 {
			// A left rotation by rot is emitted as a right rotation by 64-rot.
			p("\tRORXQ $%d, %s, %s", 64-l.rot, B[i], B[i])
		}
	}

	// Chi: out[j] = B[j] ^ (~B[(j+1)%5] & B[(j+2)%5]).
	for j := 0; j < 5; j++ {
		p("\tANDNQ %s, %s, AX", B[(j+2)%5], B[(j+1)%5])
		p("\tXORQ %s, AX", B[j])
		if first && j == 0 {
			p("\tXORQ R13, AX")
		}
		p("\tMOVQ AX, %s", off(g*5+j, !srcArray))
	}
}
// off formats the memory operand for state lane idx: the byte offset idx*8
// relative to DI when array is true, or relative to SP otherwise.
func off(idx int, array bool) string {
	base := "SP"
	if array {
		base = "DI"
	}
	return fmt.Sprintf("%d(%s)", idx*8, base)
}

View file

@ -21,6 +21,7 @@ package crypto
import (
"github.com/ProjectZKM/Ziren/crates/go-runtime/zkvm_runtime"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto/keccak"
)
// zirenKeccakState implements the KeccakState interface using the Ziren zkvm_runtime.
@ -31,7 +32,7 @@ type zirenKeccakState struct {
dirty bool // whether new data has been written since last hash
}
func newZirenKeccakState() KeccakState {
func newZirenKeccakState() keccak.KeccakState {
return &zirenKeccakState{
buf: make([]byte, 0, 512), // pre-allocate reasonable capacity
}
@ -85,7 +86,7 @@ func (s *zirenKeccakState) computeHashIfNeeded() {
// NewKeccakState creates a new KeccakState
// This uses a Ziren-optimized implementation that leverages the zkvm_runtime.Keccak256 system call.
func NewKeccakState() KeccakState {
func NewKeccakState() keccak.KeccakState {
return newZirenKeccakState()
}

View file

@ -22,13 +22,14 @@ import (
"sync"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/crypto/keccak"
"github.com/ethereum/go-ethereum/rlp"
)
// hasher is a type used for the trie Hash operation. A hasher has some
// internal preallocated temp space
type hasher struct {
sha crypto.KeccakState
sha keccak.KeccakState
tmp []byte
encbuf rlp.EncoderBuffer
parallel bool // Whether to use parallel threads when hashing