diff --git a/consensus/clique/clique.go b/consensus/clique/clique.go index f44afde241..70913092c9 100644 --- a/consensus/clique/clique.go +++ b/consensus/clique/clique.go @@ -37,7 +37,6 @@ import ( "github.com/ethereum/go-ethereum/core/types/bal" "github.com/ethereum/go-ethereum/core/vm" "github.com/ethereum/go-ethereum/crypto" - "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/ethdb" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/params" @@ -627,9 +626,11 @@ func (c *Clique) Close() error { // SealHash returns the hash of a block prior to it being sealed. func SealHash(header *types.Header) (hash common.Hash) { - hasher := keccak.NewLegacyKeccak256() + hasher := crypto.NewKeccakState() + defer crypto.ReturnToPool(hasher) + encodeSigHeader(hasher, header) - hasher.(crypto.KeccakState).Read(hash[:]) + hasher.Sum(hash[:0]) return hash } diff --git a/core/types/bloom9.go b/core/types/bloom9.go index 1d57e8e4bc..7473426414 100644 --- a/core/types/bloom9.go +++ b/core/types/bloom9.go @@ -23,7 +23,7 @@ import ( "github.com/ethereum/go-ethereum/common/bitutil" "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/crypto/keccak" ) type bytesBacked interface { @@ -141,7 +141,7 @@ func Bloom9(data []byte) []byte { // bloomValues returns the bytes (index-value pairs) to set for the given data func bloomValues(data []byte, hashbuf *[6]byte) (uint, byte, uint, byte, uint, byte) { - sha := hasherPool.Get().(crypto.KeccakState) + sha := hasherPool.Get().(keccak.KeccakState) sha.Reset() sha.Write(data) sha.Read(hashbuf[:]) diff --git a/core/types/hashing.go b/core/types/hashing.go index 98fe64e15a..2b50cf92b2 100644 --- a/core/types/hashing.go +++ b/core/types/hashing.go @@ -24,6 +24,7 @@ import ( "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/rlp" ) @@ -55,7 +56,7 @@ func getPooledBuffer(size uint64) ([]byte, *bytes.Buffer, error) { // rlpHash encodes x and hashes the encoded bytes. func rlpHash(x interface{}) (h common.Hash) { - sha := hasherPool.Get().(crypto.KeccakState) + sha := hasherPool.Get().(keccak.KeccakState) defer hasherPool.Put(sha) sha.Reset() rlp.Encode(sha, x) @@ -66,7 +67,7 @@ func rlpHash(x interface{}) (h common.Hash) { // prefixedRlpHash writes the prefix into the hasher before rlp-encoding x. // It's used for typed transactions. func prefixedRlpHash(prefix byte, x interface{}) (h common.Hash) { - sha := hasherPool.Get().(crypto.KeccakState) + sha := hasherPool.Get().(keccak.KeccakState) defer hasherPool.Put(sha) sha.Reset() sha.Write([]byte{prefix}) diff --git a/crypto/crypto.go b/crypto/crypto.go index db6b6ee071..138630fe00 100644 --- a/crypto/crypto.go +++ b/crypto/crypto.go @@ -24,13 +24,13 @@ import ( "encoding/hex" "errors" "fmt" - "hash" "io" "math/big" "os" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/math" + "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/rlp" ) @@ -59,16 +59,8 @@ type EllipticCurve interface { Unmarshal(data []byte) (x, y *big.Int) } -// KeccakState wraps sha3.state. In addition to the usual hash methods, it also supports -// Read to get a variable amount of data from the hash state. Read is faster than Sum -// because it doesn't copy the internal state, but also modifies the internal state. -type KeccakState interface { - hash.Hash - Read([]byte) (int, error) -} - // HashData hashes the provided data using the KeccakState and returns a 32 byte hash -func HashData(kh KeccakState, data []byte) (h common.Hash) { +func HashData(kh keccak.KeccakState, data []byte) (h common.Hash) { kh.Reset() kh.Write(data) kh.Read(h[:]) diff --git a/crypto/keccak.go b/crypto/keccak.go index 3fafddc92e..22298d334a 100644 --- a/crypto/keccak.go +++ b/crypto/keccak.go @@ -26,38 +26,40 @@ import ( ) // NewKeccakState creates a new KeccakState -func NewKeccakState() KeccakState { - return keccak.NewLegacyKeccak256().(KeccakState) +func NewKeccakState() keccak.KeccakState { + h := hasherPool.Get().(keccak.KeccakState) + h.Reset() + return h } +func ReturnToPool(h keccak.KeccakState) { hasherPool.Put(h) } + var hasherPool = sync.Pool{ New: func() any { - return keccak.NewLegacyKeccak256().(KeccakState) + return keccak.NewLegacyKeccak256() }, } // Keccak256 calculates and returns the Keccak256 hash of the input data. func Keccak256(data ...[]byte) []byte { b := make([]byte, 32) - d := hasherPool.Get().(KeccakState) - d.Reset() + d := NewKeccakState() for _, b := range data { d.Write(b) } d.Read(b) - hasherPool.Put(d) + ReturnToPool(d) return b } // Keccak256Hash calculates and returns the Keccak256 hash of the input data, // converting it to an internal Hash data structure. func Keccak256Hash(data ...[]byte) (h common.Hash) { - d := hasherPool.Get().(KeccakState) - d.Reset() + d := NewKeccakState() for _, b := range data { d.Write(b) } - d.Read(h[:]) - hasherPool.Put(d) + d.Read(h[:]) //nolint:errcheck + ReturnToPool(d) return h } diff --git a/crypto/keccak/hashes.go b/crypto/keccak/hashes.go deleted file mode 100644 index c78c5fe992..0000000000 --- a/crypto/keccak/hashes.go +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package keccak - -// This file provides functions for creating instances of the SHA-3 -// and SHAKE hash functions, as well as utility functions for hashing -// bytes. - -import ( - "hash" -) - -const ( - dsbyteSHA3 = 0b00000110 - dsbyteKeccak = 0b00000001 - dsbyteShake = 0b00011111 - dsbyteCShake = 0b00000100 - - // rateK[c] is the rate in bytes for Keccak[c] where c is the capacity in - // bits. Given the sponge size is 1600 bits, the rate is 1600 - c bits. - rateK256 = (1600 - 256) / 8 - rateK448 = (1600 - 448) / 8 - rateK512 = (1600 - 512) / 8 - rateK768 = (1600 - 768) / 8 - rateK1024 = (1600 - 1024) / 8 -) - -// NewLegacyKeccak256 creates a new Keccak-256 hash. -// -// Only use this function if you require compatibility with an existing cryptosystem -// that uses non-standard padding. All other users should use New256 instead. -func NewLegacyKeccak256() hash.Hash { - return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteKeccak} -} - -// NewLegacyKeccak512 creates a new Keccak-512 hash. -// -// Only use this function if you require compatibility with an existing cryptosystem -// that uses non-standard padding. All other users should use New512 instead. -func NewLegacyKeccak512() hash.Hash { - return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteKeccak} -} diff --git a/crypto/keccak/keccaf_arm64.s b/crypto/keccak/keccaf_arm64.s new file mode 100644 index 0000000000..a5f4617fc0 --- /dev/null +++ b/crypto/keccak/keccaf_arm64.s @@ -0,0 +1,226 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !purego + +#include "textflag.h" + +// func keccakF1600Sha3(a *[200]byte, buf *byte) +// When buf != nil, XORs rate bytes into state before permuting. +// When buf == nil, just permutes. +TEXT ·keccakF1600Sha3(SB), $200-16 + MOVD a+0(FP), R0 + MOVD buf+8(FP), R3 + MOVD $round_consts<>(SB), R1 + MOVD $24, R2 // counter for loop + + CBZ R3, load_state + + // XOR path: load state and XOR with buf (17 lanes = 136 bytes) + VLD1.P 16(R0), [V0.D1, V1.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V0.B16, V0.B16 + VEOR V26.B16, V1.B16, V1.B16 + + VLD1.P 16(R0), [V2.D1, V3.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V2.B16, V2.B16 + VEOR V26.B16, V3.B16, V3.B16 + + VLD1.P 16(R0), [V4.D1, V5.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V4.B16, V4.B16 + VEOR V26.B16, V5.B16, V5.B16 + + VLD1.P 16(R0), [V6.D1, V7.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V6.B16, V6.B16 + VEOR V26.B16, V7.B16, V7.B16 + + VLD1.P 16(R0), [V8.D1, V9.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V8.B16, V8.B16 + VEOR V26.B16, V9.B16, V9.B16 + + VLD1.P 16(R0), [V10.D1, V11.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V10.B16, V10.B16 + VEOR V26.B16, V11.B16, V11.B16 + + VLD1.P 16(R0), [V12.D1, V13.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V12.B16, V12.B16 + VEOR V26.B16, V13.B16, V13.B16 + + VLD1.P 16(R0), [V14.D1, V15.D1] + VLD1.P 16(R3), [V25.D1, V26.D1] + VEOR V25.B16, V14.B16, V14.B16 + VEOR V26.B16, V15.B16, V15.B16 + + // Lane 16: last data lane (8 bytes at buf offset 128) + VLD1.P 16(R0), [V16.D1, V17.D1] + VLD1 (R3), [V25.D1] + VEOR V25.B16, V16.B16, V16.B16 + + // Remaining state lanes 18-24 (no data to XOR) + VLD1.P 16(R0), [V18.D1, V19.D1] + VLD1.P 16(R0), [V20.D1, V21.D1] + VLD1.P 16(R0), [V22.D1, V23.D1] + VLD1 (R0), [V24.D1] + + SUB $192, R0, R0 + B rounds + +load_state: + VLD1.P 16(R0), [V0.D1, V1.D1] + VLD1.P 16(R0), [V2.D1, V3.D1] + VLD1.P 16(R0), [V4.D1, V5.D1] + VLD1.P 16(R0), [V6.D1, V7.D1] + VLD1.P 16(R0), [V8.D1, V9.D1] + VLD1.P 16(R0), [V10.D1, V11.D1] + VLD1.P 16(R0), [V12.D1, V13.D1] + VLD1.P 16(R0), [V14.D1, V15.D1] + VLD1.P 16(R0), [V16.D1, V17.D1] + VLD1.P 16(R0), [V18.D1, V19.D1] + VLD1.P 16(R0), [V20.D1, V21.D1] + VLD1.P 16(R0), [V22.D1, V23.D1] + VLD1 (R0), [V24.D1] + + SUB $192, R0, R0 + +rounds: + // theta + VEOR3 V20.B16, V15.B16, V10.B16, V25.B16 + VEOR3 V21.B16, V16.B16, V11.B16, V26.B16 + VEOR3 V22.B16, V17.B16, V12.B16, V27.B16 + VEOR3 V23.B16, V18.B16, V13.B16, V28.B16 + VEOR3 V24.B16, V19.B16, V14.B16, V29.B16 + VEOR3 V25.B16, V5.B16, V0.B16, V25.B16 + VEOR3 V26.B16, V6.B16, V1.B16, V26.B16 + VEOR3 V27.B16, V7.B16, V2.B16, V27.B16 + VEOR3 V28.B16, V8.B16, V3.B16, V28.B16 + VEOR3 V29.B16, V9.B16, V4.B16, V29.B16 + + VRAX1 V27.D2, V25.D2, V30.D2 + VRAX1 V28.D2, V26.D2, V31.D2 + VRAX1 V29.D2, V27.D2, V27.D2 + VRAX1 V25.D2, V28.D2, V28.D2 + VRAX1 V26.D2, V29.D2, V29.D2 + + // theta and rho and Pi + VEOR V29.B16, V0.B16, V0.B16 + + VXAR $63, V30.D2, V1.D2, V25.D2 + + VXAR $20, V30.D2, V6.D2, V1.D2 + VXAR $44, V28.D2, V9.D2, V6.D2 + VXAR $3, V31.D2, V22.D2, V9.D2 + VXAR $25, V28.D2, V14.D2, V22.D2 + VXAR $46, V29.D2, V20.D2, V14.D2 + + VXAR $2, V31.D2, V2.D2, V26.D2 + + VXAR $21, V31.D2, V12.D2, V2.D2 + VXAR $39, V27.D2, V13.D2, V12.D2 + VXAR $56, V28.D2, V19.D2, V13.D2 + VXAR $8, V27.D2, V23.D2, V19.D2 + VXAR $23, V29.D2, V15.D2, V23.D2 + + VXAR $37, V28.D2, V4.D2, V15.D2 + + VXAR $50, V28.D2, V24.D2, V28.D2 + VXAR $62, V30.D2, V21.D2, V24.D2 + VXAR $9, V27.D2, V8.D2, V8.D2 + VXAR $19, V30.D2, V16.D2, V4.D2 + VXAR $28, V29.D2, V5.D2, V16.D2 + + VXAR $36, V27.D2, V3.D2, V5.D2 + + VXAR $43, V27.D2, V18.D2, V27.D2 + VXAR $49, V31.D2, V17.D2, V3.D2 + VXAR $54, V30.D2, V11.D2, V30.D2 + VXAR $58, V31.D2, V7.D2, V31.D2 + VXAR $61, V29.D2, V10.D2, V29.D2 + + // chi and iota + VBCAX V8.B16, V22.B16, V26.B16, V20.B16 + VBCAX V22.B16, V23.B16, V8.B16, V21.B16 + VBCAX V23.B16, V24.B16, V22.B16, V22.B16 + VBCAX V24.B16, V26.B16, V23.B16, V23.B16 + VBCAX V26.B16, V8.B16, V24.B16, V24.B16 + + VLD1R.P 8(R1), [V26.D2] + + VBCAX V3.B16, V19.B16, V30.B16, V17.B16 + VBCAX V19.B16, V15.B16, V3.B16, V18.B16 + VBCAX V15.B16, V16.B16, V19.B16, V19.B16 + VBCAX V16.B16, V30.B16, V15.B16, V15.B16 + VBCAX V30.B16, V3.B16, V16.B16, V16.B16 + + VBCAX V31.B16, V12.B16, V25.B16, V10.B16 + VBCAX V12.B16, V13.B16, V31.B16, V11.B16 + VBCAX V13.B16, V14.B16, V12.B16, V12.B16 + VBCAX V14.B16, V25.B16, V13.B16, V13.B16 + VBCAX V25.B16, V31.B16, V14.B16, V14.B16 + + VBCAX V4.B16, V9.B16, V29.B16, V7.B16 + VBCAX V9.B16, V5.B16, V4.B16, V8.B16 + VBCAX V5.B16, V6.B16, V9.B16, V9.B16 + VBCAX V6.B16, V29.B16, V5.B16, V5.B16 + VBCAX V29.B16, V4.B16, V6.B16, V6.B16 + + VBCAX V28.B16, V0.B16, V27.B16, V3.B16 + VBCAX V0.B16, V1.B16, V28.B16, V4.B16 + + VBCAX V1.B16, V2.B16, V0.B16, V0.B16 // iota (chi part) + + VBCAX V2.B16, V27.B16, V1.B16, V1.B16 + VBCAX V27.B16, V28.B16, V2.B16, V2.B16 + + VEOR V26.B16, V0.B16, V0.B16 // iota + + SUB $1, R2, R2 + CBNZ R2, rounds + + VST1.P [V0.D1, V1.D1], 16(R0) + VST1.P [V2.D1, V3.D1], 16(R0) + VST1.P [V4.D1, V5.D1], 16(R0) + VST1.P [V6.D1, V7.D1], 16(R0) + VST1.P [V8.D1, V9.D1], 16(R0) + VST1.P [V10.D1, V11.D1], 16(R0) + VST1.P [V12.D1, V13.D1], 16(R0) + VST1.P [V14.D1, V15.D1], 16(R0) + VST1.P [V16.D1, V17.D1], 16(R0) + VST1.P [V18.D1, V19.D1], 16(R0) + VST1.P [V20.D1, V21.D1], 16(R0) + VST1.P [V22.D1, V23.D1], 16(R0) + VST1 [V24.D1], (R0) + + RET + +DATA round_consts<>+0x00(SB)/8, $0x0000000000000001 +DATA round_consts<>+0x08(SB)/8, $0x0000000000008082 +DATA round_consts<>+0x10(SB)/8, $0x800000000000808a +DATA round_consts<>+0x18(SB)/8, $0x8000000080008000 +DATA round_consts<>+0x20(SB)/8, $0x000000000000808b +DATA round_consts<>+0x28(SB)/8, $0x0000000080000001 +DATA round_consts<>+0x30(SB)/8, $0x8000000080008081 +DATA round_consts<>+0x38(SB)/8, $0x8000000000008009 +DATA round_consts<>+0x40(SB)/8, $0x000000000000008a +DATA round_consts<>+0x48(SB)/8, $0x0000000000000088 +DATA round_consts<>+0x50(SB)/8, $0x0000000080008009 +DATA round_consts<>+0x58(SB)/8, $0x000000008000000a +DATA round_consts<>+0x60(SB)/8, $0x000000008000808b +DATA round_consts<>+0x68(SB)/8, $0x800000000000008b +DATA round_consts<>+0x70(SB)/8, $0x8000000000008089 +DATA round_consts<>+0x78(SB)/8, $0x8000000000008003 +DATA round_consts<>+0x80(SB)/8, $0x8000000000008002 +DATA round_consts<>+0x88(SB)/8, $0x8000000000000080 +DATA round_consts<>+0x90(SB)/8, $0x000000000000800a +DATA round_consts<>+0x98(SB)/8, $0x800000008000000a +DATA round_consts<>+0xA0(SB)/8, $0x8000000080008081 +DATA round_consts<>+0xA8(SB)/8, $0x8000000000008080 +DATA round_consts<>+0xB0(SB)/8, $0x0000000080000001 +DATA round_consts<>+0xB8(SB)/8, $0x8000000080008008 +GLOBL round_consts<>(SB), NOPTR|RODATA, $192 diff --git a/crypto/keccak/keccak.go b/crypto/keccak/keccak.go new file mode 100644 index 0000000000..97cd077312 --- /dev/null +++ b/crypto/keccak/keccak.go @@ -0,0 +1,20 @@ +// Package keccak provides Keccak-256 hashing with platform-specific acceleration. +package keccak + +import "hash" + +// KeccakState wraps the keccak hasher. In addition to the usual hash methods, it also supports +// Read to get a variable amount of data from the hash state. Read is faster than Sum +// because it doesn't copy the internal state, but also modifies the internal state. +type KeccakState interface { + hash.Hash + Read([]byte) (int, error) +} + +const rate = 136 // sponge rate for Keccak-256: (1600 - 2*256) / 8 + +var _ KeccakState = (*Hasher)(nil) + +func NewLegacyKeccak256() *Hasher { + return &Hasher{} +} diff --git a/crypto/keccak/keccak_arm64.go b/crypto/keccak/keccak_arm64.go new file mode 100644 index 0000000000..3b9c879fe2 --- /dev/null +++ b/crypto/keccak/keccak_arm64.go @@ -0,0 +1,30 @@ +//go:build arm64 && !purego + +package keccak + +import ( + "runtime" + + "golang.org/x/sys/cpu" +) + +// Apple Silicon always has Armv8.2-A SHA3 extensions (VEOR3, VRAX1, VXAR, VBCAX). +// On other ARM64 platforms, detect at runtime via CPU feature flags. +// When SHA3 is unavailable, falls back to x/crypto/sha3. +func init() { + useASM = runtime.GOOS == "darwin" || runtime.GOOS == "ios" || cpu.ARM64.HasSHA3 +} + +// keccakF1600Sha3 permutes state. When buf != nil, it first XORs rate bytes +// of buf into state, saving one full memory pass. +// +//go:noescape +func keccakF1600Sha3(a *[200]byte, buf *byte) + +func keccakF1600(a *[200]byte) { + keccakF1600Sha3(a, nil) +} + +func xorAndPermute(state *[200]byte, buf *byte) { + keccakF1600Sha3(state, buf) +} diff --git a/crypto/keccak/keccak_asm.go b/crypto/keccak/keccak_asm.go new file mode 100644 index 0000000000..37ce284f02 --- /dev/null +++ b/crypto/keccak/keccak_asm.go @@ -0,0 +1,233 @@ +//go:build (amd64 || arm64) && !purego + +package keccak + +import ( + "encoding/binary" + + "golang.org/x/crypto/sha3" +) + +// useASM is set by platform-specific init to indicate hardware acceleration is available. +// When false, Sum256 and Hasher fall back to x/crypto/sha3. +var useASM bool + +// sponge is the core Keccak-256 sponge state used by native (asm) implementations. +type sponge struct { + state [200]byte + buf [rate]byte + absorbed int + squeezing bool + readIdx int // index into state for next Read byte +} + +// Reset resets the sponge to its initial state. +func (s *sponge) Reset() { + s.state = [200]byte{} + s.absorbed = 0 + s.squeezing = false + s.readIdx = 0 +} + +// Write absorbs data into the sponge. +// Panics if called after Read. +func (s *sponge) Write(p []byte) (int, error) { + if s.squeezing { + panic("keccak: Write after Read") + } + n := len(p) + if s.absorbed > 0 { + x := copy(s.buf[s.absorbed:rate], p) + s.absorbed += x + p = p[x:] + if s.absorbed == rate { + xorAndPermute(&s.state, &s.buf[0]) + s.absorbed = 0 + } + } + + for len(p) >= rate { + xorAndPermute(&s.state, &p[0]) + p = p[rate:] + } + + if len(p) > 0 { + s.absorbed = copy(s.buf[:], p) + } + return n, nil +} + +// Sum256 finalizes and returns the 32-byte Keccak-256 digest. +// Does not modify the sponge state. +// Panics if called after Read. +func (s *sponge) Sum256() [32]byte { + if s.squeezing { + panic("keccak: Sum after Read") + } + state := s.state + xorIn(&state, s.buf[:s.absorbed]) + state[s.absorbed] ^= 0x01 + state[rate-1] ^= 0x80 + keccakF1600(&state) + return [32]byte(state[:32]) +} + +// Sum appends the current Keccak-256 digest to b and returns the resulting slice. +// Does not modify the sponge state. +func (s *sponge) Sum(b []byte) []byte { + d := s.Sum256() + return append(b, d[:]...) +} + +// Size returns the number of bytes Sum will produce (32). +func (s *sponge) Size() int { return 32 } + +// BlockSize returns the sponge rate in bytes (136). +func (s *sponge) BlockSize() int { return rate } + +// Read squeezes an arbitrary number of bytes from the sponge. +// On the first call, it pads and permutes, transitioning from absorbing to squeezing. +// Subsequent calls to Write will panic. It never returns an error. +func (s *sponge) Read(out []byte) (int, error) { + if !s.squeezing { + s.padAndSqueeze() + } + + n := len(out) + for len(out) > 0 { + x := copy(out, s.state[s.readIdx:rate]) + s.readIdx += x + out = out[x:] + if s.readIdx == rate { + keccakF1600(&s.state) + s.readIdx = 0 + } + } + return n, nil +} + +func (s *sponge) padAndSqueeze() { + xorIn(&s.state, s.buf[:s.absorbed]) + s.state[s.absorbed] ^= 0x01 + s.state[rate-1] ^= 0x80 + keccakF1600(&s.state) + s.squeezing = true + s.readIdx = 0 +} + +// sum256Sponge computes Keccak-256 in one shot using the assembly permutation. +func sum256Sponge(data []byte) [32]byte { + var state [200]byte + + for len(data) >= rate { + xorAndPermute(&state, &data[0]) + data = data[rate:] + } + + xorIn(&state, data) + state[len(data)] ^= 0x01 + state[rate-1] ^= 0x80 + keccakF1600(&state) + + return [32]byte(state[:32]) +} + +// Sum256 computes the Keccak-256 hash of data. Zero heap allocations when hardware +// acceleration is available. +func Sum256(data []byte) [32]byte { + if !useASM { + return sum256XCrypto(data) + } + return sum256Sponge(data) +} + +func sum256XCrypto(data []byte) [32]byte { + h := sha3.NewLegacyKeccak256() + h.Write(data) + var out [32]byte + h.Sum(out[:0]) + return out +} + +// Hasher is a streaming Keccak-256 hasher. +// Uses platform assembly when available, x/crypto/sha3 otherwise. +type Hasher struct { + sponge + xc KeccakState // x/crypto fallback +} + +// Reset resets the hasher to its initial state. +func (h *Hasher) Reset() { + if useASM { + h.sponge.Reset() + } else { + if h.xc == nil { + h.xc = sha3.NewLegacyKeccak256().(KeccakState) + } else { + h.xc.Reset() + } + } +} + +// Write absorbs data into the hasher. +// Panics if called after Read. +func (h *Hasher) Write(p []byte) (int, error) { + if !useASM { + if h.xc == nil { + h.xc = sha3.NewLegacyKeccak256().(KeccakState) + } + return h.xc.Write(p) + } + return h.sponge.Write(p) +} + +// Sum256 finalizes and returns the 32-byte Keccak-256 digest. +// Does not modify the hasher state. +func (h *Hasher) Sum256() [32]byte { + if !useASM { + if h.xc == nil { + return Sum256(nil) + } + var out [32]byte + h.xc.Sum(out[:0]) + return out + } + return h.sponge.Sum256() +} + +// Sum appends the current Keccak-256 digest to b and returns the resulting slice. +// Does not modify the hasher state. +func (h *Hasher) Sum(b []byte) []byte { + if !useASM { + if h.xc == nil { + d := Sum256(nil) + return append(b, d[:]...) + } + return h.xc.Sum(b) + } + return h.sponge.Sum(b) +} + +// Read squeezes an arbitrary number of bytes from the sponge. +// On the first call, it pads and permutes, transitioning from absorbing to squeezing. +// Subsequent calls to Write will panic. It never returns an error. +func (h *Hasher) Read(out []byte) (int, error) { + if !useASM { + if h.xc == nil { + h.xc = sha3.NewLegacyKeccak256().(KeccakState) + } + return h.xc.Read(out) + } + return h.sponge.Read(out) +} + +// xorIn XORs data into the first len(data) bytes of state using uint64 loads. +func xorIn(state *[200]byte, data []byte) { + for i := 0; i+8 <= len(data); i += 8 { + v := binary.LittleEndian.Uint64(state[i:]) ^ binary.LittleEndian.Uint64(data[i:]) + binary.LittleEndian.PutUint64(state[i:], v) + } + for i := len(data) &^ 7; i < len(data); i++ { + state[i] ^= data[i] + } +} diff --git a/crypto/keccak/keccak_default.go b/crypto/keccak/keccak_default.go new file mode 100644 index 0000000000..2e8eee382b --- /dev/null +++ b/crypto/keccak/keccak_default.go @@ -0,0 +1,71 @@ +//go:build (!arm64 && !amd64) || purego + +package keccak + +import ( + "golang.org/x/crypto/sha3" +) + +// Sum256 computes the Keccak-256 hash of data. +// On non-arm64 platforms, delegates to x/crypto/sha3.NewLegacyKeccak256(). +func Sum256(data []byte) [32]byte { + h := sha3.NewLegacyKeccak256() + h.Write(data) + var out [32]byte + h.Sum(out[:0]) + return out +} + +// Hasher is a streaming Keccak-256 hasher wrapping x/crypto/sha3. +type Hasher struct { + h KeccakState +} + +func (h *Hasher) init() { + if h.h == nil { + h.h = sha3.NewLegacyKeccak256().(KeccakState) + } +} + +// Reset resets the hasher to its initial state. +func (h *Hasher) Reset() { + h.init() + h.h.Reset() +} + +// Write absorbs data into the hasher. +// Panics if called after Read. +func (h *Hasher) Write(p []byte) (int, error) { + h.init() + return h.h.Write(p) +} + +// Sum256 finalizes and returns the 32-byte Keccak-256 digest. +// Does not modify the hasher state. +func (h *Hasher) Sum256() [32]byte { + h.init() + var out [32]byte + h.h.Sum(out[:0]) + return out +} + +// Sum appends the current Keccak-256 digest to b and returns the resulting slice. +// Does not modify the hasher state. +func (h *Hasher) Sum(b []byte) []byte { + h.init() + return h.h.Sum(b) +} + +// Size returns the number of bytes Sum will produce (32). +func (h *Hasher) Size() int { return 32 } + +// BlockSize returns the sponge rate in bytes (136). +func (h *Hasher) BlockSize() int { return rate } + +// Read squeezes an arbitrary number of bytes from the sponge. +// On the first call, it pads and permutes, transitioning from absorbing to squeezing. +// Subsequent calls to Write will panic. It never returns an error. +func (h *Hasher) Read(out []byte) (int, error) { + h.init() + return h.h.Read(out) +} diff --git a/crypto/keccak/keccak_test.go b/crypto/keccak/keccak_test.go new file mode 100644 index 0000000000..4a9875585c --- /dev/null +++ b/crypto/keccak/keccak_test.go @@ -0,0 +1,353 @@ +package keccak + +import ( + "bytes" + "encoding/hex" + "fmt" + "testing" + + "golang.org/x/crypto/sha3" +) + +func TestSum256Empty(t *testing.T) { + got := Sum256(nil) + // Known Keccak-256 of empty string. + want, _ := hex.DecodeString("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470") + if !bytes.Equal(got[:], want) { + t.Fatalf("Sum256(nil) = %x, want %x", got, want) + } +} + +func TestSum256Hello(t *testing.T) { + got := Sum256([]byte("hello")) + want, _ := hex.DecodeString("1c8aff950685c2ed4bc3174f3472287b56d9517b9c948127319a09a7a36deac8") + if !bytes.Equal(got[:], want) { + t.Fatalf("Sum256(hello) = %x, want %x", got, want) + } +} + +func TestSum256LargeData(t *testing.T) { + // Test with data larger than one block (rate=136 bytes). + data := make([]byte, 500) + for i := range data { + data[i] = byte(i) + } + got := Sum256(data) + // Verify against streaming Hasher. + var h Hasher + h.Write(data) + want := h.Sum256() + if got != want { + t.Fatalf("Sum256 vs Hasher mismatch: %x vs %x", got, want) + } +} + +func TestHasherStreaming(t *testing.T) { + data := []byte("hello world, this is a longer test string for streaming keccak") + // All at once. + want := Sum256(data) + // Byte by byte. + var h Hasher + for _, b := range data { + h.Write([]byte{b}) + } + got := h.Sum256() + if got != want { + t.Fatalf("streaming byte-by-byte: %x vs %x", got, want) + } +} + +func TestHasherMultiBlock(t *testing.T) { + // Test with exactly 2 blocks + partial. + data := make([]byte, rate*2+50) + for i := range data { + data[i] = byte(i * 7) + } + want := Sum256(data) + // Write in chunks of 37 (not aligned to rate). + var h Hasher + for i := 0; i < len(data); i += 37 { + end := i + 37 + if end > len(data) { + end = len(data) + } + h.Write(data[i:end]) + } + got := h.Sum256() + if got != want { + t.Fatalf("multi-block streaming: %x vs %x", got, want) + } +} + +func TestReadMatchesSum256(t *testing.T) { + // Read of 32 bytes should produce the same result as Sum256. + data := []byte("hello") + var h Hasher + h.Write(data) + var got [32]byte + h.Read(got[:]) + want := Sum256(data) + if got != want { + t.Fatalf("Read(32) = %x, want %x", got, want) + } +} + +func TestReadMatchesXCrypto(t *testing.T) { + // Compare Read output against x/crypto/sha3 for various lengths. + for _, readLen := range []int{32, 64, 136, 200, 500} { + data := []byte("test data for read comparison") + ref := sha3.NewLegacyKeccak256() + ref.Write(data) + want := make([]byte, readLen) + ref.(KeccakState).Read(want) + + var h Hasher + h.Write(data) + got := make([]byte, readLen) + h.Read(got) + if !bytes.Equal(got, want) { + t.Fatalf("Read(%d) mismatch:\ngot: %x\nwant: %x", readLen, got, want) + } + } +} + +func TestReadMultipleCalls(t *testing.T) { + // Multiple Read calls should produce the same output as one large Read. + data := []byte("streaming read test") + + // One large read. + var h1 Hasher + h1.Write(data) + all := make([]byte, 300) + h1.Read(all) + + // Multiple small reads. + var h2 Hasher + h2.Write(data) + var parts []byte + for i := 0; i < 300; { + chunk := 37 + if i+chunk > 300 { + chunk = 300 - i + } + buf := make([]byte, chunk) + h2.Read(buf) + parts = append(parts, buf...) + i += chunk + } + if !bytes.Equal(all, parts) { + t.Fatalf("multi-read mismatch:\ngot: %x\nwant: %x", parts, all) + } +} + +func TestReadEmpty(t *testing.T) { + // Read from hasher with no data written. + ref := sha3.NewLegacyKeccak256() + want := make([]byte, 32) + ref.(KeccakState).Read(want) + + var h Hasher + got := make([]byte, 32) + h.Read(got) + if !bytes.Equal(got, want) { + t.Fatalf("Read empty mismatch:\ngot: %x\nwant: %x", got, want) + } +} + +func TestReadAfterReset(t *testing.T) { + var h Hasher + h.Write([]byte("first")) + h.Read(make([]byte, 32)) + + // Reset should allow Write again. + h.Reset() + h.Write([]byte("second")) + got := make([]byte, 32) + h.Read(got) + + want := Sum256([]byte("second")) + if !bytes.Equal(got, want[:]) { + t.Fatalf("Read after Reset mismatch:\ngot: %x\nwant: %x", got, want) + } +} + +func TestWriteAfterReadPanics(t *testing.T) { + defer func() { + if r := recover(); r == nil { + t.Fatal("expected panic on Write after Read") + } + }() + var h Hasher + h.Write([]byte("data")) + h.Read(make([]byte, 32)) + h.Write([]byte("more")) // should panic +} + +func FuzzSum256(f *testing.F) { + f.Add([]byte(nil)) + f.Add([]byte("hello")) + f.Add([]byte("hello world, this is a longer test string for streaming keccak")) + f.Add(make([]byte, rate)) + f.Add(make([]byte, rate+1)) + f.Add(make([]byte, rate*3+50)) + + f.Fuzz(func(t *testing.T, data []byte) { + // Reference: x/crypto NewLegacyKeccak256. + ref := sha3.NewLegacyKeccak256() + ref.Write(data) + want := ref.Sum(nil) + + // Test Sum256. + got := Sum256(data) + if !bytes.Equal(got[:], want) { + t.Fatalf("Sum256 mismatch for len=%d\ngot: %x\nwant: %x", len(data), got, want) + } + + // Test streaming Hasher (write all at once). + var h Hasher + h.Write(data) + gotH := h.Sum256() + if !bytes.Equal(gotH[:], want) { + t.Fatalf("Hasher mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotH, want) + } + + // Test streaming Hasher (byte-by-byte). + h.Reset() + for _, b := range data { + h.Write([]byte{b}) + } + gotS := h.Sum256() + if !bytes.Equal(gotS[:], want) { + t.Fatalf("Hasher byte-by-byte mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotS, want) + } + + // Test Read (32 bytes) matches Sum256. + h.Reset() + h.Write(data) + gotRead := make([]byte, 32) + h.Read(gotRead) + if !bytes.Equal(gotRead, want) { + t.Fatalf("Read(32) mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotRead, want) + } + + // Test Read (extended output) matches x/crypto. + ref.Reset() + ref.Write(data) + wantExt := make([]byte, 200) + ref.(KeccakState).Read(wantExt) + + h.Reset() + h.Write(data) + gotExt := make([]byte, 200) + h.Read(gotExt) + if !bytes.Equal(gotExt, wantExt) { + t.Fatalf("Read(200) mismatch for len=%d\ngot: %x\nwant: %x", len(data), gotExt, wantExt) + } + }) +} + +// Comparison benchmarks: faster_keccak vs golang.org/x/crypto/sha3. +var benchSizes = []int{32, 128, 256, 1024, 4096, 500 * 1024} + +func benchName(size int) string { + if size >= 1024 { + return fmt.Sprintf("%dK", size/1024) + } + return fmt.Sprintf("%dB", size) +} + +// BenchmarkKeccak256Sum tests Sum256 with local faster_keccak implementation. +func BenchmarkKeccak256Sum(b *testing.B) { + for _, size := range benchSizes { + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + b.Run("FasterKeccak/"+benchName(size), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ReportAllocs() + for b.Loop() { + Sum256(data) + } + }) + } +} + +// BenchmarkKeccak256Stdlib tests Sum256 with golang.org/x/crypto/sha3 standard library. +func BenchmarkKeccak256Stdlib(b *testing.B) { + for _, size := range benchSizes { + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + b.Run("StdLib/"+benchName(size), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ReportAllocs() + h := sha3.NewLegacyKeccak256() + for b.Loop() { + h.Reset() + h.Write(data) + h.Sum(nil) + } + }) + } +} + +// BenchmarkKeccak256Hasher tests Hasher.Sum256() with local faster_keccak implementation. +func BenchmarkKeccak256Hasher(b *testing.B) { + for _, size := range benchSizes { + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + b.Run("FasterKeccak/"+benchName(size), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ReportAllocs() + var h Hasher + for b.Loop() { + h.Reset() + h.Write(data) + h.Sum256() + } + }) + } +} + +// BenchmarkKeccak256HasherStdlib tests Hasher API with golang.org/x/crypto/sha3 standard library. +func BenchmarkKeccak256HasherStdlib(b *testing.B) { + for _, size := range benchSizes { + data := make([]byte, size) + for i := range data { + data[i] = byte(i) + } + b.Run("StdLib/"+benchName(size), func(b *testing.B) { + b.SetBytes(int64(size)) + b.ReportAllocs() + h := sha3.NewLegacyKeccak256().(KeccakState) + var buf [32]byte + for b.Loop() { + h.Reset() + h.Write(data) + h.Read(buf[:]) + } + }) + } +} + +// BenchmarkKeccakStreaming benchmarks the streaming hasher (Reset+Write+Read). +// Use with benchstat: go test -bench=BenchmarkKeccakStreaming -benchmem ./... | benchstat +func BenchmarkKeccakStreaming(b *testing.B) { + data := make([]byte, 32) + for i := range data { + data[i] = byte(i) + } + var h Hasher + var buf [32]byte + b.SetBytes(int64(len(data))) + b.ReportAllocs() + for b.Loop() { + h.Reset() + h.Write(data) + h.Read(buf[:]) + } +} diff --git a/crypto/keccak/keccakf.go b/crypto/keccak/keccakf.go deleted file mode 100644 index 82694fa4a3..0000000000 --- a/crypto/keccak/keccakf.go +++ /dev/null @@ -1,414 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !amd64 || purego || !gc - -package keccak - -import "math/bits" - -// rc stores the round constants for use in the ι step. -var rc = [24]uint64{ - 0x0000000000000001, - 0x0000000000008082, - 0x800000000000808A, - 0x8000000080008000, - 0x000000000000808B, - 0x0000000080000001, - 0x8000000080008081, - 0x8000000000008009, - 0x000000000000008A, - 0x0000000000000088, - 0x0000000080008009, - 0x000000008000000A, - 0x000000008000808B, - 0x800000000000008B, - 0x8000000000008089, - 0x8000000000008003, - 0x8000000000008002, - 0x8000000000000080, - 0x000000000000800A, - 0x800000008000000A, - 0x8000000080008081, - 0x8000000000008080, - 0x0000000080000001, - 0x8000000080008008, -} - -// keccakF1600 applies the Keccak permutation to a 1600b-wide -// state represented as a slice of 25 uint64s. -func keccakF1600(a *[25]uint64) { - // Implementation translated from Keccak-inplace.c - // in the keccak reference code. - var t, bc0, bc1, bc2, bc3, bc4, d0, d1, d2, d3, d4 uint64 - - for i := 0; i < 24; i += 4 { - // Combines the 5 steps in each round into 2 steps. - // Unrolls 4 rounds per loop and spreads some steps across rounds. - - // Round 1 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[6] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[12] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[18] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[24] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i] - a[6] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[16] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[22] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[3] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[10] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[1] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[7] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[19] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[20] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[11] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[23] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[4] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[5] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[2] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[8] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[14] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[15] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - // Round 2 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[16] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[7] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[23] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[14] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+1] - a[16] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[11] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[2] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[18] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[20] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[6] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[22] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[4] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[15] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[1] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[8] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[24] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[10] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[12] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[3] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[19] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[5] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - // Round 3 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[11] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[22] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[8] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[19] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+2] - a[11] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[1] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[12] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[23] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[15] = bc0 ^ (bc2 &^ bc1) - a[1] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[16] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[2] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[24] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[5] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[6] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[3] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[14] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[20] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[7] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[18] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[4] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[10] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - // Round 4 - bc0 = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20] - bc1 = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21] - bc2 = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22] - bc3 = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23] - bc4 = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24] - d0 = bc4 ^ (bc1<<1 | bc1>>63) - d1 = bc0 ^ (bc2<<1 | bc2>>63) - d2 = bc1 ^ (bc3<<1 | bc3>>63) - d3 = bc2 ^ (bc4<<1 | bc4>>63) - d4 = bc3 ^ (bc0<<1 | bc0>>63) - - bc0 = a[0] ^ d0 - t = a[1] ^ d1 - bc1 = bits.RotateLeft64(t, 44) - t = a[2] ^ d2 - bc2 = bits.RotateLeft64(t, 43) - t = a[3] ^ d3 - bc3 = bits.RotateLeft64(t, 21) - t = a[4] ^ d4 - bc4 = bits.RotateLeft64(t, 14) - a[0] = bc0 ^ (bc2 &^ bc1) ^ rc[i+3] - a[1] = bc1 ^ (bc3 &^ bc2) - a[2] = bc2 ^ (bc4 &^ bc3) - a[3] = bc3 ^ (bc0 &^ bc4) - a[4] = bc4 ^ (bc1 &^ bc0) - - t = a[5] ^ d0 - bc2 = bits.RotateLeft64(t, 3) - t = a[6] ^ d1 - bc3 = bits.RotateLeft64(t, 45) - t = a[7] ^ d2 - bc4 = bits.RotateLeft64(t, 61) - t = a[8] ^ d3 - bc0 = bits.RotateLeft64(t, 28) - t = a[9] ^ d4 - bc1 = bits.RotateLeft64(t, 20) - a[5] = bc0 ^ (bc2 &^ bc1) - a[6] = bc1 ^ (bc3 &^ bc2) - a[7] = bc2 ^ (bc4 &^ bc3) - a[8] = bc3 ^ (bc0 &^ bc4) - a[9] = bc4 ^ (bc1 &^ bc0) - - t = a[10] ^ d0 - bc4 = bits.RotateLeft64(t, 18) - t = a[11] ^ d1 - bc0 = bits.RotateLeft64(t, 1) - t = a[12] ^ d2 - bc1 = bits.RotateLeft64(t, 6) - t = a[13] ^ d3 - bc2 = bits.RotateLeft64(t, 25) - t = a[14] ^ d4 - bc3 = bits.RotateLeft64(t, 8) - a[10] = bc0 ^ (bc2 &^ bc1) - a[11] = bc1 ^ (bc3 &^ bc2) - a[12] = bc2 ^ (bc4 &^ bc3) - a[13] = bc3 ^ (bc0 &^ bc4) - a[14] = bc4 ^ (bc1 &^ bc0) - - t = a[15] ^ d0 - bc1 = bits.RotateLeft64(t, 36) - t = a[16] ^ d1 - bc2 = bits.RotateLeft64(t, 10) - t = a[17] ^ d2 - bc3 = bits.RotateLeft64(t, 15) - t = a[18] ^ d3 - bc4 = bits.RotateLeft64(t, 56) - t = a[19] ^ d4 - bc0 = bits.RotateLeft64(t, 27) - a[15] = bc0 ^ (bc2 &^ bc1) - a[16] = bc1 ^ (bc3 &^ bc2) - a[17] = bc2 ^ (bc4 &^ bc3) - a[18] = bc3 ^ (bc0 &^ bc4) - a[19] = bc4 ^ (bc1 &^ bc0) - - t = a[20] ^ d0 - bc3 = bits.RotateLeft64(t, 41) - t = a[21] ^ d1 - bc4 = bits.RotateLeft64(t, 2) - t = a[22] ^ d2 - bc0 = bits.RotateLeft64(t, 62) - t = a[23] ^ d3 - bc1 = bits.RotateLeft64(t, 55) - t = a[24] ^ d4 - bc2 = bits.RotateLeft64(t, 39) - a[20] = bc0 ^ (bc2 &^ bc1) - a[21] = bc1 ^ (bc3 &^ bc2) - a[22] = bc2 ^ (bc4 &^ bc3) - a[23] = bc3 ^ (bc0 &^ bc4) - a[24] = bc4 ^ (bc1 &^ bc0) - } -} diff --git a/crypto/keccak/keccakf_amd64.go b/crypto/keccak/keccakf_amd64.go index cb6eca44c3..b4565dd58e 100644 --- a/crypto/keccak/keccakf_amd64.go +++ b/crypto/keccak/keccakf_amd64.go @@ -1,13 +1,21 @@ -// Copyright 2015 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build amd64 && !purego && gc +//go:build amd64 && !purego package keccak -// This function is implemented in keccakf_amd64.s. +import "golang.org/x/sys/cpu" +func init() { useASM = cpu.X86.HasBMI1 && cpu.X86.HasBMI2 } + +// keccakF1600BMI2 permutes state. When buf != nil, it first XORs rate bytes +// of buf into state, saving one full memory pass. +// //go:noescape +func keccakF1600BMI2(a *[200]byte, buf *byte) -func keccakF1600(a *[25]uint64) +func keccakF1600(a *[200]byte) { + keccakF1600BMI2(a, nil) +} + +func xorAndPermute(state *[200]byte, buf *byte) { + keccakF1600BMI2(state, buf) +} diff --git a/crypto/keccak/keccakf_amd64.s b/crypto/keccak/keccakf_amd64.s deleted file mode 100644 index 99e2f16e97..0000000000 --- a/crypto/keccak/keccakf_amd64.s +++ /dev/null @@ -1,5419 +0,0 @@ -// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg sha3. DO NOT EDIT. - -//go:build amd64 && !purego && gc - -// func keccakF1600(a *[25]uint64) -TEXT ·keccakF1600(SB), $200-8 - MOVQ a+0(FP), DI - - // Convert the user state into an internal state - NOTQ 8(DI) - NOTQ 16(DI) - NOTQ 64(DI) - NOTQ 96(DI) - NOTQ 136(DI) - NOTQ 160(DI) - - // Execute the KeccakF permutation - MOVQ (DI), SI - MOVQ 8(DI), BP - MOVQ 32(DI), R15 - XORQ 40(DI), SI - XORQ 48(DI), BP - XORQ 72(DI), R15 - XORQ 80(DI), SI - XORQ 88(DI), BP - XORQ 112(DI), R15 - XORQ 120(DI), SI - XORQ 128(DI), BP - XORQ 152(DI), R15 - XORQ 160(DI), SI - XORQ 168(DI), BP - MOVQ 176(DI), DX - MOVQ 184(DI), R8 - XORQ 192(DI), R15 - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000000000001, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000000008082, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x800000000000808a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008000, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000000000808b, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000080000001, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008081, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008009, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000000000008a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000000000088, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000080008009, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000008000000a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000008000808b, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x800000000000008b, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008089, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008003, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008002, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000000080, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x000000000000800a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x800000008000000a, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008081, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000000008080, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - MOVQ R12, BP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - XORQ R10, R15 - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - XORQ R11, R15 - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(DI), R12 - XORQ 56(DI), DX - XORQ R15, BX - XORQ 96(DI), R12 - XORQ 136(DI), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(DI), R13 - XORQ 64(DI), R8 - XORQ SI, CX - XORQ 104(DI), R13 - XORQ 144(DI), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (DI), R10 - MOVQ 48(DI), R11 - XORQ R13, R9 - MOVQ 96(DI), R12 - MOVQ 144(DI), R13 - MOVQ 192(DI), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x0000000080000001, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (SP) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(SP) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(SP) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(SP) - MOVQ R12, 8(SP) - MOVQ R12, BP - - // Result g - MOVQ 72(DI), R11 - XORQ R9, R11 - MOVQ 80(DI), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(DI), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(DI), R13 - MOVQ 176(DI), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(SP) - XORQ AX, SI - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(SP) - XORQ AX, BP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(SP) - NOTQ R14 - XORQ R10, R15 - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(SP) - - // Result k - MOVQ 8(DI), R10 - MOVQ 56(DI), R11 - MOVQ 104(DI), R12 - MOVQ 152(DI), R13 - MOVQ 160(DI), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(SP) - XORQ AX, SI - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(SP) - XORQ AX, BP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(SP) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(SP) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(SP) - XORQ R10, R15 - - // Result m - MOVQ 40(DI), R11 - XORQ BX, R11 - MOVQ 88(DI), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(DI), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(DI), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(DI), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(SP) - XORQ AX, SI - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(SP) - XORQ AX, BP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(SP) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(SP) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(SP) - XORQ R11, R15 - - // Result s - MOVQ 16(DI), R10 - MOVQ 64(DI), R11 - MOVQ 112(DI), R12 - XORQ DX, R10 - MOVQ 120(DI), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(DI), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(SP) - ROLQ $0x27, R12 - XORQ R9, R15 - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(SP) - XORQ BX, SI - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(SP) - XORQ CX, BP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(SP) - MOVQ R8, 184(SP) - - // Prepare round - MOVQ BP, BX - ROLQ $0x01, BX - MOVQ 16(SP), R12 - XORQ 56(SP), DX - XORQ R15, BX - XORQ 96(SP), R12 - XORQ 136(SP), DX - XORQ DX, R12 - MOVQ R12, CX - ROLQ $0x01, CX - MOVQ 24(SP), R13 - XORQ 64(SP), R8 - XORQ SI, CX - XORQ 104(SP), R13 - XORQ 144(SP), R8 - XORQ R8, R13 - MOVQ R13, DX - ROLQ $0x01, DX - MOVQ R15, R8 - XORQ BP, DX - ROLQ $0x01, R8 - MOVQ SI, R9 - XORQ R12, R8 - ROLQ $0x01, R9 - - // Result b - MOVQ (SP), R10 - MOVQ 48(SP), R11 - XORQ R13, R9 - MOVQ 96(SP), R12 - MOVQ 144(SP), R13 - MOVQ 192(SP), R14 - XORQ CX, R11 - ROLQ $0x2c, R11 - XORQ DX, R12 - XORQ BX, R10 - ROLQ $0x2b, R12 - MOVQ R11, SI - MOVQ $0x8000000080008008, AX - ORQ R12, SI - XORQ R10, AX - XORQ AX, SI - MOVQ SI, (DI) - XORQ R9, R14 - ROLQ $0x0e, R14 - MOVQ R10, R15 - ANDQ R11, R15 - XORQ R14, R15 - MOVQ R15, 32(DI) - XORQ R8, R13 - ROLQ $0x15, R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 16(DI) - NOTQ R12 - ORQ R10, R14 - ORQ R13, R12 - XORQ R13, R14 - XORQ R11, R12 - MOVQ R14, 24(DI) - MOVQ R12, 8(DI) - NOP - - // Result g - MOVQ 72(SP), R11 - XORQ R9, R11 - MOVQ 80(SP), R12 - ROLQ $0x14, R11 - XORQ BX, R12 - ROLQ $0x03, R12 - MOVQ 24(SP), R10 - MOVQ R11, AX - ORQ R12, AX - XORQ R8, R10 - MOVQ 128(SP), R13 - MOVQ 176(SP), R14 - ROLQ $0x1c, R10 - XORQ R10, AX - MOVQ AX, 40(DI) - NOP - XORQ CX, R13 - ROLQ $0x2d, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 48(DI) - NOP - XORQ DX, R14 - ROLQ $0x3d, R14 - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 64(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 72(DI) - NOTQ R14 - NOP - ORQ R14, R13 - XORQ R12, R13 - MOVQ R13, 56(DI) - - // Result k - MOVQ 8(SP), R10 - MOVQ 56(SP), R11 - MOVQ 104(SP), R12 - MOVQ 152(SP), R13 - MOVQ 160(SP), R14 - XORQ DX, R11 - ROLQ $0x06, R11 - XORQ R8, R12 - ROLQ $0x19, R12 - MOVQ R11, AX - ORQ R12, AX - XORQ CX, R10 - ROLQ $0x01, R10 - XORQ R10, AX - MOVQ AX, 80(DI) - NOP - XORQ R9, R13 - ROLQ $0x08, R13 - MOVQ R12, AX - ANDQ R13, AX - XORQ R11, AX - MOVQ AX, 88(DI) - NOP - XORQ BX, R14 - ROLQ $0x12, R14 - NOTQ R13 - MOVQ R13, AX - ANDQ R14, AX - XORQ R12, AX - MOVQ AX, 96(DI) - MOVQ R14, AX - ORQ R10, AX - XORQ R13, AX - MOVQ AX, 104(DI) - ANDQ R11, R10 - XORQ R14, R10 - MOVQ R10, 112(DI) - NOP - - // Result m - MOVQ 40(SP), R11 - XORQ BX, R11 - MOVQ 88(SP), R12 - ROLQ $0x24, R11 - XORQ CX, R12 - MOVQ 32(SP), R10 - ROLQ $0x0a, R12 - MOVQ R11, AX - MOVQ 136(SP), R13 - ANDQ R12, AX - XORQ R9, R10 - MOVQ 184(SP), R14 - ROLQ $0x1b, R10 - XORQ R10, AX - MOVQ AX, 120(DI) - NOP - XORQ DX, R13 - ROLQ $0x0f, R13 - MOVQ R12, AX - ORQ R13, AX - XORQ R11, AX - MOVQ AX, 128(DI) - NOP - XORQ R8, R14 - ROLQ $0x38, R14 - NOTQ R13 - MOVQ R13, AX - ORQ R14, AX - XORQ R12, AX - MOVQ AX, 136(DI) - ORQ R10, R11 - XORQ R14, R11 - MOVQ R11, 152(DI) - ANDQ R10, R14 - XORQ R13, R14 - MOVQ R14, 144(DI) - NOP - - // Result s - MOVQ 16(SP), R10 - MOVQ 64(SP), R11 - MOVQ 112(SP), R12 - XORQ DX, R10 - MOVQ 120(SP), R13 - ROLQ $0x3e, R10 - XORQ R8, R11 - MOVQ 168(SP), R14 - ROLQ $0x37, R11 - XORQ R9, R12 - MOVQ R10, R9 - XORQ CX, R14 - ROLQ $0x02, R14 - ANDQ R11, R9 - XORQ R14, R9 - MOVQ R9, 192(DI) - ROLQ $0x27, R12 - NOP - NOTQ R11 - XORQ BX, R13 - MOVQ R11, BX - ANDQ R12, BX - XORQ R10, BX - MOVQ BX, 160(DI) - NOP - ROLQ $0x29, R13 - MOVQ R12, CX - ORQ R13, CX - XORQ R11, CX - MOVQ CX, 168(DI) - NOP - MOVQ R13, DX - MOVQ R14, R8 - ANDQ R14, DX - ORQ R10, R8 - XORQ R12, DX - XORQ R13, R8 - MOVQ DX, 176(DI) - MOVQ R8, 184(DI) - - // Revert the internal state to the user state - NOTQ 8(DI) - NOTQ 16(DI) - NOTQ 64(DI) - NOTQ 96(DI) - NOTQ 136(DI) - NOTQ 160(DI) - RET diff --git a/crypto/keccak/keccakf_amd64_bmi2.s b/crypto/keccak/keccakf_amd64_bmi2.s new file mode 100644 index 0000000000..f38381bfff --- /dev/null +++ b/crypto/keccak/keccakf_amd64_bmi2.s @@ -0,0 +1,4611 @@ +// Code generated by gen_keccakf_bmi2.go. DO NOT EDIT. + +//go:build amd64 && !purego + +#include "textflag.h" + +// func keccakF1600BMI2(a *[200]byte, buf *byte) +TEXT ·keccakF1600BMI2(SB), NOSPLIT, $200-16 + MOVQ a+0(FP), DI + MOVQ buf+8(FP), BX + TESTQ BX, BX + JZ rounds + + // XOR 17 lanes (136 bytes) of buf into state. + MOVQ 0(BX), AX + XORQ AX, 0(DI) + MOVQ 8(BX), AX + XORQ AX, 8(DI) + MOVQ 16(BX), AX + XORQ AX, 16(DI) + MOVQ 24(BX), AX + XORQ AX, 24(DI) + MOVQ 32(BX), AX + XORQ AX, 32(DI) + MOVQ 40(BX), AX + XORQ AX, 40(DI) + MOVQ 48(BX), AX + XORQ AX, 48(DI) + MOVQ 56(BX), AX + XORQ AX, 56(DI) + MOVQ 64(BX), AX + XORQ AX, 64(DI) + MOVQ 72(BX), AX + XORQ AX, 72(DI) + MOVQ 80(BX), AX + XORQ AX, 80(DI) + MOVQ 88(BX), AX + XORQ AX, 88(DI) + MOVQ 96(BX), AX + XORQ AX, 96(DI) + MOVQ 104(BX), AX + XORQ AX, 104(DI) + MOVQ 112(BX), AX + XORQ AX, 112(DI) + MOVQ 120(BX), AX + XORQ AX, 120(DI) + MOVQ 128(BX), AX + XORQ AX, 128(DI) + +rounds: + + // Round 0 + MOVQ $0x0000000000000001, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 1 + MOVQ $0x0000000000008082, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 2 + MOVQ $0x800000000000808a, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 3 + MOVQ $0x8000000080008000, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 4 + MOVQ $0x000000000000808b, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 5 + MOVQ $0x0000000080000001, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 6 + MOVQ $0x8000000080008081, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 7 + MOVQ $0x8000000000008009, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 8 + MOVQ $0x000000000000008a, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 9 + MOVQ $0x0000000000000088, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 10 + MOVQ $0x0000000080008009, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 11 + MOVQ $0x000000008000000a, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 12 + MOVQ $0x000000008000808b, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 13 + MOVQ $0x800000000000008b, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 14 + MOVQ $0x8000000000008089, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 15 + MOVQ $0x8000000000008003, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 16 + MOVQ $0x8000000000008002, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 17 + MOVQ $0x8000000000000080, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 18 + MOVQ $0x000000000000800a, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 19 + MOVQ $0x800000008000000a, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 20 + MOVQ $0x8000000080008081, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 21 + MOVQ $0x8000000000008080, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + + // Round 22 + MOVQ $0x0000000080000001, R13 + MOVQ 0(DI), AX + XORQ 40(DI), AX + XORQ 80(DI), AX + XORQ 120(DI), AX + XORQ 160(DI), AX + MOVQ 8(DI), BX + XORQ 48(DI), BX + XORQ 88(DI), BX + XORQ 128(DI), BX + XORQ 168(DI), BX + MOVQ 16(DI), CX + XORQ 56(DI), CX + XORQ 96(DI), CX + XORQ 136(DI), CX + XORQ 176(DI), CX + MOVQ 24(DI), DX + XORQ 64(DI), DX + XORQ 104(DI), DX + XORQ 144(DI), DX + XORQ 184(DI), DX + MOVQ 32(DI), SI + XORQ 72(DI), SI + XORQ 112(DI), SI + XORQ 152(DI), SI + XORQ 192(DI), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(DI), R8 + XORQ R14, R8 + MOVQ 48(DI), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(DI), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(DI), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(DI), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(SP) + MOVQ 24(DI), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(DI), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(DI), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(DI), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(DI), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(SP) + MOVQ 8(DI), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(DI), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(DI), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(DI), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(DI), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(SP) + MOVQ 32(DI), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(DI), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(DI), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(DI), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(DI), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(SP) + MOVQ 16(DI), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(DI), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(DI), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(DI), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(DI), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(SP) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(SP) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(SP) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(SP) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(SP) + + // Round 23 + MOVQ $0x8000000080008008, R13 + MOVQ 0(SP), AX + XORQ 40(SP), AX + XORQ 80(SP), AX + XORQ 120(SP), AX + XORQ 160(SP), AX + MOVQ 8(SP), BX + XORQ 48(SP), BX + XORQ 88(SP), BX + XORQ 128(SP), BX + XORQ 168(SP), BX + MOVQ 16(SP), CX + XORQ 56(SP), CX + XORQ 96(SP), CX + XORQ 136(SP), CX + XORQ 176(SP), CX + MOVQ 24(SP), DX + XORQ 64(SP), DX + XORQ 104(SP), DX + XORQ 144(SP), DX + XORQ 184(SP), DX + MOVQ 32(SP), SI + XORQ 72(SP), SI + XORQ 112(SP), SI + XORQ 152(SP), SI + XORQ 192(SP), SI + RORXQ $63, BX, R14 + XORQ SI, R14 + RORXQ $63, CX, R15 + XORQ AX, R15 + RORXQ $63, DX, BP + XORQ BX, BP + RORXQ $63, SI, R8 + XORQ CX, R8 + RORXQ $63, AX, R9 + XORQ DX, R9 + MOVQ R8, SI + MOVQ R9, DX + MOVQ 0(SP), R8 + XORQ R14, R8 + MOVQ 48(SP), R9 + XORQ R15, R9 + RORXQ $20, R9, R9 + MOVQ 96(SP), R10 + XORQ BP, R10 + RORXQ $21, R10, R10 + MOVQ 144(SP), R11 + XORQ SI, R11 + RORXQ $43, R11, R11 + MOVQ 192(SP), R12 + XORQ DX, R12 + RORXQ $50, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + XORQ R13, AX + MOVQ AX, 0(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 8(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 16(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 24(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 32(DI) + MOVQ 24(SP), R8 + XORQ SI, R8 + RORXQ $36, R8, R8 + MOVQ 72(SP), R9 + XORQ DX, R9 + RORXQ $44, R9, R9 + MOVQ 80(SP), R10 + XORQ R14, R10 + RORXQ $61, R10, R10 + MOVQ 128(SP), R11 + XORQ R15, R11 + RORXQ $19, R11, R11 + MOVQ 176(SP), R12 + XORQ BP, R12 + RORXQ $3, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 40(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 48(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 56(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 64(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 72(DI) + MOVQ 8(SP), R8 + XORQ R15, R8 + RORXQ $63, R8, R8 + MOVQ 56(SP), R9 + XORQ BP, R9 + RORXQ $58, R9, R9 + MOVQ 104(SP), R10 + XORQ SI, R10 + RORXQ $39, R10, R10 + MOVQ 152(SP), R11 + XORQ DX, R11 + RORXQ $56, R11, R11 + MOVQ 160(SP), R12 + XORQ R14, R12 + RORXQ $46, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 80(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 88(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 96(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 104(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 112(DI) + MOVQ 32(SP), R8 + XORQ DX, R8 + RORXQ $37, R8, R8 + MOVQ 40(SP), R9 + XORQ R14, R9 + RORXQ $28, R9, R9 + MOVQ 88(SP), R10 + XORQ R15, R10 + RORXQ $54, R10, R10 + MOVQ 136(SP), R11 + XORQ BP, R11 + RORXQ $49, R11, R11 + MOVQ 184(SP), R12 + XORQ SI, R12 + RORXQ $8, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 120(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 128(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 136(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 144(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 152(DI) + MOVQ 16(SP), R8 + XORQ BP, R8 + RORXQ $2, R8, R8 + MOVQ 64(SP), R9 + XORQ SI, R9 + RORXQ $9, R9, R9 + MOVQ 112(SP), R10 + XORQ DX, R10 + RORXQ $25, R10, R10 + MOVQ 120(SP), R11 + XORQ R14, R11 + RORXQ $23, R11, R11 + MOVQ 168(SP), R12 + XORQ R15, R12 + RORXQ $62, R12, R12 + ANDNQ R10, R9, AX + XORQ R8, AX + MOVQ AX, 160(DI) + ANDNQ R11, R10, AX + XORQ R9, AX + MOVQ AX, 168(DI) + ANDNQ R12, R11, AX + XORQ R10, AX + MOVQ AX, 176(DI) + ANDNQ R8, R12, AX + XORQ R11, AX + MOVQ AX, 184(DI) + ANDNQ R9, R8, AX + XORQ R12, AX + MOVQ AX, 192(DI) + RET diff --git a/crypto/keccak/sha3.go b/crypto/keccak/sha3.go deleted file mode 100644 index a554323244..0000000000 --- a/crypto/keccak/sha3.go +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package keccak - -import ( - "crypto/subtle" - "encoding/binary" - "errors" - "unsafe" - - "golang.org/x/sys/cpu" -) - -// spongeDirection indicates the direction bytes are flowing through the sponge. -type spongeDirection int - -const ( - // spongeAbsorbing indicates that the sponge is absorbing input. - spongeAbsorbing spongeDirection = iota - // spongeSqueezing indicates that the sponge is being squeezed. - spongeSqueezing -) - -type state struct { - a [1600 / 8]byte // main state of the hash - - // a[n:rate] is the buffer. If absorbing, it's the remaining space to XOR - // into before running the permutation. If squeezing, it's the remaining - // output to produce before running the permutation. - n, rate int - - // dsbyte contains the "domain separation" bits and the first bit of - // the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the - // SHA-3 and SHAKE functions by appending bitstrings to the message. - // Using a little-endian bit-ordering convention, these are "01" for SHA-3 - // and "1111" for SHAKE, or 00000010b and 00001111b, respectively. Then the - // padding rule from section 5.1 is applied to pad the message to a multiple - // of the rate, which involves adding a "1" bit, zero or more "0" bits, and - // a final "1" bit. We merge the first "1" bit from the padding into dsbyte, - // giving 00000110b (0x06) and 00011111b (0x1f). - // [1] http://csrc.nist.gov/publications/drafts/fips-202/fips_202_draft.pdf - // "Draft FIPS 202: SHA-3 Standard: Permutation-Based Hash and - // Extendable-Output Functions (May 2014)" - dsbyte byte - - outputLen int // the default output size in bytes - state spongeDirection // whether the sponge is absorbing or squeezing -} - -// BlockSize returns the rate of sponge underlying this hash function. -func (d *state) BlockSize() int { return d.rate } - -// Size returns the output size of the hash function in bytes. -func (d *state) Size() int { return d.outputLen } - -// Reset clears the internal state by zeroing the sponge state and -// the buffer indexes, and setting Sponge.state to absorbing. -func (d *state) Reset() { - // Zero the permutation's state. - for i := range d.a { - d.a[i] = 0 - } - d.state = spongeAbsorbing - d.n = 0 -} - -func (d *state) clone() *state { - ret := *d - return &ret -} - -// permute applies the KeccakF-1600 permutation. -func (d *state) permute() { - var a *[25]uint64 - if cpu.IsBigEndian { - a = new([25]uint64) - for i := range a { - a[i] = binary.LittleEndian.Uint64(d.a[i*8:]) - } - } else { - a = (*[25]uint64)(unsafe.Pointer(&d.a)) - } - - keccakF1600(a) - d.n = 0 - - if cpu.IsBigEndian { - for i := range a { - binary.LittleEndian.PutUint64(d.a[i*8:], a[i]) - } - } -} - -// pads appends the domain separation bits in dsbyte, applies -// the multi-bitrate 10..1 padding rule, and permutes the state. -func (d *state) padAndPermute() { - // Pad with this instance's domain-separator bits. We know that there's - // at least one byte of space in the sponge because, if it were full, - // permute would have been called to empty it. dsbyte also contains the - // first one bit for the padding. See the comment in the state struct. - d.a[d.n] ^= d.dsbyte - // This adds the final one bit for the padding. Because of the way that - // bits are numbered from the LSB upwards, the final bit is the MSB of - // the last byte. - d.a[d.rate-1] ^= 0x80 - // Apply the permutation - d.permute() - d.state = spongeSqueezing -} - -// Write absorbs more data into the hash's state. It panics if any -// output has already been read. -func (d *state) Write(p []byte) (n int, err error) { - if d.state != spongeAbsorbing { - panic("sha3: Write after Read") - } - - n = len(p) - - for len(p) > 0 { - x := subtle.XORBytes(d.a[d.n:d.rate], d.a[d.n:d.rate], p) - d.n += x - p = p[x:] - - // If the sponge is full, apply the permutation. - if d.n == d.rate { - d.permute() - } - } - - return -} - -// Read squeezes an arbitrary number of bytes from the sponge. -func (d *state) Read(out []byte) (n int, err error) { - // If we're still absorbing, pad and apply the permutation. - if d.state == spongeAbsorbing { - d.padAndPermute() - } - - n = len(out) - - // Now, do the squeezing. - for len(out) > 0 { - // Apply the permutation if we've squeezed the sponge dry. - if d.n == d.rate { - d.permute() - } - - x := copy(out, d.a[d.n:d.rate]) - d.n += x - out = out[x:] - } - - return -} - -// Sum applies padding to the hash state and then squeezes out the desired -// number of output bytes. It panics if any output has already been read. -func (d *state) Sum(in []byte) []byte { - if d.state != spongeAbsorbing { - panic("sha3: Sum after Read") - } - - // Make a copy of the original hash so that caller can keep writing - // and summing. - dup := d.clone() - hash := make([]byte, dup.outputLen, 64) // explicit cap to allow stack allocation - dup.Read(hash) - return append(in, hash...) -} - -const ( - magicSHA3 = "sha\x08" - magicShake = "sha\x09" - magicCShake = "sha\x0a" - magicKeccak = "sha\x0b" - // magic || rate || main state || n || sponge direction - marshaledSize = len(magicSHA3) + 1 + 200 + 1 + 1 -) - -func (d *state) MarshalBinary() ([]byte, error) { - return d.AppendBinary(make([]byte, 0, marshaledSize)) -} - -func (d *state) AppendBinary(b []byte) ([]byte, error) { - switch d.dsbyte { - case dsbyteSHA3: - b = append(b, magicSHA3...) - case dsbyteShake: - b = append(b, magicShake...) - case dsbyteCShake: - b = append(b, magicCShake...) - case dsbyteKeccak: - b = append(b, magicKeccak...) - default: - panic("unknown dsbyte") - } - // rate is at most 168, and n is at most rate. - b = append(b, byte(d.rate)) - b = append(b, d.a[:]...) - b = append(b, byte(d.n), byte(d.state)) - return b, nil -} - -func (d *state) UnmarshalBinary(b []byte) error { - if len(b) != marshaledSize { - return errors.New("sha3: invalid hash state") - } - - magic := string(b[:len(magicSHA3)]) - b = b[len(magicSHA3):] - switch { - case magic == magicSHA3 && d.dsbyte == dsbyteSHA3: - case magic == magicShake && d.dsbyte == dsbyteShake: - case magic == magicCShake && d.dsbyte == dsbyteCShake: - case magic == magicKeccak && d.dsbyte == dsbyteKeccak: - default: - return errors.New("sha3: invalid hash state identifier") - } - - rate := int(b[0]) - b = b[1:] - if rate != d.rate { - return errors.New("sha3: invalid hash state function") - } - - copy(d.a[:], b) - b = b[len(d.a):] - - n, state := int(b[0]), spongeDirection(b[1]) - if n > d.rate { - return errors.New("sha3: invalid hash state") - } - d.n = n - if state != spongeAbsorbing && state != spongeSqueezing { - return errors.New("sha3: invalid hash state") - } - d.state = state - - return nil -} diff --git a/crypto/keccak/sha3_test.go b/crypto/keccak/sha3_test.go deleted file mode 100644 index 28a20ec72d..0000000000 --- a/crypto/keccak/sha3_test.go +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright 2014 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package keccak - -// Tests include all the ShortMsgKATs provided by the Keccak team at -// https://github.com/gvanas/KeccakCodePackage -// -// They only include the zero-bit case of the bitwise testvectors -// published by NIST in the draft of FIPS-202. - -import ( - "bytes" - "compress/flate" - "encoding" - "encoding/hex" - "encoding/json" - "hash" - "math/rand" - "os" - "strings" - "testing" -) - -const ( - testString = "brekeccakkeccak koax koax" - katFilename = "testdata/keccakKats.json.deflate" -) - -// testDigests contains functions returning hash.Hash instances -// with output-length equal to the KAT length for SHA-3, Keccak -// and SHAKE instances. -var testDigests = map[string]func() hash.Hash{ - "Keccak-256": NewLegacyKeccak256, - "Keccak-512": NewLegacyKeccak512, -} - -// decodeHex converts a hex-encoded string into a raw byte string. -func decodeHex(s string) []byte { - b, err := hex.DecodeString(s) - if err != nil { - panic(err) - } - return b -} - -// structs used to marshal JSON test-cases. -type KeccakKats struct { - Kats map[string][]struct { - Digest string `json:"digest"` - Length int64 `json:"length"` - Message string `json:"message"` - - // Defined only for cSHAKE - N string `json:"N"` - S string `json:"S"` - } -} - -// TestKeccakKats tests the SHA-3 and Shake implementations against all the -// ShortMsgKATs from https://github.com/gvanas/KeccakCodePackage -// (The testvectors are stored in keccakKats.json.deflate due to their length.) -func TestKeccakKats(t *testing.T) { - // Read the KATs. - deflated, err := os.Open(katFilename) - if err != nil { - t.Errorf("error opening %s: %s", katFilename, err) - } - file := flate.NewReader(deflated) - dec := json.NewDecoder(file) - var katSet KeccakKats - err = dec.Decode(&katSet) - if err != nil { - t.Errorf("error decoding KATs: %s", err) - } - - for algo, function := range testDigests { - d := function() - for _, kat := range katSet.Kats[algo] { - d.Reset() - in, err := hex.DecodeString(kat.Message) - if err != nil { - t.Errorf("error decoding KAT: %s", err) - } - d.Write(in[:kat.Length/8]) - got := strings.ToUpper(hex.EncodeToString(d.Sum(nil))) - if got != kat.Digest { - t.Errorf("function=%s, length=%d\nmessage:\n %s\ngot:\n %s\nwanted:\n %s", - algo, kat.Length, kat.Message, got, kat.Digest) - t.Logf("wanted %+v", kat) - t.FailNow() - } - continue - } - } -} - -// TestKeccak does a basic test of the non-standardized Keccak hash functions. -func TestKeccak(t *testing.T) { - tests := []struct { - fn func() hash.Hash - data []byte - want string - }{ - { - NewLegacyKeccak256, - []byte("abc"), - "4e03657aea45a94fc7d47ba826c8d667c0d1e6e33a64a036ec44f58fa12d6c45", - }, - { - NewLegacyKeccak512, - []byte("abc"), - "18587dc2ea106b9a1563e32b3312421ca164c7f1f07bc922a9c83d77cea3a1e5d0c69910739025372dc14ac9642629379540c17e2a65b19d77aa511a9d00bb96", - }, - } - - for _, u := range tests { - h := u.fn() - h.Write(u.data) - got := h.Sum(nil) - want := decodeHex(u.want) - if !bytes.Equal(got, want) { - t.Errorf("unexpected hash for size %d: got '%x' want '%s'", h.Size()*8, got, u.want) - } - } -} - -// TestUnalignedWrite tests that writing data in an arbitrary pattern with -// small input buffers. -func TestUnalignedWrite(t *testing.T) { - buf := sequentialBytes(0x10000) - for alg, df := range testDigests { - d := df() - d.Reset() - d.Write(buf) - want := d.Sum(nil) - d.Reset() - for i := 0; i < len(buf); { - // Cycle through offsets which make a 137 byte sequence. - // Because 137 is prime this sequence should exercise all corner cases. - offsets := [17]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1} - for _, j := range offsets { - if v := len(buf) - i; v < j { - j = v - } - d.Write(buf[i : i+j]) - i += j - } - } - got := d.Sum(nil) - if !bytes.Equal(got, want) { - t.Errorf("Unaligned writes, alg=%s\ngot %q, want %q", alg, got, want) - } - } -} - -// sequentialBytes produces a buffer of size consecutive bytes 0x00, 0x01, ..., used for testing. -// -// The alignment of each slice is intentionally randomized to detect alignment -// issues in the implementation. See https://golang.org/issue/37644. -// Ideally, the compiler should fuzz the alignment itself. -// (See https://golang.org/issue/35128.) -func sequentialBytes(size int) []byte { - alignmentOffset := rand.Intn(8) - result := make([]byte, size+alignmentOffset)[alignmentOffset:] - for i := range result { - result[i] = byte(i) - } - return result -} - -func TestMarshalUnmarshal(t *testing.T) { - t.Run("Keccak-256", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak256()) }) - t.Run("Keccak-512", func(t *testing.T) { testMarshalUnmarshal(t, NewLegacyKeccak512()) }) -} - -// TODO(filippo): move this to crypto/internal/cryptotest. -func testMarshalUnmarshal(t *testing.T, h hash.Hash) { - buf := make([]byte, 200) - rand.Read(buf) - n := rand.Intn(200) - h.Write(buf) - want := h.Sum(nil) - h.Reset() - h.Write(buf[:n]) - b, err := h.(encoding.BinaryMarshaler).MarshalBinary() - if err != nil { - t.Errorf("MarshalBinary: %v", err) - } - h.Write(bytes.Repeat([]byte{0}, 200)) - if err := h.(encoding.BinaryUnmarshaler).UnmarshalBinary(b); err != nil { - t.Errorf("UnmarshalBinary: %v", err) - } - h.Write(buf[n:]) - got := h.Sum(nil) - if !bytes.Equal(got, want) { - t.Errorf("got %x, want %x", got, want) - } -} - -// BenchmarkPermutationFunction measures the speed of the permutation function -// with no input data. -func BenchmarkPermutationFunction(b *testing.B) { - b.SetBytes(int64(200)) - var lanes [25]uint64 - for i := 0; i < b.N; i++ { - keccakF1600(&lanes) - } -} diff --git a/crypto/keccak/testdata/keccakKats.json.deflate b/crypto/keccak/testdata/keccakKats.json.deflate deleted file mode 100644 index 7a94c2f8bc..0000000000 Binary files a/crypto/keccak/testdata/keccakKats.json.deflate and /dev/null differ diff --git a/crypto/keccak/‎gen_keccakf_bmi2.go b/crypto/keccak/‎gen_keccakf_bmi2.go new file mode 100644 index 0000000000..1773fa11dd --- /dev/null +++ b/crypto/keccak/‎gen_keccakf_bmi2.go @@ -0,0 +1,175 @@ +//go:build ignore + +// gen_keccakf_bmi2.go generates keccakf_amd64_bmi2.s — a BMI2-optimized +// Keccak-f[1600] permutation using RORXQ and ANDNQ. +// Fully unrolled (all 24 rounds). +// +// Key optimizations: +// - D values kept in registers (R14, R15, BP, SI, DX), not on stack +// - State alternates between the original array (DI) and a 200-byte stack +// buffer, avoiding a second 200-byte copy +// - Frame is only 200 bytes (25 × 8 for temp state) +// - Optional XOR-and-permute: when buf != nil, XORs rate bytes into state +// before permuting, eliminating one full memory pass +// +// Usage: go run gen_keccakf_bmi2.go + +package main + +import ( + "fmt" + "os" +) + +var rc = [24]uint64{ + 0x0000000000000001, 0x0000000000008082, + 0x800000000000808a, 0x8000000080008000, + 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, + 0x000000000000008a, 0x0000000000000088, + 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, + 0x8000000000008089, 0x8000000000008003, + 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, + 0x8000000080008081, 0x8000000000008080, + 0x0000000080000001, 0x8000000080008008, +} + +type lane struct { + idx int // state lane index (0–24) + rot int // left-rotation amount +} + +// Chi groups: each group reads 5 lanes (after theta+rho+pi) +// and produces 5 consecutive output lanes. +var groups = [5][5]lane{ + {{0, 0}, {6, 44}, {12, 43}, {18, 21}, {24, 14}}, // → lanes 0–4 + {{3, 28}, {9, 20}, {10, 3}, {16, 45}, {22, 61}}, // → lanes 5–9 + {{1, 1}, {7, 6}, {13, 25}, {19, 8}, {20, 18}}, // → lanes 10–14 + {{4, 27}, {5, 36}, {11, 10}, {17, 15}, {23, 56}}, // → lanes 15–19 + {{2, 62}, {8, 55}, {14, 39}, {15, 41}, {21, 2}}, // → lanes 20–24 +} + +// D-value registers, indexed by lane%5. +var dReg = [5]string{"R14", "R15", "BP", "SI", "DX"} + +const ( + fsize = 200 + rateLanes = 17 // rate / 8 = 136 / 8 = 17 lanes +) + +var p func(string, ...any) + +func main() { + f, err := os.Create("keccakf_amd64_bmi2.s") + if err != nil { + panic(err) + } + defer f.Close() + p = func(format string, args ...any) { fmt.Fprintf(f, format+"\n", args...) } + + p("// Code generated by gen_keccakf_bmi2.go. DO NOT EDIT.") + p("") + p("//go:build amd64 && !purego") + p("") + p("#include \"textflag.h\"") + p("") + + // Single function: keccakF1600BMI2(a *[200]byte, buf *byte) + // When buf != nil, XORs rate bytes into state before permuting. + // When buf == nil, just permutes. + p("// func keccakF1600BMI2(a *[200]byte, buf *byte)") + p("TEXT ·keccakF1600BMI2(SB), NOSPLIT, $%d-16", fsize) + p("\tMOVQ a+0(FP), DI") + p("\tMOVQ buf+8(FP), BX") + p("\tTESTQ BX, BX") + p("\tJZ rounds") + p("") + p("\t// XOR %d lanes (%d bytes) of buf into state.", rateLanes, rateLanes*8) + for i := 0; i < rateLanes; i++ { + p("\tMOVQ %d(BX), AX", i*8) + p("\tXORQ AX, %d(DI)", i*8) + } + p("") + p("rounds:") + + for round := 0; round < 24; round++ { + p("") + p("\t// Round %d", round) + srcArray := (round % 2) == 0 + emitRound(srcArray, round) + } + + p("\tRET") +} + +// srcArray: true = source is array (DI), dest is stack (SP) +// +// false = source is stack (SP), dest is array (DI) +func emitRound(srcArray bool, round int) { + // Load round constant into R13. + p("\tMOVQ $0x%016x, R13", rc[round]) + + // Theta: 5 column parities → AX, BX, CX, DX, SI. + colR := [5]string{"AX", "BX", "CX", "DX", "SI"} + for c := 0; c < 5; c++ { + p("\tMOVQ %s, %s", off(c, srcArray), colR[c]) + for r := 1; r < 5; r++ { + p("\tXORQ %s, %s", off(r*5+c, srcArray), colR[c]) + } + } + + // D values: D[x] = C[(x+4)%5] ^ rol(C[(x+1)%5], 1). + // D[0..2] go directly into R14, R15, BP (no conflicts). + for _, x := range []int{0, 1, 2} { + p("\tRORXQ $63, %s, %s", colR[(x+1)%5], dReg[x]) + p("\tXORQ %s, %s", colR[(x+4)%5], dReg[x]) + } + // D[3] and D[4] target SI and DX, which still hold column parities + // C[4] and C[3] needed as inputs, so compute via temps first. + p("\tRORXQ $63, SI, R8") + p("\tXORQ CX, R8") + p("\tRORXQ $63, AX, R9") + p("\tXORQ DX, R9") + p("\tMOVQ R8, SI") // SI = D[3] + p("\tMOVQ R9, DX") // DX = D[4] + + // Five chi groups. + for g := 0; g < 5; g++ { + emitChi(g, srcArray, g == 0) + } +} + +func emitChi(g int, srcArray, first bool) { + B := [5]string{"R8", "R9", "R10", "R11", "R12"} + + // Load lane, XOR with D (register!), rotate. + for i := 0; i < 5; i++ { + l := groups[g][i] + p("\tMOVQ %s, %s", off(l.idx, srcArray), B[i]) + p("\tXORQ %s, %s", dReg[l.idx%5], B[i]) + if l.rot != 0 { + p("\tRORXQ $%d, %s, %s", 64-l.rot, B[i], B[i]) + } + } + + // Chi: out[j] = B[j] ^ (~B[(j+1)%5] & B[(j+2)%5]). + for j := 0; j < 5; j++ { + p("\tANDNQ %s, %s, AX", B[(j+2)%5], B[(j+1)%5]) + p("\tXORQ %s, AX", B[j]) + if first && j == 0 { + p("\tXORQ R13, AX") + } + p("\tMOVQ AX, %s", off(g*5+j, !srcArray)) + } +} + +// off returns the memory operand for lane idx. +func off(idx int, array bool) string { + o := idx * 8 + if array { + return fmt.Sprintf("%d(DI)", o) + } + return fmt.Sprintf("%d(SP)", o) +} diff --git a/crypto/keccak_ziren.go b/crypto/keccak_ziren.go index 8e967c6dbf..2429012e92 100644 --- a/crypto/keccak_ziren.go +++ b/crypto/keccak_ziren.go @@ -21,6 +21,7 @@ package crypto import ( "github.com/ProjectZKM/Ziren/crates/go-runtime/zkvm_runtime" "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/crypto/keccak" ) // zirenKeccakState implements the KeccakState interface using the Ziren zkvm_runtime. @@ -31,7 +32,7 @@ type zirenKeccakState struct { dirty bool // whether new data has been written since last hash } -func newZirenKeccakState() KeccakState { +func newZirenKeccakState() keccak.KeccakState { return &zirenKeccakState{ buf: make([]byte, 0, 512), // pre-allocate reasonable capacity } @@ -85,7 +86,7 @@ func (s *zirenKeccakState) computeHashIfNeeded() { // NewKeccakState creates a new KeccakState // This uses a Ziren-optimized implementation that leverages the zkvm_runtime.Keccak256 system call. -func NewKeccakState() KeccakState { +func NewKeccakState() keccak.KeccakState { return newZirenKeccakState() } diff --git a/trie/hasher.go b/trie/hasher.go index a2a1f5b662..9f84cd960d 100644 --- a/trie/hasher.go +++ b/trie/hasher.go @@ -22,13 +22,14 @@ import ( "sync" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/crypto/keccak" "github.com/ethereum/go-ethereum/rlp" ) // hasher is a type used for the trie Hash operation. A hasher has some // internal preallocated temp space type hasher struct { - sha crypto.KeccakState + sha keccak.KeccakState tmp []byte encbuf rlp.EncoderBuffer parallel bool // Whether to use parallel threads when hashing