mirror of
https://github.com/ethereum/go-ethereum.git
synced 2026-02-26 15:47:21 +00:00
Merge 8deeb5778f into 406a852ec8
This commit is contained in:
commit
dea9d5132b
6 changed files with 240 additions and 25 deletions
|
|
@ -76,33 +76,15 @@ func safeANDBytes(dst, a, b []byte) int {
|
|||
|
||||
// ORBytes ors the bytes in a and b. The destination is assumed to have enough
|
||||
// space. Returns the number of bytes or'd.
|
||||
//
|
||||
// dst and x or y may overlap exactly or not at all,
|
||||
// otherwise ORBytes may panic.
|
||||
func ORBytes(dst, a, b []byte) int {
|
||||
if supportsUnaligned {
|
||||
return fastORBytes(dst, a, b)
|
||||
n := min(len(a), len(b))
|
||||
if inexactOverlap(dst[:n], a[:n]) || inexactOverlap(dst[:n], b[:n]) {
|
||||
panic("ORBytes: invalid overlap")
|
||||
}
|
||||
return safeORBytes(dst, a, b)
|
||||
}
|
||||
|
||||
// fastORBytes ors in bulk. It only works on architectures that support
|
||||
// unaligned read/writes.
|
||||
func fastORBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
for i := 0; i < w; i++ {
|
||||
dw[i] = aw[i] | bw[i]
|
||||
}
|
||||
}
|
||||
for i := n - n%wordSize; i < n; i++ {
|
||||
dst[i] = a[i] | b[i]
|
||||
}
|
||||
return n
|
||||
return orBytes(dst, a, b)
|
||||
}
|
||||
|
||||
// safeORBytes ors one by one. It works on all architectures, independent if
|
||||
|
|
@ -157,3 +139,26 @@ func safeTestBytes(p []byte) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// anyOverlap reports whether x and y have at least one byte of memory in
// common, at any pair of indices (the indices need not correspond). Only the
// first len(x) and len(y) bytes are considered; spare capacity is ignored.
// from: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/alias/alias.go#L13-L17
func anyOverlap(x, y []byte) bool {
	// An empty slice occupies no memory and therefore overlaps nothing.
	if len(x) == 0 || len(y) == 0 {
		return false
	}
	xFirst := uintptr(unsafe.Pointer(&x[0]))
	xLast := uintptr(unsafe.Pointer(&x[len(x)-1]))
	yFirst := uintptr(unsafe.Pointer(&y[0]))
	yLast := uintptr(unsafe.Pointer(&y[len(y)-1]))

	// Two closed address ranges intersect when each one starts no later than
	// the other one ends.
	return xFirst <= yLast && yFirst <= xLast
}
|
||||
|
||||
// inexactOverlap reports whether x and y share memory at any non-corresponding
|
||||
// index. The memory beyond the slice length is ignored. Note that x and y can
|
||||
// have different lengths and still not have any inexact overlap.
|
||||
//
|
||||
// inexactOverlap can be used to implement the requirements of the crypto/cipher
|
||||
// AEAD, Block, BlockMode and Stream interfaces.
|
||||
// from: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/alias/alias.go#L25-L30
|
||||
func inexactOverlap(x, y []byte) bool {
|
||||
if len(x) == 0 || len(y) == 0 || &x[0] == &y[0] {
|
||||
return false
|
||||
}
|
||||
return anyOverlap(x, y)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,6 +104,32 @@ func TestOR(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestORBytesInexactOverlap(t *testing.T) {
|
||||
shouldPanic := func(f func()) (ok bool) {
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
if r.(string) == "ORBytes: invalid overlap" {
|
||||
ok = true
|
||||
}
|
||||
}
|
||||
}()
|
||||
f()
|
||||
return
|
||||
}
|
||||
a := make([]byte, 5)
|
||||
if ok := shouldPanic(func() {
|
||||
ORBytes(a[1:4], a[0:3], make([]byte, 3))
|
||||
}); !ok {
|
||||
t.Error("expected panic on inexact overlap")
|
||||
}
|
||||
|
||||
if ok := shouldPanic(func() {
|
||||
ORBytes(a[1:4], make([]byte, 3), a[0:3])
|
||||
}); !ok {
|
||||
t.Error("expected panic on inexact overlap")
|
||||
}
|
||||
}
|
||||
|
||||
// Tests that bit testing works for various alignments.
|
||||
func TestTest(t *testing.T) {
|
||||
for align := 0; align < 2; align++ {
|
||||
|
|
|
|||
59
common/bitutil/or_amd64.s
Normal file
59
common/bitutil/or_amd64.s
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func orBytesASM(dst, a, b *byte, n int)
//
// ORs n bytes from a and b into dst.
//
// Register use: BX = dst, SI = a, CX = b, DX = remaining length.
// Trailing bytes are handled backwards from the end (one byte at a time, then
// one 8-byte word) until the remaining length is a multiple of 16; the rest is
// processed forwards from offset 0 in 16-byte chunks.
//
// NOTE: n must be greater than zero. With n == 0 the first length test treats
// the length as 16-byte aligned and loop16b processes 16 bytes before its
// first exit comparison.
TEXT ·orBytesASM(SB), NOSPLIT, $0
	MOVQ dst+0(FP), BX
	MOVQ a+8(FP), SI
	MOVQ b+16(FP), CX
	MOVQ n+24(FP), DX
	TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned.
	JNZ not_aligned

aligned:
	MOVQ $0, AX // position in slices

	PCALIGN $16
loop16b:
	MOVOU (SI)(AX*1), X0 // OR 16byte forwards.
	MOVOU (CX)(AX*1), X1
	POR X1, X0
	MOVOU X0, (BX)(AX*1)
	ADDQ $16, AX
	CMPQ DX, AX // stop once the position reaches the remaining length.
	JNE loop16b
	RET

	PCALIGN $16
loop_1b:
	SUBQ $1, DX // OR 1byte backwards.
	MOVB (SI)(DX*1), DI
	MOVB (CX)(DX*1), AX
	ORB AX, DI
	MOVB DI, (BX)(DX*1)
	TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b.
	JNZ loop_1b
	CMPQ DX, $0 // if len is 0, ret.
	JE ret
	TESTQ $15, DX // AND 15 & len, if zero jump to aligned.
	JZ aligned

not_aligned:
	TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b.
	JNE loop_1b
	SUBQ $8, DX // OR 8bytes backwards.
	MOVQ (SI)(DX*1), DI
	MOVQ (CX)(DX*1), AX
	ORQ AX, DI
	MOVQ DI, (BX)(DX*1)
	CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned.
	JGE aligned
	// Otherwise nothing is left (len was a multiple of 8 below 16): fall through.

ret:
	RET
|
||||
70
common/bitutil/or_arm64.s
Normal file
70
common/bitutil/or_arm64.s
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_arm64.s
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func orBytesASM(dst, a, b *byte, n int)
//
// ORs n bytes from a and b into dst.
//
// Register use: R0 = dst, R1 = a, R2 = b, R3 = remaining length.
// Full 64-byte blocks are handled in loop_64; the tail then tests bits 5..0 of
// the remaining length and handles 32, 16, 8, 4, 2 and 1 bytes in turn. The
// .P forms advance the pointers after each access.
TEXT ·orBytesASM(SB), NOSPLIT|NOFRAME, $0
	MOVD dst+0(FP), R0
	MOVD a+8(FP), R1
	MOVD b+16(FP), R2
	MOVD n+24(FP), R3
	CMP $64, R3
	BLT tail // fewer than 64 bytes: skip the bulk loop.
loop_64:
	// OR 64 bytes per iteration using four 16-byte vector registers per input.
	VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16]
	VLD1.P 64(R2), [V4.B16, V5.B16, V6.B16, V7.B16]
	VORR V0.B16, V4.B16, V4.B16
	VORR V1.B16, V5.B16, V5.B16
	VORR V2.B16, V6.B16, V6.B16
	VORR V3.B16, V7.B16, V7.B16
	VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
	SUBS $64, R3
	CMP $64, R3
	BGE loop_64 // keep going while at least 64 bytes remain.
tail:
	// quick end
	CBZ R3, end
	TBZ $5, R3, less_than32 // bit 5 clear: no 32-byte chunk.
	VLD1.P 32(R1), [V0.B16, V1.B16]
	VLD1.P 32(R2), [V2.B16, V3.B16]
	VORR V0.B16, V2.B16, V2.B16
	VORR V1.B16, V3.B16, V3.B16
	VST1.P [V2.B16, V3.B16], 32(R0)
less_than32:
	TBZ $4, R3, less_than16 // bit 4 clear: no 16-byte chunk.
	LDP.P 16(R1), (R11, R12)
	LDP.P 16(R2), (R13, R14)
	ORR R11, R13, R13
	ORR R12, R14, R14
	STP.P (R13, R14), 16(R0)
less_than16:
	TBZ $3, R3, less_than8 // bit 3 clear: no 8-byte chunk.
	MOVD.P 8(R1), R11
	MOVD.P 8(R2), R12
	ORR R11, R12, R12
	MOVD.P R12, 8(R0)
less_than8:
	TBZ $2, R3, less_than4 // bit 2 clear: no 4-byte chunk.
	MOVWU.P 4(R1), R13
	MOVWU.P 4(R2), R14
	ORRW R13, R14, R14
	MOVWU.P R14, 4(R0)
less_than4:
	TBZ $1, R3, less_than2 // bit 1 clear: no 2-byte chunk.
	MOVHU.P 2(R1), R15
	MOVHU.P 2(R2), R16
	ORRW R15, R16, R16
	MOVHU.P R16, 2(R0)
less_than2:
	TBZ $0, R3, end // bit 0 clear: no final byte.
	MOVBU (R1), R17
	MOVBU (R2), R19
	ORRW R17, R19, R19
	MOVBU R19, (R0)
end:
	RET
|
||||
17
common/bitutil/or_asm.go
Normal file
17
common/bitutil/or_asm.go
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_asm.go
|
||||
|
||||
//go:build (amd64 || arm64) && !purego
|
||||
|
||||
package bitutil
|
||||
|
||||
func orBytes(dst, a, b []byte) int {
|
||||
n := min(len(a), len(b))
|
||||
orBytesASM(&dst[0], &a[0], &b[0], n)
|
||||
return len(a)
|
||||
}
|
||||
|
||||
// orBytesASM ors n bytes starting at a and b and stores the result starting
// at dst. It is implemented in assembly (or_amd64.s and or_arm64.s) and does
// no bounds checking: all three pointers must be valid for n bytes. The amd64
// implementation additionally requires n > 0.
//
//go:noescape
func orBytesASM(dst, a, b *byte, n int)
|
||||
38
common/bitutil/or_generic.go
Normal file
38
common/bitutil/or_generic.go
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (!amd64 && !arm64) || purego
|
||||
|
||||
package bitutil
|
||||
|
||||
import "unsafe"
|
||||
|
||||
func orBytes(dst, a, b []byte) int {
|
||||
if supportsUnaligned {
|
||||
return fastORBytes(dst, a, b)
|
||||
}
|
||||
return safeORBytes(dst, a, b)
|
||||
}
|
||||
|
||||
// fastORBytes ors in bulk. It only works on architectures that support
|
||||
// unaligned read/writes.
|
||||
func fastORBytes(dst, a, b []byte) int {
|
||||
n := len(a)
|
||||
if len(b) < n {
|
||||
n = len(b)
|
||||
}
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
for i := 0; i < w; i++ {
|
||||
dw[i] = aw[i] | bw[i]
|
||||
}
|
||||
}
|
||||
for i := n - n%wordSize; i < n; i++ {
|
||||
dst[i] = a[i] | b[i]
|
||||
}
|
||||
return n
|
||||
}
|
||||
Loading…
Reference in a new issue