mirror of
https://github.com/golang/go.git
synced 2026-04-03 01:40:30 +09:00
cmd/compile, simd/archsimd: add VPSRL immediate peepholes
Before this CL, the simdgen contains a sign check to selectively enable such rules for deduplication purposes. This left out `VPSRL` as it's only available in unsigned form. This CL fixes that. It looks like the previous documentation fix to the SHA instruction might not have run go generate, so this CL also contains the generated code for that fix. There is also a weird phantom import in cmd/compile/internal/ssa/issue77582_test.go. This CL also fixes that. The trybot didn't complain? Change-Id: Ibbf9f789c1a67af1474f0285ab376bc07f17667e Reviewed-on: https://go-review.googlesource.com/c/go/+/748501 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
@@ -1067,14 +1067,23 @@
|
||||
(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...)
|
||||
(VPSRAQ512 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x)
|
||||
(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...)
|
||||
(VPSRLW128 x (MOVQconst [c])) => (VPSRLW128const [uint8(c)] x)
|
||||
(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...)
|
||||
(VPSRLW256 x (MOVQconst [c])) => (VPSRLW256const [uint8(c)] x)
|
||||
(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...)
|
||||
(VPSRLW512 x (MOVQconst [c])) => (VPSRLW512const [uint8(c)] x)
|
||||
(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...)
|
||||
(VPSRLD128 x (MOVQconst [c])) => (VPSRLD128const [uint8(c)] x)
|
||||
(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...)
|
||||
(VPSRLD256 x (MOVQconst [c])) => (VPSRLD256const [uint8(c)] x)
|
||||
(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...)
|
||||
(VPSRLD512 x (MOVQconst [c])) => (VPSRLD512const [uint8(c)] x)
|
||||
(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...)
|
||||
(VPSRLQ128 x (MOVQconst [c])) => (VPSRLQ128const [uint8(c)] x)
|
||||
(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...)
|
||||
(VPSRLQ256 x (MOVQconst [c])) => (VPSRLQ256const [uint8(c)] x)
|
||||
(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...)
|
||||
(VPSRLQ512 x (MOVQconst [c])) => (VPSRLQ512const [uint8(c)] x)
|
||||
(ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...)
|
||||
(ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...)
|
||||
(ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...)
|
||||
@@ -1102,6 +1111,15 @@
|
||||
(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask)
|
||||
(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask)
|
||||
(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask)
|
||||
(VPSRLWMasked128 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [uint8(c)] x mask)
|
||||
(VPSRLWMasked256 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [uint8(c)] x mask)
|
||||
(VPSRLWMasked512 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [uint8(c)] x mask)
|
||||
(VPSRLDMasked128 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [uint8(c)] x mask)
|
||||
(VPSRLDMasked256 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [uint8(c)] x mask)
|
||||
(VPSRLDMasked512 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [uint8(c)] x mask)
|
||||
(VPSRLQMasked128 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [uint8(c)] x mask)
|
||||
(VPSRLQMasked256 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [uint8(c)] x mask)
|
||||
(VPSRLQMasked512 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [uint8(c)] x mask)
|
||||
(ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
|
||||
(ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
|
||||
(ShiftLeftInt16x32 ...) => (VPSLLVW512 ...)
|
||||
@@ -1960,6 +1978,15 @@
|
||||
(VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) => (VPSLLQMasked128const [a] x mask)
|
||||
(VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) => (VPSLLQMasked256const [a] x mask)
|
||||
(VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512const [a] x mask)
|
||||
(VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) => (VPSRLWMasked128const [a] x mask)
|
||||
(VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) => (VPSRLWMasked256const [a] x mask)
|
||||
(VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) => (VPSRLWMasked512const [a] x mask)
|
||||
(VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) => (VPSRLDMasked128const [a] x mask)
|
||||
(VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) => (VPSRLDMasked256const [a] x mask)
|
||||
(VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) => (VPSRLDMasked512const [a] x mask)
|
||||
(VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) => (VPSRLQMasked128const [a] x mask)
|
||||
(VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) => (VPSRLQMasked256const [a] x mask)
|
||||
(VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512const [a] x mask)
|
||||
(VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) => (VPSRAWMasked128const [a] x mask)
|
||||
(VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) => (VPSRAWMasked256const [a] x mask)
|
||||
(VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) => (VPSRAWMasked512const [a] x mask)
|
||||
@@ -2024,6 +2051,7 @@
|
||||
(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) => (VPSRLDMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask)
|
||||
(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
|
||||
@@ -2078,6 +2106,7 @@
|
||||
(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask)
|
||||
(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask)
|
||||
@@ -2115,6 +2144,7 @@
|
||||
(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
|
||||
(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask)
|
||||
(VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) => (VPSRLWMasked512constMerging dst [a] x mask)
|
||||
(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask)
|
||||
(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask)
|
||||
(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask)
|
||||
@@ -2288,9 +2318,12 @@
|
||||
(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSRLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
@@ -2440,9 +2473,12 @@
|
||||
(VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSRLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
|
||||
@@ -1724,20 +1724,44 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v)
|
||||
case OpAMD64VPSRAWMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v)
|
||||
case OpAMD64VPSRLD128:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLD128(v)
|
||||
case OpAMD64VPSRLD256:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLD256(v)
|
||||
case OpAMD64VPSRLD512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLD512(v)
|
||||
case OpAMD64VPSRLD512const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLD512const(v)
|
||||
case OpAMD64VPSRLDMasked128:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLDMasked128(v)
|
||||
case OpAMD64VPSRLDMasked128const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v)
|
||||
case OpAMD64VPSRLDMasked256:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLDMasked256(v)
|
||||
case OpAMD64VPSRLDMasked256const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v)
|
||||
case OpAMD64VPSRLDMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLDMasked512(v)
|
||||
case OpAMD64VPSRLDMasked512const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v)
|
||||
case OpAMD64VPSRLQ128:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQ128(v)
|
||||
case OpAMD64VPSRLQ256:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQ256(v)
|
||||
case OpAMD64VPSRLQ512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQ512(v)
|
||||
case OpAMD64VPSRLQ512const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQ512const(v)
|
||||
case OpAMD64VPSRLQMasked128:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQMasked128(v)
|
||||
case OpAMD64VPSRLQMasked128const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v)
|
||||
case OpAMD64VPSRLQMasked256:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQMasked256(v)
|
||||
case OpAMD64VPSRLQMasked256const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v)
|
||||
case OpAMD64VPSRLQMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQMasked512(v)
|
||||
case OpAMD64VPSRLQMasked512const:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v)
|
||||
case OpAMD64VPSRLVD512:
|
||||
@@ -1756,6 +1780,18 @@ func rewriteValueAMD64(v *Value) bool {
|
||||
return rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v)
|
||||
case OpAMD64VPSRLVQMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v)
|
||||
case OpAMD64VPSRLW128:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLW128(v)
|
||||
case OpAMD64VPSRLW256:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLW256(v)
|
||||
case OpAMD64VPSRLW512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLW512(v)
|
||||
case OpAMD64VPSRLWMasked128:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLWMasked128(v)
|
||||
case OpAMD64VPSRLWMasked256:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLWMasked256(v)
|
||||
case OpAMD64VPSRLWMasked512:
|
||||
return rewriteValueAMD64_OpAMD64VPSRLWMasked512(v)
|
||||
case OpAMD64VPSUBD512:
|
||||
return rewriteValueAMD64_OpAMD64VPSUBD512(v)
|
||||
case OpAMD64VPSUBDMasked128:
|
||||
@@ -33642,6 +33678,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked128 (VPSRLW128const [a] x) mask)
|
||||
// result: (VPSRLWMasked128const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLW128const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLWMasked128const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask)
|
||||
// result: (VPSRAWMasked128const [a] x mask)
|
||||
for {
|
||||
@@ -34230,6 +34280,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked256 (VPSRLW256const [a] x) mask)
|
||||
// result: (VPSRLWMasked256const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLW256const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLWMasked256const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask)
|
||||
// result: (VPSRAWMasked256const [a] x mask)
|
||||
for {
|
||||
@@ -34746,6 +34810,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask)
|
||||
// result: (VPSRLWMasked512const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLW512const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLWMasked512const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask)
|
||||
// result: (VPSRAWMasked512const [a] x mask)
|
||||
for {
|
||||
@@ -35505,6 +35583,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked128 (VPSRLD128const [a] x) mask)
|
||||
// result: (VPSRLDMasked128const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLD128const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLDMasked128const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked128 (VPSRAD128const [a] x) mask)
|
||||
// result: (VPSRADMasked128const [a] x mask)
|
||||
for {
|
||||
@@ -36386,6 +36478,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked256 (VPSRLD256const [a] x) mask)
|
||||
// result: (VPSRLDMasked256const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLD256const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLDMasked256const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked256 (VPSRAD256const [a] x) mask)
|
||||
// result: (VPSRADMasked256const [a] x mask)
|
||||
for {
|
||||
@@ -37271,6 +37377,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask)
|
||||
// result: (VPSRLDMasked512const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLD512const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLDMasked512const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask)
|
||||
// result: (VPSRADMasked512const [a] x mask)
|
||||
for {
|
||||
@@ -38134,6 +38254,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask)
|
||||
// result: (VPSRLQMasked128const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLQ128const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLQMasked128const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask)
|
||||
// result: (VPSRAQMasked128const [a] x mask)
|
||||
for {
|
||||
@@ -39011,6 +39145,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask)
|
||||
// result: (VPSRLQMasked256const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLQ256const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLQMasked256const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask)
|
||||
// result: (VPSRAQMasked256const [a] x mask)
|
||||
for {
|
||||
@@ -39808,6 +39956,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool {
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask)
|
||||
// result: (VPSRLQMasked512const [a] x mask)
|
||||
for {
|
||||
if v_0.Op != OpAMD64VPSRLQ512const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_0.AuxInt)
|
||||
x := v_0.Args[0]
|
||||
mask := v_1
|
||||
v.reset(OpAMD64VPSRLQMasked512const)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask)
|
||||
// result: (VPSRAQMasked512const [a] x mask)
|
||||
for {
|
||||
@@ -43835,6 +43997,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool {
|
||||
v.AddArg4(dst, x, y, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask)
|
||||
// result: (VPSRLDMasked512constMerging dst [a] x mask)
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLD512const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLDMasked512constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg3(dst, x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask)
|
||||
// result: (VPSRLVDMasked512Merging dst x y mask)
|
||||
for {
|
||||
@@ -44606,6 +44783,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
|
||||
v.AddArg4(dst, x, y, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask)
|
||||
// result: (VPSRLQMasked512constMerging dst [a] x mask)
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLQ512const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLQMasked512constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg3(dst, x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask)
|
||||
// result: (VPSRLVQMasked512Merging dst x y mask)
|
||||
for {
|
||||
@@ -45155,6 +45347,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool {
|
||||
v.AddArg4(dst, x, y, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask)
|
||||
// result: (VPSRLWMasked512constMerging dst [a] x mask)
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLW512const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLWMasked512constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v.AddArg3(dst, x, mask)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask)
|
||||
// result: (VPSUBSWMasked512Merging dst x y mask)
|
||||
for {
|
||||
@@ -48538,6 +48745,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB128 dst (VPSRLD128const [a] x) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLD128const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VPSRLDMasked128constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB128 dst (VPSRLQ128const [a] x) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLQ128const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VPSRLQMasked128constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
|
||||
@@ -48598,6 +48847,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
|
||||
v.AddArg4(dst, x, y, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB128 dst (VPSRLW128const [a] x) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLW128const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VPSRLWMasked128constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB128 dst (VPSUBB128 x y) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
|
||||
@@ -51560,6 +51830,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB256 dst (VPSRLD256const [a] x) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLD256const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VPSRLDMasked256constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB256 dst (VPSRLQ256const [a] x) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLQ256const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VPSRLQMasked256constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
|
||||
@@ -51620,6 +51932,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
|
||||
v.AddArg4(dst, x, y, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB256 dst (VPSRLW256const [a] x) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
|
||||
for {
|
||||
dst := v_0
|
||||
if v_1.Op != OpAMD64VPSRLW256const {
|
||||
break
|
||||
}
|
||||
a := auxIntToUint8(v_1.AuxInt)
|
||||
x := v_1.Args[0]
|
||||
mask := v_2
|
||||
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
|
||||
break
|
||||
}
|
||||
v.reset(OpAMD64VPSRLWMasked256constMerging)
|
||||
v.AuxInt = uint8ToAuxInt(a)
|
||||
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
|
||||
v0.AddArg(mask)
|
||||
v.AddArg3(dst, x, v0)
|
||||
return true
|
||||
}
|
||||
// match: (VPBLENDVB256 dst (VPSUBB256 x y) mask)
|
||||
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
|
||||
// result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
|
||||
@@ -61380,6 +61713,60 @@ func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLD128(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLD128 x (MOVQconst [c]))
|
||||
// result: (VPSRLD128const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLD128const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLD256(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLD256 x (MOVQconst [c]))
|
||||
// result: (VPSRLD256const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLD256const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLD512(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLD512 x (MOVQconst [c]))
|
||||
// result: (VPSRLD512const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLD512const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
|
||||
@@ -61406,6 +61793,26 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLDMasked128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLDMasked128 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLDMasked128const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLDMasked128const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
@@ -61434,6 +61841,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLDMasked256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLDMasked256 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLDMasked256const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLDMasked256const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
@@ -61462,6 +61889,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLDMasked512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLDMasked512 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLDMasked512const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLDMasked512const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
@@ -61490,6 +61937,60 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQ128(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLQ128 x (MOVQconst [c]))
|
||||
// result: (VPSRLQ128const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLQ128const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQ256(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLQ256 x (MOVQconst [c]))
|
||||
// result: (VPSRLQ256const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLQ256const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQ512(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLQ512 x (MOVQconst [c]))
|
||||
// result: (VPSRLQ512const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLQ512const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool {
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
|
||||
@@ -61516,6 +62017,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQMasked128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLQMasked128 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLQMasked128const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLQMasked128const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
@@ -61544,6 +62065,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQMasked256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLQMasked256 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLQMasked256const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLQMasked256const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
@@ -61572,6 +62113,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQMasked512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLQMasked512 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLQMasked512const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLQMasked512const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
@@ -61828,6 +62389,120 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool {
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLW128(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLW128 x (MOVQconst [c]))
|
||||
// result: (VPSRLW128const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLW128const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLW256(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLW256 x (MOVQconst [c]))
|
||||
// result: (VPSRLW256const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLW256const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLW512(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLW512 x (MOVQconst [c]))
|
||||
// result: (VPSRLW512const [uint8(c)] x)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
v.reset(OpAMD64VPSRLW512const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLWMasked128(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLWMasked128 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLWMasked128const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLWMasked128const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLWMasked256(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLWMasked256 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLWMasked256const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLWMasked256const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSRLWMasked512(v *Value) bool {
|
||||
v_2 := v.Args[2]
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
// match: (VPSRLWMasked512 x (MOVQconst [c]) mask)
|
||||
// result: (VPSRLWMasked512const [uint8(c)] x mask)
|
||||
for {
|
||||
x := v_0
|
||||
if v_1.Op != OpAMD64MOVQconst {
|
||||
break
|
||||
}
|
||||
c := auxIntToInt64(v_1.AuxInt)
|
||||
mask := v_2
|
||||
v.reset(OpAMD64VPSRLWMasked512const)
|
||||
v.AuxInt = uint8ToAuxInt(uint8(c))
|
||||
v.AddArg2(x, mask)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool {
|
||||
v_1 := v.Args[1]
|
||||
v_0 := v.Args[0]
|
||||
|
||||
@@ -129,7 +129,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
||||
// asm -> masked merging rules
|
||||
maskedMergeOpts := make(map[string]string)
|
||||
s2n := map[int]string{8: "B", 16: "W", 32: "D", 64: "Q"}
|
||||
asmCheck := map[string]bool{}
|
||||
asmCheck := map[string]bool{} // for masked merge optimizations.
|
||||
sftimmCheck := map[string]bool{} // deduplicate sftimm rules
|
||||
var allData []tplRuleData
|
||||
var optData []tplRuleData // for mask peephole optimizations, and other misc
|
||||
var memOptData []tplRuleData // for memory peephole optimizations
|
||||
@@ -229,8 +230,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
|
||||
|
||||
if gOp.SpecialLower != nil {
|
||||
if *gOp.SpecialLower == "sftimm" {
|
||||
if data.GoType[0] == 'I' {
|
||||
// only do these for signed types, it is a duplicate rewrite for unsigned
|
||||
if !sftimmCheck[data.Asm] {
|
||||
sftimmCheck[data.Asm] = true
|
||||
sftImmData := data
|
||||
if tplName == "maskIn" {
|
||||
sftImmData.tplName = "masksftimm"
|
||||
|
||||
@@ -5359,7 +5359,7 @@ func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4
|
||||
|
||||
/* SHA256Message1 */
|
||||
|
||||
// SHA256Message1 does the sigma and addtion of 1 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// SHA256Message1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
|
||||
// x = {W0, W1, W2, W3}
|
||||
// y = {W4, 0, 0, 0}
|
||||
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}
|
||||
|
||||
@@ -104,3 +104,9 @@ func simdIsNaN512() {
|
||||
c := a.Or(b)
|
||||
c.ToInt64x8().StoreSlice(sinkInt64s)
|
||||
}
|
||||
|
||||
func sftImmVPSRL() archsimd.Uint32x4 {
|
||||
var x archsimd.Uint32x4
|
||||
// amd64:`VPSRLD\s\$1,\s.*$`
|
||||
return x.ShiftAllRight(1)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user