cmd/compile, simd/archsimd: add VPSRL immediate peepholes

Before this CL, simdgen contained a sign check to selectively enable
such rules for deduplication purposes. This left out `VPSRL`, as it is
only available in unsigned form. This CL fixes that.

It looks like the previous documentation fix to the SHA instructions was
not followed by running go generate, so this CL also contains the
generated code for that fix.

There is also a weird phantom import in
cmd/compile/internal/ssa/issue77582_test.go;
this CL also removes it.

(Surprisingly, the trybot did not complain about that phantom import.)

Change-Id: Ibbf9f789c1a67af1474f0285ab376bc07f17667e
Reviewed-on: https://go-review.googlesource.com/c/go/+/748501
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Junyang Shao
2026-02-24 22:34:44 +00:00
parent ba057f7950
commit aa80d7a7e6
5 changed files with 722 additions and 4 deletions

View File

@@ -1067,14 +1067,23 @@
(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...)
(VPSRAQ512 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x)
(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...)
(VPSRLW128 x (MOVQconst [c])) => (VPSRLW128const [uint8(c)] x)
(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...)
(VPSRLW256 x (MOVQconst [c])) => (VPSRLW256const [uint8(c)] x)
(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...)
(VPSRLW512 x (MOVQconst [c])) => (VPSRLW512const [uint8(c)] x)
(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...)
(VPSRLD128 x (MOVQconst [c])) => (VPSRLD128const [uint8(c)] x)
(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...)
(VPSRLD256 x (MOVQconst [c])) => (VPSRLD256const [uint8(c)] x)
(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...)
(VPSRLD512 x (MOVQconst [c])) => (VPSRLD512const [uint8(c)] x)
(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...)
(VPSRLQ128 x (MOVQconst [c])) => (VPSRLQ128const [uint8(c)] x)
(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...)
(VPSRLQ256 x (MOVQconst [c])) => (VPSRLQ256const [uint8(c)] x)
(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...)
(VPSRLQ512 x (MOVQconst [c])) => (VPSRLQ512const [uint8(c)] x)
(ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...)
(ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...)
(ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...)
@@ -1102,6 +1111,15 @@
(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask)
(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask)
(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask)
(VPSRLWMasked128 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [uint8(c)] x mask)
(VPSRLWMasked256 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [uint8(c)] x mask)
(VPSRLWMasked512 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [uint8(c)] x mask)
(VPSRLDMasked128 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [uint8(c)] x mask)
(VPSRLDMasked256 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [uint8(c)] x mask)
(VPSRLDMasked512 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [uint8(c)] x mask)
(VPSRLQMasked128 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [uint8(c)] x mask)
(VPSRLQMasked256 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [uint8(c)] x mask)
(VPSRLQMasked512 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [uint8(c)] x mask)
(ShiftLeftInt16x8 ...) => (VPSLLVW128 ...)
(ShiftLeftInt16x16 ...) => (VPSLLVW256 ...)
(ShiftLeftInt16x32 ...) => (VPSLLVW512 ...)
@@ -1960,6 +1978,15 @@
(VMOVDQU64Masked128 (VPSLLQ128const [a] x) mask) => (VPSLLQMasked128const [a] x mask)
(VMOVDQU64Masked256 (VPSLLQ256const [a] x) mask) => (VPSLLQMasked256const [a] x mask)
(VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) => (VPSLLQMasked512const [a] x mask)
(VMOVDQU16Masked128 (VPSRLW128const [a] x) mask) => (VPSRLWMasked128const [a] x mask)
(VMOVDQU16Masked256 (VPSRLW256const [a] x) mask) => (VPSRLWMasked256const [a] x mask)
(VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) => (VPSRLWMasked512const [a] x mask)
(VMOVDQU32Masked128 (VPSRLD128const [a] x) mask) => (VPSRLDMasked128const [a] x mask)
(VMOVDQU32Masked256 (VPSRLD256const [a] x) mask) => (VPSRLDMasked256const [a] x mask)
(VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) => (VPSRLDMasked512const [a] x mask)
(VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask) => (VPSRLQMasked128const [a] x mask)
(VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask) => (VPSRLQMasked256const [a] x mask)
(VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512const [a] x mask)
(VMOVDQU16Masked128 (VPSRAW128const [a] x) mask) => (VPSRAWMasked128const [a] x mask)
(VMOVDQU16Masked256 (VPSRAW256const [a] x) mask) => (VPSRAWMasked256const [a] x mask)
(VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) => (VPSRAWMasked512const [a] x mask)
@@ -2024,6 +2051,7 @@
(VPBLENDMDMasked512 dst (VPSLLVD512 x y) mask) => (VPSLLVDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPSRAD512const [a] x) mask) => (VPSRADMasked512constMerging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPSRAVD512 x y) mask) => (VPSRAVDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask) => (VPSRLDMasked512constMerging dst [a] x mask)
(VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask) => (VPSRLVDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPSUBD512 x y) mask) => (VPSUBDMasked512Merging dst x y mask)
(VPBLENDMDMasked512 dst (VPXORD512 x y) mask) => (VPXORDMasked512Merging dst x y mask)
@@ -2078,6 +2106,7 @@
(VPBLENDMQMasked512 dst (VPSLLVQ512 x y) mask) => (VPSLLVQMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512constMerging dst [a] x mask)
(VPBLENDMQMasked512 dst (VPSRAVQ512 x y) mask) => (VPSRAVQMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask) => (VPSRLQMasked512constMerging dst [a] x mask)
(VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask) => (VPSRLVQMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPSUBQ512 x y) mask) => (VPSUBQMasked512Merging dst x y mask)
(VPBLENDMQMasked512 dst (VPXORQ512 x y) mask) => (VPXORQMasked512Merging dst x y mask)
@@ -2115,6 +2144,7 @@
(VPBLENDMWMasked512 dst (VPSRAVW512 x y) mask) => (VPSRAVWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSRAW512const [a] x) mask) => (VPSRAWMasked512constMerging dst [a] x mask)
(VPBLENDMWMasked512 dst (VPSRLVW512 x y) mask) => (VPSRLVWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask) => (VPSRLWMasked512constMerging dst [a] x mask)
(VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask) => (VPSUBSWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSUBUSW512 x y) mask) => (VPSUBUSWMasked512Merging dst x y mask)
(VPBLENDMWMasked512 dst (VPSUBW512 x y) mask) => (VPSUBWMasked512Merging dst x y mask)
@@ -2288,9 +2318,12 @@
(VPBLENDVB128 dst (VPSRAVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRAW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLD128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLQ128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLVW128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked128Merging dst x y (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSRLW128const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBB128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBD128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
(VPBLENDVB128 dst (VPSUBQ128 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked128Merging dst x y (VPMOVVec64x2ToM <types.TypeMask> mask))
@@ -2440,9 +2473,12 @@
(VPBLENDVB256 dst (VPSRAVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRAW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRAWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLD256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLQ256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLVW256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLVWMasked256Merging dst x y (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSRLW256const [a] x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBB256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBD256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
(VPBLENDVB256 dst (VPSUBQ256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VPSUBQMasked256Merging dst x y (VPMOVVec64x4ToM <types.TypeMask> mask))

View File

@@ -1724,20 +1724,44 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v)
case OpAMD64VPSRAWMasked512:
return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v)
case OpAMD64VPSRLD128:
return rewriteValueAMD64_OpAMD64VPSRLD128(v)
case OpAMD64VPSRLD256:
return rewriteValueAMD64_OpAMD64VPSRLD256(v)
case OpAMD64VPSRLD512:
return rewriteValueAMD64_OpAMD64VPSRLD512(v)
case OpAMD64VPSRLD512const:
return rewriteValueAMD64_OpAMD64VPSRLD512const(v)
case OpAMD64VPSRLDMasked128:
return rewriteValueAMD64_OpAMD64VPSRLDMasked128(v)
case OpAMD64VPSRLDMasked128const:
return rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v)
case OpAMD64VPSRLDMasked256:
return rewriteValueAMD64_OpAMD64VPSRLDMasked256(v)
case OpAMD64VPSRLDMasked256const:
return rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v)
case OpAMD64VPSRLDMasked512:
return rewriteValueAMD64_OpAMD64VPSRLDMasked512(v)
case OpAMD64VPSRLDMasked512const:
return rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v)
case OpAMD64VPSRLQ128:
return rewriteValueAMD64_OpAMD64VPSRLQ128(v)
case OpAMD64VPSRLQ256:
return rewriteValueAMD64_OpAMD64VPSRLQ256(v)
case OpAMD64VPSRLQ512:
return rewriteValueAMD64_OpAMD64VPSRLQ512(v)
case OpAMD64VPSRLQ512const:
return rewriteValueAMD64_OpAMD64VPSRLQ512const(v)
case OpAMD64VPSRLQMasked128:
return rewriteValueAMD64_OpAMD64VPSRLQMasked128(v)
case OpAMD64VPSRLQMasked128const:
return rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v)
case OpAMD64VPSRLQMasked256:
return rewriteValueAMD64_OpAMD64VPSRLQMasked256(v)
case OpAMD64VPSRLQMasked256const:
return rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v)
case OpAMD64VPSRLQMasked512:
return rewriteValueAMD64_OpAMD64VPSRLQMasked512(v)
case OpAMD64VPSRLQMasked512const:
return rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v)
case OpAMD64VPSRLVD512:
@@ -1756,6 +1780,18 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v)
case OpAMD64VPSRLVQMasked512:
return rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v)
case OpAMD64VPSRLW128:
return rewriteValueAMD64_OpAMD64VPSRLW128(v)
case OpAMD64VPSRLW256:
return rewriteValueAMD64_OpAMD64VPSRLW256(v)
case OpAMD64VPSRLW512:
return rewriteValueAMD64_OpAMD64VPSRLW512(v)
case OpAMD64VPSRLWMasked128:
return rewriteValueAMD64_OpAMD64VPSRLWMasked128(v)
case OpAMD64VPSRLWMasked256:
return rewriteValueAMD64_OpAMD64VPSRLWMasked256(v)
case OpAMD64VPSRLWMasked512:
return rewriteValueAMD64_OpAMD64VPSRLWMasked512(v)
case OpAMD64VPSUBD512:
return rewriteValueAMD64_OpAMD64VPSUBD512(v)
case OpAMD64VPSUBDMasked128:
@@ -33642,6 +33678,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked128(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked128 (VPSRLW128const [a] x) mask)
// result: (VPSRLWMasked128const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLW128const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLWMasked128const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked128 (VPSRAW128const [a] x) mask)
// result: (VPSRAWMasked128const [a] x mask)
for {
@@ -34230,6 +34280,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked256(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked256 (VPSRLW256const [a] x) mask)
// result: (VPSRLWMasked256const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLW256const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLWMasked256const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked256 (VPSRAW256const [a] x) mask)
// result: (VPSRAWMasked256const [a] x mask)
for {
@@ -34746,6 +34810,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask)
// result: (VPSRLWMasked512const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLW512const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLWMasked512const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask)
// result: (VPSRAWMasked512const [a] x mask)
for {
@@ -35505,6 +35583,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked128(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked128 (VPSRLD128const [a] x) mask)
// result: (VPSRLDMasked128const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLD128const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLDMasked128const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked128 (VPSRAD128const [a] x) mask)
// result: (VPSRADMasked128const [a] x mask)
for {
@@ -36386,6 +36478,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked256(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked256 (VPSRLD256const [a] x) mask)
// result: (VPSRLDMasked256const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLD256const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLDMasked256const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked256 (VPSRAD256const [a] x) mask)
// result: (VPSRADMasked256const [a] x mask)
for {
@@ -37271,6 +37377,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask)
// result: (VPSRLDMasked512const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLD512const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLDMasked512const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask)
// result: (VPSRADMasked512const [a] x mask)
for {
@@ -38134,6 +38254,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked128(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU64Masked128 (VPSRLQ128const [a] x) mask)
// result: (VPSRLQMasked128const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLQ128const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLQMasked128const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU64Masked128 (VPSRAQ128const [a] x) mask)
// result: (VPSRAQMasked128const [a] x mask)
for {
@@ -39011,6 +39145,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked256(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU64Masked256 (VPSRLQ256const [a] x) mask)
// result: (VPSRLQMasked256const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLQ256const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLQMasked256const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU64Masked256 (VPSRAQ256const [a] x) mask)
// result: (VPSRAQMasked256const [a] x mask)
for {
@@ -39808,6 +39956,20 @@ func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool {
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask)
// result: (VPSRLQMasked512const [a] x mask)
for {
if v_0.Op != OpAMD64VPSRLQ512const {
break
}
a := auxIntToUint8(v_0.AuxInt)
x := v_0.Args[0]
mask := v_1
v.reset(OpAMD64VPSRLQMasked512const)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg2(x, mask)
return true
}
// match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask)
// result: (VPSRAQMasked512const [a] x mask)
for {
@@ -43835,6 +43997,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool {
v.AddArg4(dst, x, y, mask)
return true
}
// match: (VPBLENDMDMasked512 dst (VPSRLD512const [a] x) mask)
// result: (VPSRLDMasked512constMerging dst [a] x mask)
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLD512const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
v.reset(OpAMD64VPSRLDMasked512constMerging)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg3(dst, x, mask)
return true
}
// match: (VPBLENDMDMasked512 dst (VPSRLVD512 x y) mask)
// result: (VPSRLVDMasked512Merging dst x y mask)
for {
@@ -44606,6 +44783,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool {
v.AddArg4(dst, x, y, mask)
return true
}
// match: (VPBLENDMQMasked512 dst (VPSRLQ512const [a] x) mask)
// result: (VPSRLQMasked512constMerging dst [a] x mask)
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLQ512const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
v.reset(OpAMD64VPSRLQMasked512constMerging)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg3(dst, x, mask)
return true
}
// match: (VPBLENDMQMasked512 dst (VPSRLVQ512 x y) mask)
// result: (VPSRLVQMasked512Merging dst x y mask)
for {
@@ -45155,6 +45347,21 @@ func rewriteValueAMD64_OpAMD64VPBLENDMWMasked512(v *Value) bool {
v.AddArg4(dst, x, y, mask)
return true
}
// match: (VPBLENDMWMasked512 dst (VPSRLW512const [a] x) mask)
// result: (VPSRLWMasked512constMerging dst [a] x mask)
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLW512const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
v.reset(OpAMD64VPSRLWMasked512constMerging)
v.AuxInt = uint8ToAuxInt(a)
v.AddArg3(dst, x, mask)
return true
}
// match: (VPBLENDMWMasked512 dst (VPSUBSW512 x y) mask)
// result: (VPSUBSWMasked512Merging dst x y mask)
for {
@@ -48538,6 +48745,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB128 dst (VPSRLD128const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLDMasked128constMerging dst [a] x (VPMOVVec32x4ToM <types.TypeMask> mask))
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLD128const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
v.reset(OpAMD64VPSRLDMasked128constMerging)
v.AuxInt = uint8ToAuxInt(a)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB128 dst (VPSRLQ128const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLQMasked128constMerging dst [a] x (VPMOVVec64x2ToM <types.TypeMask> mask))
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLQ128const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
v.reset(OpAMD64VPSRLQMasked128constMerging)
v.AuxInt = uint8ToAuxInt(a)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB128 dst (VPSRLVD128 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLVDMasked128Merging dst x y (VPMOVVec32x4ToM <types.TypeMask> mask))
@@ -48598,6 +48847,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB128(v *Value) bool {
v.AddArg4(dst, x, y, v0)
return true
}
// match: (VPBLENDVB128 dst (VPSRLW128const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLWMasked128constMerging dst [a] x (VPMOVVec16x8ToM <types.TypeMask> mask))
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLW128const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
v.reset(OpAMD64VPSRLWMasked128constMerging)
v.AuxInt = uint8ToAuxInt(a)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB128 dst (VPSUBB128 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSUBBMasked128Merging dst x y (VPMOVVec8x16ToM <types.TypeMask> mask))
@@ -51560,6 +51830,48 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB256 dst (VPSRLD256const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLDMasked256constMerging dst [a] x (VPMOVVec32x8ToM <types.TypeMask> mask))
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLD256const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
v.reset(OpAMD64VPSRLDMasked256constMerging)
v.AuxInt = uint8ToAuxInt(a)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB256 dst (VPSRLQ256const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLQMasked256constMerging dst [a] x (VPMOVVec64x4ToM <types.TypeMask> mask))
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLQ256const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
v.reset(OpAMD64VPSRLQMasked256constMerging)
v.AuxInt = uint8ToAuxInt(a)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB256 dst (VPSRLVD256 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLVDMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
@@ -51620,6 +51932,27 @@ func rewriteValueAMD64_OpAMD64VPBLENDVB256(v *Value) bool {
v.AddArg4(dst, x, y, v0)
return true
}
// match: (VPBLENDVB256 dst (VPSRLW256const [a] x) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSRLWMasked256constMerging dst [a] x (VPMOVVec16x16ToM <types.TypeMask> mask))
for {
dst := v_0
if v_1.Op != OpAMD64VPSRLW256const {
break
}
a := auxIntToUint8(v_1.AuxInt)
x := v_1.Args[0]
mask := v_2
if !(v.Block.CPUfeatures.hasFeature(CPUavx512)) {
break
}
v.reset(OpAMD64VPSRLWMasked256constMerging)
v.AuxInt = uint8ToAuxInt(a)
v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
v0.AddArg(mask)
v.AddArg3(dst, x, v0)
return true
}
// match: (VPBLENDVB256 dst (VPSUBB256 x y) mask)
// cond: v.Block.CPUfeatures.hasFeature(CPUavx512)
// result: (VPSUBBMasked256Merging dst x y (VPMOVVec8x32ToM <types.TypeMask> mask))
@@ -61380,6 +61713,60 @@ func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLD128(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLD128 x (MOVQconst [c]))
// result: (VPSRLD128const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLD128const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLD256(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLD256 x (MOVQconst [c]))
// result: (VPSRLD256const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLD256const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLD512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLD512 x (MOVQconst [c]))
// result: (VPSRLD512const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLD512const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
@@ -61406,6 +61793,26 @@ func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLDMasked128(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLDMasked128 x (MOVQconst [c]) mask)
// result: (VPSRLDMasked128const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLDMasked128const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61434,6 +61841,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLDMasked256(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLDMasked256 x (MOVQconst [c]) mask)
// result: (VPSRLDMasked256const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLDMasked256const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61462,6 +61889,26 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLDMasked512(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLDMasked512 x (MOVQconst [c]) mask)
// result: (VPSRLDMasked512const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLDMasked512const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61490,6 +61937,60 @@ func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQ128(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLQ128 x (MOVQconst [c]))
// result: (VPSRLQ128const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLQ128const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQ256(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLQ256 x (MOVQconst [c]))
// result: (VPSRLQ256const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLQ256const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQ512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLQ512 x (MOVQconst [c]))
// result: (VPSRLQ512const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLQ512const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool {
v_0 := v.Args[0]
// match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem))
@@ -61516,6 +62017,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQMasked128(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLQMasked128 x (MOVQconst [c]) mask)
// result: (VPSRLQMasked128const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLQMasked128const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61544,6 +62065,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQMasked256(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLQMasked256 x (MOVQconst [c]) mask)
// result: (VPSRLQMasked256const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLQMasked256const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61572,6 +62113,26 @@ func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQMasked512(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLQMasked512 x (MOVQconst [c]) mask)
// result: (VPSRLQMasked512const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLQMasked512const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -61828,6 +62389,120 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLW128(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLW128 x (MOVQconst [c]))
// result: (VPSRLW128const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLW128const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLW256(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLW256 x (MOVQconst [c]))
// result: (VPSRLW256const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLW256const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLW512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLW512 x (MOVQconst [c]))
// result: (VPSRLW512const [uint8(c)] x)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpAMD64VPSRLW512const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg(x)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLWMasked128(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLWMasked128 x (MOVQconst [c]) mask)
// result: (VPSRLWMasked128const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLWMasked128const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLWMasked256(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLWMasked256 x (MOVQconst [c]) mask)
// result: (VPSRLWMasked256const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLWMasked256const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSRLWMasked512(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (VPSRLWMasked512 x (MOVQconst [c]) mask)
// result: (VPSRLWMasked512const [uint8(c)] x mask)
for {
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mask := v_2
v.reset(OpAMD64VPSRLWMasked512const)
v.AuxInt = uint8ToAuxInt(uint8(c))
v.AddArg2(x, mask)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View File

@@ -129,7 +129,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
// asm -> masked merging rules
maskedMergeOpts := make(map[string]string)
s2n := map[int]string{8: "B", 16: "W", 32: "D", 64: "Q"}
asmCheck := map[string]bool{}
asmCheck := map[string]bool{} // for masked merge optimizations.
sftimmCheck := map[string]bool{} // deduplicate sftimm rules
var allData []tplRuleData
var optData []tplRuleData // for mask peephole optimizations, and other misc
var memOptData []tplRuleData // for memory peephole optimizations
@@ -229,8 +230,8 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
if gOp.SpecialLower != nil {
if *gOp.SpecialLower == "sftimm" {
if data.GoType[0] == 'I' {
// only do these for signed types, it is a duplicate rewrite for unsigned
if !sftimmCheck[data.Asm] {
sftimmCheck[data.Asm] = true
sftImmData := data
if tplName == "maskIn" {
sftImmData.tplName = "masksftimm"

View File

@@ -5359,7 +5359,7 @@ func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4
/* SHA256Message1 */
// SHA256Message1 does the sigma and addtion of 1 in SHA256 algorithm defined in FIPS 180-4.
// SHA256Message1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4.
// x = {W0, W1, W2, W3}
// y = {W4, 0, 0, 0}
// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)}

View File

@@ -104,3 +104,9 @@ func simdIsNaN512() {
c := a.Or(b)
c.ToInt64x8().StoreSlice(sinkInt64s)
}
func sftImmVPSRL() archsimd.Uint32x4 {
var x archsimd.Uint32x4
// amd64:`VPSRLD\s\$1,\s.*$`
return x.ShiftAllRight(1)
}