From ca94cf12479f2a2c065215218f6a3f5fc7bfe1f4 Mon Sep 17 00:00:00 2001
From: Xueqi Luo <1824368278@qq.com>
Date: Tue, 10 Feb 2026 08:10:32 +0000
Subject: [PATCH] cmd/compile/internal/ssa: add codegen for Zicond extension on riscv64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Zicond is a mandatory extension in rva23u64. This patch converts certain
branches to CondSelect and optimizes them to Zicond instructions on
RISC-V in appropriate cases, along with additional optimization rules.

Zicond can provide performance benefits on unpredictable branches by
avoiding branch misprediction penalties. However, on simple predictable
branches, Zicond uses 4 instructions vs 2 for traditional branches,
which can cause performance regressions.

To avoid regressions, we keep CondSelect globally disabled for riscv64
and only enable it for the ConstantTimeSelect intrinsic, which has been
shown to benefit from Zicond:

goos: linux
goarch: riscv64
pkg: crypto/subtle
CPU: SG2044
                      │ nozicond.txt │             zicond.txt             │
                      │    sec/op    │   sec/op     vs base               │
ConstantTimeSelect-44   2.325n ± 4%   1.750n ± 2%  -24.69% (p=0.000 n=10)

Future work can explore enabling Zicond for other cases that can benefit
from Zicond.

Follow-up to CL 631595 Updates #75350 Co-authored-by: wangpengcheng.pp@bytedance.com mengzhuo1203@gmail.com Change-Id: If5d9555980e0d1e26fa924974f88943eb86b050b GitHub-Last-Rev: 7a61508780953295f5507e5f927ab5be1d6afd91 GitHub-Pull-Request: golang/go#75577 Reviewed-on: https://go-review.googlesource.com/c/go/+/705996 Reviewed-by: Mark Freeman Reviewed-by: David Chase Reviewed-by: Meng Zhuo Reviewed-by: Joel Sing LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/riscv64/ssa.go | 3 +- .../compile/internal/ssa/_gen/RISCV64.rules | 19 + .../compile/internal/ssa/_gen/RISCV64Ops.go | 4 + src/cmd/compile/internal/ssa/opGen.go | 30 + .../compile/internal/ssa/rewriteRISCV64.go | 778 ++++++++++++++++++ src/cmd/compile/internal/ssagen/intrinsics.go | 7 +- .../internal/ssagen/intrinsics_test.go | 1 + 7 files changed, 839 insertions(+), 3 deletions(-) diff --git a/src/cmd/compile/internal/riscv64/ssa.go b/src/cmd/compile/internal/riscv64/ssa.go index 9aa77c3d02..2231219579 100644 --- a/src/cmd/compile/internal/riscv64/ssa.go +++ b/src/cmd/compile/internal/riscv64/ssa.go @@ -294,7 +294,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpRISCV64FADDD, ssa.OpRISCV64FSUBD, ssa.OpRISCV64FMULD, ssa.OpRISCV64FDIVD, ssa.OpRISCV64FEQD, ssa.OpRISCV64FNED, ssa.OpRISCV64FLTD, ssa.OpRISCV64FLED, ssa.OpRISCV64FSGNJD, ssa.OpRISCV64MIN, ssa.OpRISCV64MAX, ssa.OpRISCV64MINU, ssa.OpRISCV64MAXU, - ssa.OpRISCV64SH1ADD, ssa.OpRISCV64SH2ADD, ssa.OpRISCV64SH3ADD: + ssa.OpRISCV64SH1ADD, ssa.OpRISCV64SH2ADD, ssa.OpRISCV64SH3ADD, + ssa.OpRISCV64CZEROEQZ, ssa.OpRISCV64CZERONEZ: r := v.Reg() r1 := v.Args[0].Reg() r2 := v.Args[1].Reg() diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules index 13a8cab3b5..296b8fc2b4 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64.rules +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64.rules @@ -859,3 +859,22 @@ (Max64 x y) && buildcfg.GORISCV64 >= 22 => (MAX x y) (Min64u x y) && buildcfg.GORISCV64 
>= 22 => (MINU x y) (Max64u x y) && buildcfg.GORISCV64 >= 22 => (MAXU x y) + +// Conditional selection (note that these will only be emitted for rva23u64 and above) +(CondSelect x y cond) => + (OR (CZEROEQZ x (MOVBUreg cond)) (CZERONEZ y (MOVBUreg cond))) +(CZERO(EQ|NE)Z x (SNEZ y)) => (CZERO(EQ|NE)Z x y) +(CZERO(EQ|NE)Z x (SEQZ y)) => (CZERO(NE|EQ)Z x y) +(CZEROEQZ x x) => x +(CZERONEZ x x) => (MOVDconst [0]) +(CZERO(EQ|NE)Z (MOVDconst [0]) _) => (MOVDconst [0]) + +// Optimize conditional arithmetic operations +(OR (CZEROEQZ x cond) (CZERONEZ ((ADD|SUB|OR|XOR|SUBW) x y) cond)) => ((ADD|SUB|OR|XOR|SUBW) x (CZERONEZ y cond)) +(OR (CZEROEQZ ((ADD|SUB|OR|XOR|SUBW) x y) cond) (CZERONEZ x cond)) => ((ADD|SUB|OR|XOR|SUBW) x (CZEROEQZ y cond)) +(OR x:(CZEROEQZ z cond) (CZERONEZ y:(AND z _) cond)) => (OR y x) +(OR (CZEROEQZ x:(AND z _) cond) y:(CZERONEZ z cond)) => (OR x y) +(OR x:(CZEROEQZ z cond) (CZERONEZ y:(ANDI [c] z) cond)) => (OR y x) +(OR (CZEROEQZ x:(ANDI [c] z) cond) y:(CZERONEZ z cond)) => (OR x y) +(OR (CZEROEQZ x cond) (CZERONEZ ((ADDI|ORI|XORI) [c] x) cond)) => ((ADD|OR|XOR) x (CZERONEZ (MOVDconst [c]) cond)) +(OR (CZEROEQZ ((ADDI|ORI|XORI) [c] x) cond) (CZERONEZ x cond)) => ((ADD|OR|XOR) x (CZEROEQZ (MOVDconst [c]) cond)) diff --git a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go index 68674211b0..cd7e8fb144 100644 --- a/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/RISCV64Ops.go @@ -528,6 +528,10 @@ func init() { // ====+============================= {name: "FCLASSS", argLength: 1, reg: fpgp, asm: "FCLASSS", typ: "Int64"}, // classify float32 {name: "FCLASSD", argLength: 1, reg: fpgp, asm: "FCLASSD", typ: "Int64"}, // classify float64 + + // RISC-V Integer Conditional (Zicond) operations extension + {name: "CZEROEQZ", argLength: 2, reg: gp21, asm: "CZEROEQZ"}, // arg1 == 0 result is 0, else arg0 + {name: "CZERONEZ", argLength: 2, reg: gp21, asm: "CZERONEZ"}, // 
arg1 != 0 result is 0, else arg0 } RISCV64blocks := []blockData{ diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index f039139e4a..2014f9c60f 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -5356,6 +5356,8 @@ const ( OpRISCV64LoweredFMAXD OpRISCV64FCLASSS OpRISCV64FCLASSD + OpRISCV64CZEROEQZ + OpRISCV64CZERONEZ OpS390XFADDS OpS390XFADD @@ -81426,6 +81428,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "CZEROEQZ", + argLen: 2, + asm: riscv.ACZEROEQZ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + }, + outputs: []outputInfo{ + {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + }, + }, + }, + { + name: "CZERONEZ", + argLen: 2, + asm: riscv.ACZERONEZ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + {1, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + }, + outputs: []outputInfo{ + {0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30 + }, + }, + }, { name: "FADDS", diff --git a/src/cmd/compile/internal/ssa/rewriteRISCV64.go b/src/cmd/compile/internal/ssa/rewriteRISCV64.go index 284d88967b..1fa1cd8a48 100644 --- a/src/cmd/compile/internal/ssa/rewriteRISCV64.go +++ b/src/cmd/compile/internal/ssa/rewriteRISCV64.go @@ -133,6 +133,8 @@ func rewriteValueRISCV64(v *Value) bool { case OpCom8: v.Op = OpRISCV64NOT return true + case OpCondSelect: + return rewriteValueRISCV64_OpCondSelect(v) case OpConst16: return rewriteValueRISCV64_OpConst16(v) case 
OpConst32: @@ -514,6 +516,10 @@ func rewriteValueRISCV64(v *Value) bool { return rewriteValueRISCV64_OpRISCV64AND(v) case OpRISCV64ANDI: return rewriteValueRISCV64_OpRISCV64ANDI(v) + case OpRISCV64CZEROEQZ: + return rewriteValueRISCV64_OpRISCV64CZEROEQZ(v) + case OpRISCV64CZERONEZ: + return rewriteValueRISCV64_OpRISCV64CZERONEZ(v) case OpRISCV64FADDD: return rewriteValueRISCV64_OpRISCV64FADDD(v) case OpRISCV64FADDS: @@ -1081,6 +1087,30 @@ func rewriteValueRISCV64_OpBswap32(v *Value) bool { return true } } +func rewriteValueRISCV64_OpCondSelect(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (CondSelect x y cond) + // result: (OR (CZEROEQZ x (MOVBUreg cond)) (CZERONEZ y (MOVBUreg cond))) + for { + t := v.Type + x := v_0 + y := v_1 + cond := v_2 + v.reset(OpRISCV64OR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVBUreg, typ.UInt64) + v1.AddArg(cond) + v0.AddArg2(x, v1) + v2 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v2.AddArg2(y, v1) + v.AddArg2(v0, v2) + return true + } +} func rewriteValueRISCV64_OpConst16(v *Value) bool { // match: (Const16 [val]) // result: (MOVDconst [int64(val)]) @@ -3528,6 +3558,105 @@ func rewriteValueRISCV64_OpRISCV64ANDI(v *Value) bool { } return false } +func rewriteValueRISCV64_OpRISCV64CZEROEQZ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CZEROEQZ x (SNEZ y)) + // result: (CZEROEQZ x y) + for { + x := v_0 + if v_1.Op != OpRISCV64SNEZ { + break + } + y := v_1.Args[0] + v.reset(OpRISCV64CZEROEQZ) + v.AddArg2(x, y) + return true + } + // match: (CZEROEQZ x (SEQZ y)) + // result: (CZERONEZ x y) + for { + x := v_0 + if v_1.Op != OpRISCV64SEQZ { + break + } + y := v_1.Args[0] + v.reset(OpRISCV64CZERONEZ) + v.AddArg2(x, y) + return true + } + // match: (CZEROEQZ x x) + // result: x + for { + x := v_0 + if x != v_1 { + break + } + v.copyOf(x) + return true + } + // match: (CZEROEQZ (MOVDconst [0]) 
_) + // result: (MOVDconst [0]) + for { + if v_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0.AuxInt) != 0 { + break + } + v.reset(OpRISCV64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + return false +} +func rewriteValueRISCV64_OpRISCV64CZERONEZ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (CZERONEZ x (SNEZ y)) + // result: (CZERONEZ x y) + for { + x := v_0 + if v_1.Op != OpRISCV64SNEZ { + break + } + y := v_1.Args[0] + v.reset(OpRISCV64CZERONEZ) + v.AddArg2(x, y) + return true + } + // match: (CZERONEZ x (SEQZ y)) + // result: (CZEROEQZ x y) + for { + x := v_0 + if v_1.Op != OpRISCV64SEQZ { + break + } + y := v_1.Args[0] + v.reset(OpRISCV64CZEROEQZ) + v.AddArg2(x, y) + return true + } + // match: (CZERONEZ x x) + // result: (MOVDconst [0]) + for { + x := v_0 + if x != v_1 { + break + } + v.reset(OpRISCV64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + // match: (CZERONEZ (MOVDconst [0]) _) + // result: (MOVDconst [0]) + for { + if v_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0.AuxInt) != 0 { + break + } + v.reset(OpRISCV64MOVDconst) + v.AuxInt = int64ToAuxInt(0) + return true + } + return false +} func rewriteValueRISCV64_OpRISCV64FADDD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -6925,6 +7054,8 @@ func rewriteValueRISCV64_OpRISCV64NEGW(v *Value) bool { func rewriteValueRISCV64_OpRISCV64OR(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types // match: (OR (MOVDconst [val]) x) // cond: is32Bit(val) // result: (ORI [val] x) @@ -6955,6 +7086,653 @@ func rewriteValueRISCV64_OpRISCV64OR(v *Value) bool { v.copyOf(x) return true } + // match: (OR (CZEROEQZ x cond) (CZERONEZ (ADD x y) cond)) + // result: (ADD x (CZERONEZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } 
+ _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64ADD { + continue + } + _ = v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + v_1_0_1 := v_1_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0_0, v_1_0_1 = _i1+1, v_1_0_1, v_1_0_0 { + if x != v_1_0_0 { + continue + } + y := v_1_0_1 + if cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64ADD) + v0 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + } + break + } + // match: (OR (CZEROEQZ x cond) (CZERONEZ (SUB x y) cond)) + // result: (SUB x (CZERONEZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64SUB { + continue + } + y := v_1_0.Args[1] + if x != v_1_0.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64SUB) + v0 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ x cond) (CZERONEZ (OR x y) cond)) + // result: (OR x (CZERONEZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64OR { + continue + } + _ = v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + v_1_0_1 := v_1_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0_0, v_1_0_1 = _i1+1, v_1_0_1, v_1_0_0 { + if x != v_1_0_0 { + continue + } + y := v_1_0_1 + if cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + } + break + } + // match: (OR (CZEROEQZ x cond) (CZERONEZ 
(XOR x y) cond)) + // result: (XOR x (CZERONEZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64XOR { + continue + } + _ = v_1_0.Args[1] + v_1_0_0 := v_1_0.Args[0] + v_1_0_1 := v_1_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_1_0_0, v_1_0_1 = _i1+1, v_1_0_1, v_1_0_0 { + if x != v_1_0_0 { + continue + } + y := v_1_0_1 + if cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64XOR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + } + break + } + // match: (OR (CZEROEQZ x cond) (CZERONEZ (SUBW x y) cond)) + // result: (SUBW x (CZERONEZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64SUBW { + continue + } + y := v_1_0.Args[1] + if x != v_1_0.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64SUBW) + v0 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ (ADD x y) cond) (CZERONEZ x cond)) + // result: (ADD x (CZEROEQZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpRISCV64ADD { + continue + } + _ = v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + v_0_0_1 := v_0_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0_0, v_0_0_1 = _i1+1, v_0_0_1, v_0_0_0 { + x := v_0_0_0 + y := v_0_0_1 + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + 
continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64ADD) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + } + break + } + // match: (OR (CZEROEQZ (SUB x y) cond) (CZERONEZ x cond)) + // result: (SUB x (CZEROEQZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpRISCV64SUB { + continue + } + y := v_0_0.Args[1] + x := v_0_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64SUB) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ (OR x y) cond) (CZERONEZ x cond)) + // result: (OR x (CZEROEQZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpRISCV64OR { + continue + } + _ = v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + v_0_0_1 := v_0_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0_0, v_0_0_1 = _i1+1, v_0_0_1, v_0_0_0 { + x := v_0_0_0 + y := v_0_0_1 + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + } + break + } + // match: (OR (CZEROEQZ (XOR x y) cond) (CZERONEZ x cond)) + // result: (XOR x (CZEROEQZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != 
OpRISCV64XOR { + continue + } + _ = v_0_0.Args[1] + v_0_0_0 := v_0_0.Args[0] + v_0_0_1 := v_0_0.Args[1] + for _i1 := 0; _i1 <= 1; _i1, v_0_0_0, v_0_0_1 = _i1+1, v_0_0_1, v_0_0_0 { + x := v_0_0_0 + y := v_0_0_1 + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64XOR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + } + break + } + // match: (OR (CZEROEQZ (SUBW x y) cond) (CZERONEZ x cond)) + // result: (SUBW x (CZEROEQZ y cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpRISCV64SUBW { + continue + } + y := v_0_0.Args[1] + x := v_0_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64SUBW) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v0.AddArg2(y, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR x:(CZEROEQZ z cond) (CZERONEZ y:(AND z _) cond)) + // result: (OR y x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if x.Op != OpRISCV64CZEROEQZ { + continue + } + cond := x.Args[1] + z := x.Args[0] + if v_1.Op != OpRISCV64CZERONEZ { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpRISCV64AND { + continue + } + y_0 := y.Args[0] + y_1 := y.Args[1] + for _i1 := 0; _i1 <= 1; _i1, y_0, y_1 = _i1+1, y_1, y_0 { + if z != y_0 || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v.AddArg2(y, x) + return true + } + } + break + } + // match: (OR (CZEROEQZ x:(AND z _) cond) y:(CZERONEZ z cond)) + // result: (OR x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + cond := 
v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpRISCV64AND { + continue + } + x_0 := x.Args[0] + x_1 := x.Args[1] + for _i1 := 0; _i1 <= 1; _i1, x_0, x_1 = _i1+1, x_1, x_0 { + z := x_0 + y := v_1 + if y.Op != OpRISCV64CZERONEZ { + continue + } + _ = y.Args[1] + if z != y.Args[0] || cond != y.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v.AddArg2(x, y) + return true + } + } + break + } + // match: (OR x:(CZEROEQZ z cond) (CZERONEZ y:(ANDI [c] z) cond)) + // result: (OR y x) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if x.Op != OpRISCV64CZEROEQZ { + continue + } + cond := x.Args[1] + z := x.Args[0] + if v_1.Op != OpRISCV64CZERONEZ { + continue + } + _ = v_1.Args[1] + y := v_1.Args[0] + if y.Op != OpRISCV64ANDI { + continue + } + if z != y.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v.AddArg2(y, x) + return true + } + break + } + // match: (OR (CZEROEQZ x:(ANDI [c] z) cond) y:(CZERONEZ z cond)) + // result: (OR x y) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + cond := v_0.Args[1] + x := v_0.Args[0] + if x.Op != OpRISCV64ANDI { + continue + } + z := x.Args[0] + y := v_1 + if y.Op != OpRISCV64CZERONEZ { + continue + } + _ = y.Args[1] + if z != y.Args[0] || cond != y.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v.AddArg2(x, y) + return true + } + break + } + // match: (OR (CZEROEQZ x cond) (CZERONEZ (ADDI [c] x) cond)) + // result: (ADD x (CZERONEZ (MOVDconst [c]) cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64ADDI { + continue + } + c := auxIntToInt64(v_1_0.AuxInt) + if x != v_1_0.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64ADD) + v0 := 
b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v1.AuxInt = int64ToAuxInt(c) + v0.AddArg2(v1, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ x cond) (CZERONEZ (ORI [c] x) cond)) + // result: (OR x (CZERONEZ (MOVDconst [c]) cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64ORI { + continue + } + c := auxIntToInt64(v_1_0.AuxInt) + if x != v_1_0.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v1.AuxInt = int64ToAuxInt(c) + v0.AddArg2(v1, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ x cond) (CZERONEZ (XORI [c] x) cond)) + // result: (XOR x (CZERONEZ (MOVDconst [c]) cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + x := v_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + v_1_0 := v_1.Args[0] + if v_1_0.Op != OpRISCV64XORI { + continue + } + c := auxIntToInt64(v_1_0.AuxInt) + if x != v_1_0.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64XOR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZERONEZ, t) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v1.AuxInt = int64ToAuxInt(c) + v0.AddArg2(v1, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ (ADDI [c] x) cond) (CZERONEZ x cond)) + // result: (ADD x (CZEROEQZ (MOVDconst [c]) cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + 
t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpRISCV64ADDI { + continue + } + c := auxIntToInt64(v_0_0.AuxInt) + x := v_0_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64ADD) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v1.AuxInt = int64ToAuxInt(c) + v0.AddArg2(v1, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ (ORI [c] x) cond) (CZERONEZ x cond)) + // result: (OR x (CZEROEQZ (MOVDconst [c]) cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpRISCV64ORI { + continue + } + c := auxIntToInt64(v_0_0.AuxInt) + x := v_0_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64OR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, typ.UInt64) + v1.AuxInt = int64ToAuxInt(c) + v0.AddArg2(v1, cond) + v.AddArg2(x, v0) + return true + } + break + } + // match: (OR (CZEROEQZ (XORI [c] x) cond) (CZERONEZ x cond)) + // result: (XOR x (CZEROEQZ (MOVDconst [c]) cond)) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + if v_0.Op != OpRISCV64CZEROEQZ { + continue + } + t := v_0.Type + cond := v_0.Args[1] + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpRISCV64XORI { + continue + } + c := auxIntToInt64(v_0_0.AuxInt) + x := v_0_0.Args[0] + if v_1.Op != OpRISCV64CZERONEZ || v_1.Type != t { + continue + } + _ = v_1.Args[1] + if x != v_1.Args[0] || cond != v_1.Args[1] { + continue + } + v.reset(OpRISCV64XOR) + v0 := b.NewValue0(v.Pos, OpRISCV64CZEROEQZ, t) + v1 := b.NewValue0(v.Pos, OpRISCV64MOVDconst, 
typ.UInt64) + v1.AuxInt = int64ToAuxInt(c) + v0.AddArg2(v1, cond) + v.AddArg2(x, v0) + return true + } + break + } return false } func rewriteValueRISCV64_OpRISCV64ORI(v *Value) bool { diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index f6ece1fbe3..0657d5fc75 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -1612,6 +1612,10 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { /******** crypto/internal/constanttime ********/ // We implement a superset of the Select promise: // Select returns x if v != 0 and y if v == 0. + hasCMOV := []*sys.Arch{sys.ArchAMD64, sys.ArchARM64, sys.ArchLoong64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchWasm} + if cfg.goriscv64 >= 23 { + hasCMOV = append(hasCMOV, sys.ArchRISCV64) + } add("crypto/internal/constanttime", "Select", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { v, x, y := args[0], args[1], args[2] @@ -1631,8 +1635,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { check := s.newValue2(checkOp, types.Types[types.TBOOL], zero, v) return s.newValue3(ssa.OpCondSelect, types.Types[types.TINT], x, y, check) - }, - sys.ArchAMD64, sys.ArchARM64, sys.ArchLoong64, sys.ArchPPC64, sys.ArchPPC64LE, sys.ArchWasm) // all with CMOV support. + }, hasCMOV...) // all with CMOV support. 
add("crypto/internal/constanttime", "boolToUint8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue1(ssa.OpCvtBoolToUint8, types.Types[types.TUINT8], args[0]) diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go index a85a8871e3..37058556dc 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics_test.go +++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go @@ -1204,6 +1204,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"riscv64", "sync/atomic", "SwapUint32"}: struct{}{}, {"riscv64", "sync/atomic", "SwapUint64"}: struct{}{}, {"riscv64", "sync/atomic", "SwapUintptr"}: struct{}{}, + {"riscv64", "crypto/internal/constanttime", "Select"}: struct{}{}, {"riscv64", "crypto/internal/constanttime", "boolToUint8"}: struct{}{}, {"s390x", "internal/runtime/atomic", "And"}: struct{}{}, {"s390x", "internal/runtime/atomic", "And8"}: struct{}{},