From 425a88193ca39e82dc3dbcae22b98dbdfd98a04c Mon Sep 17 00:00:00 2001 From: David Chase Date: Wed, 10 Dec 2025 16:50:21 -0500 Subject: [PATCH] cmd/compile: (arm64) optimize float32(round64(float64(x))) Not a fix because there are other architectures still to be done. Updates #75463. Change-Id: Ifca03975023e4e5d0ffa98d1f877314a1a291be0 Reviewed-on: https://go-review.googlesource.com/c/go/+/729161 Reviewed-by: Keith Randall Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI --- src/cmd/compile/internal/arm64/ssa.go | 8 +- src/cmd/compile/internal/ssa/_gen/ARM64.rules | 5 + src/cmd/compile/internal/ssa/_gen/ARM64Ops.go | 19 ++- src/cmd/compile/internal/ssa/opGen.go | 84 +++++++++++++ src/cmd/compile/internal/ssa/rewriteARM64.go | 111 ++++++++++++++++++ test/codegen/floats.go | 2 + 6 files changed, 222 insertions(+), 7 deletions(-) diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 74371104a3..031b4b6b4a 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -963,6 +963,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { case ssa.OpARM64MVN, ssa.OpARM64NEG, ssa.OpARM64FABSD, + ssa.OpARM64FABSS, ssa.OpARM64FMOVDfpgp, ssa.OpARM64FMOVDgpfp, ssa.OpARM64FMOVSfpgp, @@ -1001,7 +1002,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64FRINTMD, ssa.OpARM64FRINTND, ssa.OpARM64FRINTPD, - ssa.OpARM64FRINTZD: + ssa.OpARM64FRINTZD, + ssa.OpARM64FRINTAS, + ssa.OpARM64FRINTMS, + ssa.OpARM64FRINTNS, + ssa.OpARM64FRINTPS, + ssa.OpARM64FRINTZS: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[0].Reg() diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index 4ade43f1a1..9ac17827c1 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -543,6 +543,11 @@ // Optimizations +// Replace widen -> wide_unop -> narrow with narrow_unop when one exists. 
+(FCVTDS (F(ABS|SQRT|RINTP|RINTM|RINTA|RINTN|RINTZ)D (FCVTSD x))) => + (F(ABS|SQRT|RINTP|RINTM|RINTA|RINTN|RINTZ)S x) + + // Absorb boolean tests into block (NZ (Equal cc) yes no) => (EQ cc yes no) (NZ (NotEqual cc) yes no) => (NE cc yes no) diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go index c84b24cad1..8501c8c7c5 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go @@ -248,6 +248,7 @@ func init() { {name: "NEGSflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "NEGS"}, // -arg0, set flags. {name: "NGCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "NGC"}, // -1 if borrowing, 0 otherwise. {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64 + {name: "FABSS", argLength: 1, reg: fp11, asm: "FABSS"}, // abs(arg0), float32 {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32 {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64 {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64 @@ -495,12 +496,18 @@ func init() { {name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD"}, // float32 -> float64 {name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"}, // float64 -> float32 - // floating-point round to integral - {name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"}, - {name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"}, - {name: "FRINTND", argLength: 1, reg: fp11, asm: "FRINTND"}, - {name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"}, - {name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"}, + // 64-bit floating-point round to integers in 64-bit FP format + {name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"}, // Round (ties Away from zero; 0.5 -> 1, -0.5 -> -1) + {name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"}, // Floor (towards Minus; 0.5 -> 0, -0.5 -> -1) + {name: "FRINTND", argLength: 1, 
reg: fp11, asm: "FRINTND"}, // Round (ties to even; 0.5 -> 0, 1.5 -> 2) + {name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"}, // Ceil (towards Positive; 0.5 -> 1, -0.5 -> 0) + {name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"}, // Trunc (towards Zero; 0.5 -> 0, -0.5 -> 0) + // 32-bit floating-point round to integers in 32-bit FP format + {name: "FRINTAS", argLength: 1, reg: fp11, asm: "FRINTAS"}, // Round (ties Away from zero; 0.5 -> 1, -0.5 -> -1) + {name: "FRINTMS", argLength: 1, reg: fp11, asm: "FRINTMS"}, // Floor (towards Minus; 0.5 -> 0, -0.5 -> -1) + {name: "FRINTNS", argLength: 1, reg: fp11, asm: "FRINTNS"}, // Round (ties to even; 0.5 -> 0, 1.5 -> 2) + {name: "FRINTPS", argLength: 1, reg: fp11, asm: "FRINTPS"}, // Ceil (towards Positive; 0.5 -> 1, -0.5 -> 0) + {name: "FRINTZS", argLength: 1, reg: fp11, asm: "FRINTZS"}, // Trunc (towards Zero; 0.5 -> 0, -0.5 -> 0) // conditional instructions; auxint is // one of the arm64 comparison pseudo-ops (LessThan, LessThanU, etc.) 
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 153e38888f..d00c4c02ec 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -4189,6 +4189,7 @@ const ( OpARM64NEGSflags OpARM64NGCzerocarry OpARM64FABSD + OpARM64FABSS OpARM64FNEGS OpARM64FNEGD OpARM64FSQRTD @@ -4392,6 +4393,11 @@ const ( OpARM64FRINTND OpARM64FRINTPD OpARM64FRINTZD + OpARM64FRINTAS + OpARM64FRINTMS + OpARM64FRINTNS + OpARM64FRINTPS + OpARM64FRINTZS OpARM64CSEL OpARM64CSEL0 OpARM64CSINC @@ -65691,6 +65697,19 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FABSS", + argLen: 1, + asm: arm64.AFABSS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "FNEGS", argLen: 1, @@ -68477,6 +68496,71 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FRINTAS", + argLen: 1, + asm: arm64.AFRINTAS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { + name: "FRINTMS", + argLen: 1, + asm: arm64.AFRINTMS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 
F29 F30 F31 + }, + }, + }, + { + name: "FRINTNS", + argLen: 1, + asm: arm64.AFRINTNS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { + name: "FRINTPS", + argLen: 1, + asm: arm64.AFRINTPS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { + name: "FRINTZS", + argLen: 1, + asm: arm64.AFRINTZS, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "CSEL", auxType: auxCCop, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 25a1c9c0fc..7e109dbae3 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -106,6 +106,8 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FCMPD(v) case OpARM64FCMPS: return rewriteValueARM64_OpARM64FCMPS(v) + case OpARM64FCVTDS: + return rewriteValueARM64_OpARM64FCVTDS(v) case OpARM64FMOVDfpgp: return rewriteValueARM64_OpARM64FMOVDfpgp(v) case OpARM64FMOVDgpfp: @@ -4790,6 +4792,115 @@ func 
rewriteValueARM64_OpARM64FCMPS(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64FCVTDS(v *Value) bool { + v_0 := v.Args[0] + // match: (FCVTDS (FABSD (FCVTSD x))) + // result: (FABSS x) + for { + if v_0.Op != OpARM64FABSD { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpARM64FCVTSD { + break + } + x := v_0_0.Args[0] + v.reset(OpARM64FABSS) + v.AddArg(x) + return true + } + // match: (FCVTDS (FSQRTD (FCVTSD x))) + // result: (FSQRTS x) + for { + if v_0.Op != OpARM64FSQRTD { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpARM64FCVTSD { + break + } + x := v_0_0.Args[0] + v.reset(OpARM64FSQRTS) + v.AddArg(x) + return true + } + // match: (FCVTDS (FRINTPD (FCVTSD x))) + // result: (FRINTPS x) + for { + if v_0.Op != OpARM64FRINTPD { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpARM64FCVTSD { + break + } + x := v_0_0.Args[0] + v.reset(OpARM64FRINTPS) + v.AddArg(x) + return true + } + // match: (FCVTDS (FRINTMD (FCVTSD x))) + // result: (FRINTMS x) + for { + if v_0.Op != OpARM64FRINTMD { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpARM64FCVTSD { + break + } + x := v_0_0.Args[0] + v.reset(OpARM64FRINTMS) + v.AddArg(x) + return true + } + // match: (FCVTDS (FRINTAD (FCVTSD x))) + // result: (FRINTAS x) + for { + if v_0.Op != OpARM64FRINTAD { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpARM64FCVTSD { + break + } + x := v_0_0.Args[0] + v.reset(OpARM64FRINTAS) + v.AddArg(x) + return true + } + // match: (FCVTDS (FRINTND (FCVTSD x))) + // result: (FRINTNS x) + for { + if v_0.Op != OpARM64FRINTND { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpARM64FCVTSD { + break + } + x := v_0_0.Args[0] + v.reset(OpARM64FRINTNS) + v.AddArg(x) + return true + } + // match: (FCVTDS (FRINTZD (FCVTSD x))) + // result: (FRINTZS x) + for { + if v_0.Op != OpARM64FRINTZD { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpARM64FCVTSD { + break + } + x := v_0_0.Args[0] + v.reset(OpARM64FRINTZS) + v.AddArg(x) + return true + } + return 
false +} func rewriteValueARM64_OpARM64FMOVDfpgp(v *Value) bool { v_0 := v.Args[0] b := v.Block diff --git a/test/codegen/floats.go b/test/codegen/floats.go index 21735ab19d..bf9e70d43e 100644 --- a/test/codegen/floats.go +++ b/test/codegen/floats.go @@ -282,11 +282,13 @@ func Float64ConstantStore(p *float64) { func WideCeilNarrow(x float32) float32 { // amd64/v3:"ROUNDSS" + // arm64:"FRINTPS" return float32(math.Ceil(float64(x))) } func WideTruncNarrow(x float32) float32 { // amd64/v3:"ROUNDSS" + // arm64:"FRINTZS" return float32(math.Trunc(float64(x))) }