cmd/compile: (arm64) optimize float32(round64(float64(x)))

Not a fix because there are other architectures
still to be done.

Updates #75463.

Change-Id: Ifca03975023e4e5d0ffa98d1f877314a1a291be0
Reviewed-on: https://go-review.googlesource.com/c/go/+/729161
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
David Chase
2025-12-10 16:50:21 -05:00
parent 7336381cd1
commit 425a88193c
6 changed files with 222 additions and 7 deletions

View File

@@ -963,6 +963,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpARM64MVN,
ssa.OpARM64NEG,
ssa.OpARM64FABSD,
ssa.OpARM64FABSS,
ssa.OpARM64FMOVDfpgp,
ssa.OpARM64FMOVDgpfp,
ssa.OpARM64FMOVSfpgp,
@@ -1001,7 +1002,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64FRINTMD,
ssa.OpARM64FRINTND,
ssa.OpARM64FRINTPD,
ssa.OpARM64FRINTZD:
ssa.OpARM64FRINTZD,
ssa.OpARM64FRINTAS,
ssa.OpARM64FRINTMS,
ssa.OpARM64FRINTNS,
ssa.OpARM64FRINTPS,
ssa.OpARM64FRINTZS:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()

View File

@@ -543,6 +543,11 @@
// Optimizations
// Replace widen -> wide_unop -> narrow with narrow_unop when one exists.
(FCVTDS (F(ABS|SQRT|RINTP|RINTM|RINTA|RINTN|RINTZ)D (FCVTSD x))) =>
(F(ABS|SQRT|RINTP|RINTM|RINTA|RINTN|RINTZ)S x)
// Absorb boolean tests into block
(NZ (Equal cc) yes no) => (EQ cc yes no)
(NZ (NotEqual cc) yes no) => (NE cc yes no)

View File

@@ -248,6 +248,7 @@ func init() {
{name: "NEGSflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "NEGS"}, // -arg0, set flags.
{name: "NGCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "NGC"}, // -1 if borrowing, 0 otherwise.
{name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"}, // abs(arg0), float64
{name: "FABSS", argLength: 1, reg: fp11, asm: "FABSS"}, // abs(arg0), float32
{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32
{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64
{name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
@@ -495,12 +496,18 @@ func init() {
{name: "FCVTSD", argLength: 1, reg: fp11, asm: "FCVTSD"}, // float32 -> float64
{name: "FCVTDS", argLength: 1, reg: fp11, asm: "FCVTDS"}, // float64 -> float32
// floating-point round to integral
{name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"},
{name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"},
{name: "FRINTND", argLength: 1, reg: fp11, asm: "FRINTND"},
{name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"},
{name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"},
// 64-bit floating-point round to integers in 64-bit FP format
{name: "FRINTAD", argLength: 1, reg: fp11, asm: "FRINTAD"}, // Round (ties Away from zero; 0.5 -> 1, -0.5 -> -1)
{name: "FRINTMD", argLength: 1, reg: fp11, asm: "FRINTMD"}, // Floor (towards Minus; 0.5 -> 0, -0.5 -> -1)
{name: "FRINTND", argLength: 1, reg: fp11, asm: "FRINTND"}, // Round (ties to even; 0.5 -> 0, 1.5 -> 2)
{name: "FRINTPD", argLength: 1, reg: fp11, asm: "FRINTPD"}, // Ceil (towards Positive; 0.5 -> 1, -0.5 -> 0)
{name: "FRINTZD", argLength: 1, reg: fp11, asm: "FRINTZD"}, // Trunc (towards Zero; 0.5 -> 0, -0.5 -> 0)
// 32-bit floating-point round to integers in 32-bit FP format
{name: "FRINTAS", argLength: 1, reg: fp11, asm: "FRINTAS"}, // Round (ties Away from zero; 0.5 -> 1, -0.5 -> -1)
{name: "FRINTMS", argLength: 1, reg: fp11, asm: "FRINTMS"}, // Floor (towards Minus; 0.5 -> 0, -0.5 -> -1)
{name: "FRINTNS", argLength: 1, reg: fp11, asm: "FRINTNS"}, // Round (ties to even; 0.5 -> 0, 1.5 -> 2)
{name: "FRINTPS", argLength: 1, reg: fp11, asm: "FRINTPS"}, // Ceil (towards Positive; 0.5 -> 1, -0.5 -> 0)
{name: "FRINTZS", argLength: 1, reg: fp11, asm: "FRINTZS"}, // Trunc (towards Zero; 0.5 -> 0, -0.5 -> 0)
// conditional instructions; auxint is
// one of the arm64 comparison pseudo-ops (LessThan, LessThanU, etc.)

View File

@@ -4189,6 +4189,7 @@ const (
OpARM64NEGSflags
OpARM64NGCzerocarry
OpARM64FABSD
OpARM64FABSS
OpARM64FNEGS
OpARM64FNEGD
OpARM64FSQRTD
@@ -4392,6 +4393,11 @@ const (
OpARM64FRINTND
OpARM64FRINTPD
OpARM64FRINTZD
OpARM64FRINTAS
OpARM64FRINTMS
OpARM64FRINTNS
OpARM64FRINTPS
OpARM64FRINTZS
OpARM64CSEL
OpARM64CSEL0
OpARM64CSINC
@@ -65691,6 +65697,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FABSS",
argLen: 1,
asm: arm64.AFABSS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FNEGS",
argLen: 1,
@@ -68477,6 +68496,71 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FRINTAS",
argLen: 1,
asm: arm64.AFRINTAS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FRINTMS",
argLen: 1,
asm: arm64.AFRINTMS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FRINTNS",
argLen: 1,
asm: arm64.AFRINTNS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FRINTPS",
argLen: 1,
asm: arm64.AFRINTPS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FRINTZS",
argLen: 1,
asm: arm64.AFRINTZS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "CSEL",
auxType: auxCCop,

View File

@@ -106,6 +106,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FCMPD(v)
case OpARM64FCMPS:
return rewriteValueARM64_OpARM64FCMPS(v)
case OpARM64FCVTDS:
return rewriteValueARM64_OpARM64FCVTDS(v)
case OpARM64FMOVDfpgp:
return rewriteValueARM64_OpARM64FMOVDfpgp(v)
case OpARM64FMOVDgpfp:
@@ -4790,6 +4792,115 @@ func rewriteValueARM64_OpARM64FCMPS(v *Value) bool {
}
return false
}
// rewriteValueARM64_OpARM64FCVTDS collapses a widen -> float64 unary op ->
// narrow chain into the matching float32 unary op. It reports whether v was
// rewritten.
//
// match:  (FCVTDS (F(ABS|SQRT|RINTP|RINTM|RINTA|RINTN|RINTZ)D (FCVTSD x)))
// result: (F(ABS|SQRT|RINTP|RINTM|RINTA|RINTN|RINTZ)S x)
func rewriteValueARM64_OpARM64FCVTDS(v *Value) bool {
	wide := v.Args[0]

	// Map the float64 op feeding the narrowing conversion to its float32
	// counterpart; any other op leaves v untouched.
	op := wide.Op
	switch op {
	case OpARM64FABSD:
		op = OpARM64FABSS
	case OpARM64FSQRTD:
		op = OpARM64FSQRTS
	case OpARM64FRINTPD:
		op = OpARM64FRINTPS
	case OpARM64FRINTMD:
		op = OpARM64FRINTMS
	case OpARM64FRINTAD:
		op = OpARM64FRINTAS
	case OpARM64FRINTND:
		op = OpARM64FRINTNS
	case OpARM64FRINTZD:
		op = OpARM64FRINTZS
	default:
		return false
	}

	// The operand of the float64 op must itself be a float32 -> float64
	// conversion. It is inspected only after the op check above succeeded.
	widened := wide.Args[0]
	if widened.Op != OpARM64FCVTSD {
		return false
	}

	x := widened.Args[0]
	v.reset(op)
	v.AddArg(x)
	return true
}
func rewriteValueARM64_OpARM64FMOVDfpgp(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View File

@@ -282,11 +282,13 @@ func Float64ConstantStore(p *float64) {
// WideCeilNarrow widens x to float64, applies math.Ceil, and narrows the
// result back to float32.
//
// The comments inside the body pair an architecture with an instruction
// mnemonic; presumably they are read by the codegen test harness and apply
// to the statement that follows them, so they must stay directly above the
// return (TODO confirm against the harness documentation).
func WideCeilNarrow(x float32) float32 {
// amd64/v3:"ROUNDSS"
// arm64:"FRINTPS"
return float32(math.Ceil(float64(x)))
}
// WideTruncNarrow widens x to float64, applies math.Trunc, and narrows the
// result back to float32.
//
// The comments inside the body pair an architecture with an instruction
// mnemonic; presumably they are read by the codegen test harness and apply
// to the statement that follows them, so they must stay directly above the
// return (TODO confirm against the harness documentation).
func WideTruncNarrow(x float32) float32 {
// amd64/v3:"ROUNDSS"
// arm64:"FRINTZS"
return float32(math.Trunc(float64(x)))
}