cmd/compile: (amd64) optimize float32(round64(float64(x)))

This is not a complete fix, because other architectures
still remain to be done.

Updates #75463.

Change-Id: I3d7754ce4a26af0f5c4ef0be1254d164e68f8442
Reviewed-on: https://go-review.googlesource.com/c/go/+/729160
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
This commit is contained in:
David Chase
2025-12-10 16:05:55 -05:00
parent 831c489f9c
commit 7336381cd1
7 changed files with 126 additions and 1 deletions

View File

@@ -1491,7 +1491,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
}
case ssa.OpAMD64LoweredRound32F, ssa.OpAMD64LoweredRound64F:
// input is already rounded
case ssa.OpAMD64ROUNDSD:
case ssa.OpAMD64ROUNDSD, ssa.OpAMD64ROUNDSS:
p := s.Prog(v.Op.Asm())
val := v.AuxInt
// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc

View File

@@ -124,6 +124,8 @@
(Ceil x) => (ROUNDSD [2] x)
(Trunc x) => (ROUNDSD [3] x)
(CVTSD2SS (ROUNDSD [c] (CVTSS2SD x))) => (ROUNDSS [c] x)
(FMA x y z) => (VFMADD231SD z x y)
// Lowering extension

View File

@@ -798,6 +798,7 @@ func init() {
// ROUNDSD instruction is only guaranteed to be available if GOAMD64>=v2.
// For GOAMD64<v2, any use must be preceded by a successful check of runtime.x86HasSSE41.
{name: "ROUNDSD", argLength: 1, reg: fp11, aux: "Int8", asm: "ROUNDSD"},
{name: "ROUNDSS", argLength: 1, reg: fp11, aux: "Int8", asm: "ROUNDSS"},
// See issue #71204 for why we need these.
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},

View File

@@ -929,6 +929,7 @@ const (
OpAMD64SQRTSD
OpAMD64SQRTSS
OpAMD64ROUNDSD
OpAMD64ROUNDSS
OpAMD64LoweredRound32F
OpAMD64LoweredRound64F
OpAMD64VFMADD231SS
@@ -16231,6 +16232,20 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ROUNDSS",
auxType: auxInt8,
argLen: 1,
asm: x86.AROUNDSS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
name: "LoweredRound32F",
argLen: 1,

View File

@@ -222,6 +222,8 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64CMPXCHGLlock(v)
case OpAMD64CMPXCHGQlock:
return rewriteValueAMD64_OpAMD64CMPXCHGQlock(v)
case OpAMD64CVTSD2SS:
return rewriteValueAMD64_OpAMD64CVTSD2SS(v)
case OpAMD64DIVSD:
return rewriteValueAMD64_OpAMD64DIVSD(v)
case OpAMD64DIVSDload:
@@ -13461,6 +13463,27 @@ func rewriteValueAMD64_OpAMD64CMPXCHGQlock(v *Value) bool {
}
return false
}
// rewriteValueAMD64_OpAMD64CVTSD2SS applies the rewrite rule
//
//	(CVTSD2SS (ROUNDSD [c] (CVTSS2SD x))) => (ROUNDSS [c] x)
//
// i.e. when a float32 is widened, rounded in float64, and narrowed back,
// perform the rounding directly in 32 bits with ROUNDSS, preserving the
// rounding-mode immediate c. Machine-generated from the AMD64 rules file;
// edits here should normally be made via the generator instead.
func rewriteValueAMD64_OpAMD64CVTSD2SS(v *Value) bool {
	v_0 := v.Args[0]
	// match: (CVTSD2SS (ROUNDSD [c] (CVTSS2SD x)))
	// result: (ROUNDSS [c] x)
	for {
		if v_0.Op != OpAMD64ROUNDSD {
			break
		}
		c := auxIntToInt8(v_0.AuxInt)
		v_0_0 := v_0.Args[0]
		if v_0_0.Op != OpAMD64CVTSS2SD {
			break
		}
		x := v_0_0.Args[0]
		v.reset(OpAMD64ROUNDSS)
		v.AuxInt = int8ToAuxInt(c)
		v.AddArg(x)
		return true
	}
	return false
}
func rewriteValueAMD64_OpAMD64DIVSD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View File

@@ -6,6 +6,8 @@ package ssa
import (
"cmd/compile/internal/rttype"
"math"
"math/rand"
"reflect"
"testing"
"unsafe"
@@ -42,6 +44,78 @@ func TestSubFlags(t *testing.T) {
}
}
// unopt applies f to x by explicitly widening to float64 and narrowing
// the result back to float32. The //go:noinline directive and the
// indirect call keep the compiler from optimizing this into a single
// 32-bit operation, so it serves as the unoptimized reference value to
// compare against the directly-written (optimizable) form.
//
//go:noinline
func unopt(f func(float64) float64, x float32) float32 {
	return float32(f(float64(x)))
}
func differ(x, y float32) bool {
if x != x && y != y {
// if both are NaN, exact bit pattern of the NaN is uninteresting
return false
}
return math.Float32bits(x) != math.Float32bits(y)
}
// test32bitUnary checks, for a single input x, that the directly-written
// float32(math.F(float64(x))) forms (which the compiler may optimize into
// single 32-bit instructions) produce bit-identical results to the
// unoptimized reference computed via unopt. Sqrt is only checked for
// x >= 0, since negative inputs produce NaN (whose payload differ ignores
// anyway). The exact expression shapes below are load-bearing: they are
// the patterns the optimizer matches, so do not refactor them.
func test32bitUnary(t *testing.T, x float32) {
	if want, got := unopt(math.Round, x), float32(math.Round(float64(x))); differ(want, got) {
		t.Errorf("Optimized 32-bit Round did not match, x=%f, want=%f, got=%f", x, want, got)
	}
	if want, got := unopt(math.RoundToEven, x), float32(math.RoundToEven(float64(x))); differ(want, got) {
		t.Errorf("Optimized 32-bit RoundToEven did not match, x=%f, want=%f, got=%f", x, want, got)
	}
	if want, got := unopt(math.Trunc, x), float32(math.Trunc(float64(x))); differ(want, got) {
		t.Errorf("Optimized 32-bit Trunc did not match, x=%f, want=%f, got=%f", x, want, got)
	}
	if want, got := unopt(math.Ceil, x), float32(math.Ceil(float64(x))); differ(want, got) {
		t.Errorf("Optimized 32-bit Ceil did not match, x=%f, want=%f, got=%f", x, want, got)
	}
	if want, got := unopt(math.Floor, x), float32(math.Floor(float64(x))); differ(want, got) {
		t.Errorf("Optimized 32-bit Floor did not match, x=%f, want=%f, got=%f", x, want, got)
	}
	if x >= 0 {
		if want, got := unopt(math.Sqrt, x), float32(math.Sqrt(float64(x))); differ(want, got) {
			t.Errorf("Optimized 32-bit Sqrt did not match, x=%f, want=%f, got=%f", x, want, got)
		}
	}
	if want, got := unopt(math.Abs, x), float32(math.Abs(float64(x))); differ(want, got) {
		t.Errorf("Optimized 32-bit Abs did not match, x=%f, want=%f, got=%f", x, want, got)
	}
}
var zero float32
// Test32bitUnary exercises the 32-bit unary float optimizations.
// This is mostly for testing rounding behavior: it covers hand-picked
// values around rounding boundaries, negative zero, values near
// MaxFloat32, large half-integers, and a deterministic sample of
// arbitrary bit patterns.
func Test32bitUnary(t *testing.T) {
	boundaryCases := []float32{
		-1.5, -0.5, 0.5, 1.5,
		-1.4, -0.4, 0.4, 1.4,
		-1.6, -0.6, 0.6, 1.6,
	}
	for _, v := range boundaryCases {
		test32bitUnary(t, v)
	}
	// Negative zero, built from a variable so it cannot be folded away.
	test32bitUnary(t, 1/(-1/zero))
	rnd := rand.New(rand.NewSource(0))
	for i := uint32(0); i <= 1<<20; i++ {
		test32bitUnary(t, math.Float32frombits(math.Float32bits(math.MaxFloat32)-i))
		test32bitUnary(t, float32(i)+1.5)
		test32bitUnary(t, math.Float32frombits(rnd.Uint32()))
	}
}
func TestIsPPC64WordRotateMask(t *testing.T) {
tests := []struct {
input int64

View File

@@ -280,6 +280,16 @@ func Float64ConstantStore(p *float64) {
*p = 5.432
}
// WideCeilNarrow checks that ceiling a float32 via a float64 round trip
// is compiled down to a single 32-bit rounding instruction on amd64/v3.
func WideCeilNarrow(x float32) float32 {
	// amd64/v3:"ROUNDSS"
	return float32(math.Ceil(float64(x)))
}
// WideTruncNarrow checks that truncating a float32 via a float64 round
// trip is compiled down to a single 32-bit rounding instruction on
// amd64/v3.
func WideTruncNarrow(x float32) float32 {
	// amd64/v3:"ROUNDSS"
	return float32(math.Trunc(float64(x)))
}
// ------------------------ //
// Subnormal tests //
// ------------------------ //