mirror of
https://github.com/golang/go.git
synced 2026-04-02 17:30:01 +09:00
cmd/compile: on arm64 pair a load with a load in a subsequent block
Look into the following block(s) for a load that can be paired with the load we're trying to pair up.

This particularly helps the generated equality functions. Instead of doing

    MOVD x(R0), R2
    MOVD x(R1), R3
    CMP R2, R3
    BNE noteq
    MOVD x+8(R0), R2
    MOVD x+8(R1), R3
    CMP R2, R3
    BNE noteq

we do

    LDP x(R0), (R2, R4)
    LDP x(R1), (R3, R5)
    CMP R2, R3
    BNE noteq
    CMP R4, R5
    BNE noteq

Removes 5296 bytes of code from cmd/go.

Change-Id: I6368686892ac944783c8b07ed7252126d1ef4031
Reviewed-on: https://go-review.googlesource.com/c/go/+/740741
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
@@ -7,6 +7,7 @@ package ssa
|
||||
import (
|
||||
"cmd/compile/internal/ir"
|
||||
"cmd/compile/internal/types"
|
||||
"cmd/internal/obj"
|
||||
"slices"
|
||||
)
|
||||
|
||||
@@ -206,6 +207,117 @@ func pairLoads(f *Func) {
|
||||
i++ // Skip y next time around the loop.
|
||||
}
|
||||
}
|
||||
|
||||
// Try to pair a load with a load from a subsequent block.
|
||||
// Note that this is always safe to do if the memory arguments match.
|
||||
// (But see the memory barrier case below.)
|
||||
type nextBlockKey struct {
|
||||
op Op
|
||||
ptr ID
|
||||
mem ID
|
||||
auxInt int64
|
||||
aux any
|
||||
}
|
||||
nextBlock := map[nextBlockKey]*Value{}
|
||||
for _, b := range f.Blocks {
|
||||
if memoryBarrierTest(b) {
|
||||
// TODO: Do we really need to skip write barrier test blocks?
|
||||
// type T struct {
|
||||
// a *byte
|
||||
// b int
|
||||
// }
|
||||
// func f(t *T) int {
|
||||
// r := t.b
|
||||
// t.a = nil
|
||||
// return r
|
||||
// }
|
||||
// This would issue a single LDP for both the t.a and t.b fields,
|
||||
// *before* we check the write barrier flag. (We load the t.a field
|
||||
// to put it in the write barrier buffer.) Not sure if that is ok.
|
||||
continue
|
||||
}
|
||||
// Find loads in the next block(s) that we can move to this one.
|
||||
// TODO: could maybe look further than just one successor hop.
|
||||
clear(nextBlock)
|
||||
for _, e := range b.Succs {
|
||||
if len(e.b.Preds) > 1 {
|
||||
continue
|
||||
}
|
||||
for _, v := range e.b.Values {
|
||||
info := pairableLoads[v.Op]
|
||||
if info.width == 0 {
|
||||
continue
|
||||
}
|
||||
if !offsetOk(v.Aux, v.AuxInt, info.width) {
|
||||
continue // not advisable
|
||||
}
|
||||
nextBlock[nextBlockKey{op: v.Op, ptr: v.Args[0].ID, mem: v.Args[1].ID, auxInt: v.AuxInt, aux: v.Aux}] = v
|
||||
}
|
||||
}
|
||||
if len(nextBlock) == 0 {
|
||||
continue
|
||||
}
|
||||
// don't move too many loads. Each requires a register across a basic block boundary.
|
||||
const maxMoved = 4
|
||||
nMoved := 0
|
||||
for i := len(b.Values) - 1; i >= 0 && nMoved < maxMoved; i-- {
|
||||
x := b.Values[i]
|
||||
info := pairableLoads[x.Op]
|
||||
if info.width == 0 {
|
||||
continue
|
||||
}
|
||||
if !offsetOk(x.Aux, x.AuxInt, info.width) {
|
||||
continue // not advisable
|
||||
}
|
||||
key := nextBlockKey{op: x.Op, ptr: x.Args[0].ID, mem: x.Args[1].ID, auxInt: x.AuxInt + info.width, aux: x.Aux}
|
||||
if y := nextBlock[key]; y != nil {
|
||||
delete(nextBlock, key)
|
||||
|
||||
// Make the 2-register load.
|
||||
load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(x.Type, y.Type), x.AuxInt, x.Aux, x.Args[0], x.Args[1])
|
||||
|
||||
// Modify x to be (Select0 load).
|
||||
x.reset(OpSelect0)
|
||||
x.SetArgs1(load)
|
||||
// Modify y to be (Copy (Select1 load)).
|
||||
// Note: the Select* needs to live in the load's block, not y's block.
|
||||
y.reset(OpCopy)
|
||||
y.SetArgs1(b.NewValue1(y.Pos, OpSelect1, y.Type, load))
|
||||
nMoved++
|
||||
continue
|
||||
}
|
||||
key.auxInt = x.AuxInt - info.width
|
||||
if y := nextBlock[key]; y != nil {
|
||||
delete(nextBlock, key)
|
||||
|
||||
// Make the 2-register load.
|
||||
load := b.NewValue2IA(x.Pos, info.pair, types.NewTuple(y.Type, x.Type), y.AuxInt, x.Aux, x.Args[0], x.Args[1])
|
||||
|
||||
// Modify x to be (Select1 load).
|
||||
x.reset(OpSelect1)
|
||||
x.SetArgs1(load)
|
||||
// Modify y to be (Copy (Select0 load)).
|
||||
y.reset(OpCopy)
|
||||
y.SetArgs1(b.NewValue1(y.Pos, OpSelect0, y.Type, load))
|
||||
nMoved++
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func memoryBarrierTest(b *Block) bool {
|
||||
if b.Kind != BlockARM64NZW {
|
||||
return false
|
||||
}
|
||||
c := b.Controls[0]
|
||||
if c.Op != OpARM64MOVWUload {
|
||||
return false
|
||||
}
|
||||
if globl, ok := c.Aux.(*obj.LSym); ok {
|
||||
return globl.Name == "runtime.writeBarrier"
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func pairStores(f *Func) {
|
||||
|
||||
@@ -1027,6 +1027,15 @@ func dwloadResult2(p *[2]int64) (int64, int64) {
|
||||
return p[1], p[0]
|
||||
}
|
||||
|
||||
// dwloadConditional returns p[0] as its first result. The second result is
// p[1], or 0 when p[0] is 0, so p[1] is only read after the branch on p[0].
// The directive on the first load expects arm64 to emit one LDP instruction.
func dwloadConditional(p *[2]int64) (int64, int64) {
	// arm64:"LDP \\(R0\\), \\(R0, R1\\)"
	first := p[0]
	if first != 0 {
		return first, p[1]
	}
	return first, 0
}
|
||||
|
||||
// ---------------------------------- //
|
||||
// Arm64 double-register stores //
|
||||
// ---------------------------------- //
|
||||
|
||||
Reference in New Issue
Block a user