cmd/compile: arm64 add 128-bit vector load/store SSA ops

Add OpARM64FMOVQload, OpARM64FMOVQstore, OpARM64FLDPQ, and
OpARM64FSTPQ for loading and storing Vec128 values.
Includes offset folding and address combining rules.

These ops will be used by subsequent CLs.

Change-Id: I4ac86a0a31f878411f49d390cb8df01f81cfc4d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/738260
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
This commit is contained in:
Alexander Musman
2026-01-21 22:40:14 +03:00
committed by Gopher Robot
parent 3a29ebeef9
commit ec3373e379
5 changed files with 320 additions and 3 deletions

View File

@@ -510,14 +510,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVWUload,
ssa.OpARM64MOVDload,
ssa.OpARM64FMOVSload,
ssa.OpARM64FMOVDload:
ssa.OpARM64FMOVDload,
ssa.OpARM64FMOVQload:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS:
case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS, ssa.OpARM64FLDPQ:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
@@ -560,6 +561,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpARM64MOVDstore,
ssa.OpARM64FMOVSstore,
ssa.OpARM64FMOVDstore,
ssa.OpARM64FMOVQstore,
ssa.OpARM64STLRB,
ssa.OpARM64STLR,
ssa.OpARM64STLRW:
@@ -584,7 +586,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS:
case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS, ssa.OpARM64FSTPQ:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG
p.From.Reg = v.Args[1].Reg()

View File

@@ -694,6 +694,12 @@
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDload [off1+int32(off2)] {sym} ptr mem)
(FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVQload [off1+int32(off2)] {sym} ptr mem)
(FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FLDPQ [off1+int32(off2)] {sym} ptr mem)
// register indexed load
(MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx ptr idx mem)
@@ -779,6 +785,12 @@
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDstore [off1+int32(off2)] {sym} ptr val mem)
(FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVQstore [off1+int32(off2)] {sym} ptr val mem)
(FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem)
// register indexed store
(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem)
@@ -865,6 +877,14 @@
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
@@ -894,6 +914,14 @@
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// these seem to have bad interaction with other rules, resulting in slower code

View File

@@ -396,6 +396,7 @@ func init() {
{name: "MOVDload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVD", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
{name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
{name: "FMOVQload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVQ", typ: "Vec128", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
// LDP instructions load the contents of two adjacent locations in memory into registers.
// Address to start loading is addr = arg0 + auxInt + aux.
@@ -408,6 +409,7 @@ func init() {
{name: "LDPSW", argLength: 2, reg: gpload2, aux: "SymOff", asm: "LDPSW", typ: "(Int32,Int32)", faultOnNilArg0: true, symEffect: "Read"}, // T=int32 (gp reg destination) signed extension
{name: "FLDPD", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPD", typ: "(Float64,Float64)", faultOnNilArg0: true, symEffect: "Read"}, // T=float64 (fp reg destination)
{name: "FLDPS", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPS", typ: "(Float32,Float32)", faultOnNilArg0: true, symEffect: "Read"}, // T=float32 (fp reg destination)
{name: "FLDPQ", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPQ", typ: "(Vec128,Vec128)", faultOnNilArg0: true, symEffect: "Read"}, // T=vec128 (fp reg destination)
// register indexed load
{name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit dword from arg0 + arg1, arg2 = mem.
@@ -435,6 +437,7 @@ func init() {
{name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVQstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
// STP instructions store the contents of two registers to adjacent locations in memory.
// Address to start storing is addr = arg0 + auxInt + aux.
@@ -445,6 +448,7 @@ func init() {
{name: "STPW", argLength: 4, reg: gpstore2, aux: "SymOff", asm: "STPW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=int32 (gp reg source)
{name: "FSTPD", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float64 (fp reg source)
{name: "FSTPS", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float32 (fp reg source)
{name: "FSTPQ", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=vec128 (fp reg source)
// register indexed store
{name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.

View File

@@ -4319,11 +4319,13 @@ const (
OpARM64MOVDload
OpARM64FMOVSload
OpARM64FMOVDload
OpARM64FMOVQload
OpARM64LDP
OpARM64LDPW
OpARM64LDPSW
OpARM64FLDPD
OpARM64FLDPS
OpARM64FLDPQ
OpARM64MOVDloadidx
OpARM64MOVWloadidx
OpARM64MOVWUloadidx
@@ -4346,10 +4348,12 @@ const (
OpARM64MOVDstore
OpARM64FMOVSstore
OpARM64FMOVDstore
OpARM64FMOVQstore
OpARM64STP
OpARM64STPW
OpARM64FSTPD
OpARM64FSTPS
OpARM64FSTPQ
OpARM64MOVBstoreidx
OpARM64MOVHstoreidx
OpARM64MOVWstoreidx
@@ -67511,6 +67515,22 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMOVQload",
auxType: auxSymOff,
argLen: 2,
faultOnNilArg0: true,
symEffect: SymRead,
asm: arm64.AFMOVQ,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "LDP",
auxType: auxSymOff,
@@ -67596,6 +67616,23 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FLDPQ",
auxType: auxSymOff,
argLen: 2,
faultOnNilArg0: true,
symEffect: SymRead,
asm: arm64.AFLDPQ,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "MOVDloadidx",
argLen: 3,
@@ -67904,6 +67941,20 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMOVQstore",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
symEffect: SymWrite,
asm: arm64.AFMOVQ,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "STP",
auxType: auxSymOff,
@@ -67964,6 +68015,21 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FSTPQ",
auxType: auxSymOff,
argLen: 4,
faultOnNilArg0: true,
symEffect: SymWrite,
asm: arm64.AFSTPQ,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
{1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
{2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "MOVBstoreidx",
argLen: 4,

View File

@@ -108,6 +108,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FCMPS(v)
case OpARM64FCVTDS:
return rewriteValueARM64_OpARM64FCVTDS(v)
case OpARM64FLDPQ:
return rewriteValueARM64_OpARM64FLDPQ(v)
case OpARM64FMOVDfpgp:
return rewriteValueARM64_OpARM64FMOVDfpgp(v)
case OpARM64FMOVDgpfp:
@@ -124,6 +126,10 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FMOVDstoreidx(v)
case OpARM64FMOVDstoreidx8:
return rewriteValueARM64_OpARM64FMOVDstoreidx8(v)
case OpARM64FMOVQload:
return rewriteValueARM64_OpARM64FMOVQload(v)
case OpARM64FMOVQstore:
return rewriteValueARM64_OpARM64FMOVQstore(v)
case OpARM64FMOVSload:
return rewriteValueARM64_OpARM64FMOVSload(v)
case OpARM64FMOVSloadidx:
@@ -148,6 +154,8 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64FNMULD(v)
case OpARM64FNMULS:
return rewriteValueARM64_OpARM64FNMULS(v)
case OpARM64FSTPQ:
return rewriteValueARM64_OpARM64FSTPQ(v)
case OpARM64FSUBD:
return rewriteValueARM64_OpARM64FSUBD(v)
case OpARM64FSUBS:
@@ -4904,6 +4912,56 @@ func rewriteValueARM64_OpARM64FCVTDS(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64FLDPQ(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem)
// cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FLDPQ [off1+int32(off2)] {sym} ptr mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDconst {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
ptr := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FLDPQ)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpARM64MOVDaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
ptr := v_0.Args[0]
mem := v_1
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FLDPQ)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg2(ptr, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVDfpgp(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@@ -5352,6 +5410,109 @@ func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64FMOVQload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem)
// cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVQload [off1+int32(off2)] {sym} ptr mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDconst {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
ptr := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FMOVQload)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg2(ptr, mem)
return true
}
// match: (FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpARM64MOVDaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
ptr := v_0.Args[0]
mem := v_1
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FMOVQload)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg2(ptr, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVQstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem)
// cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVQstore [off1+int32(off2)] {sym} ptr val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDconst {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
ptr := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FMOVQstore)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg3(ptr, val, mem)
return true
}
// match: (FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpARM64MOVDaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
ptr := v_0.Args[0]
val := v_1
mem := v_2
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FMOVQstore)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg3(ptr, val, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -5894,6 +6055,62 @@ func rewriteValueARM64_OpARM64FNMULS(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64FSTPQ(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem)
// cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpARM64ADDconst {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
ptr := v_0.Args[0]
val1 := v_1
val2 := v_2
mem := v_3
if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FSTPQ)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg4(ptr, val1, val2, mem)
return true
}
// match: (FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpARM64MOVDaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
ptr := v_0.Args[0]
val1 := v_1
val2 := v_2
mem := v_3
if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpARM64FSTPQ)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg4(ptr, val1, val2, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64FSUBD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]