diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 941c2f2b8f..e32bf9695e 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -510,14 +510,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64MOVWUload, ssa.OpARM64MOVDload, ssa.OpARM64FMOVSload, - ssa.OpARM64FMOVDload: + ssa.OpARM64FMOVDload, + ssa.OpARM64FMOVQload: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() ssagen.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() - case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS: + case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS, ssa.OpARM64FLDPQ: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = v.Args[0].Reg() @@ -560,6 +561,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64MOVDstore, ssa.OpARM64FMOVSstore, ssa.OpARM64FMOVDstore, + ssa.OpARM64FMOVQstore, ssa.OpARM64STLRB, ssa.OpARM64STLR, ssa.OpARM64STLRW: @@ -584,7 +586,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() - case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS: + case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS, ssa.OpARM64FSTPQ: p := s.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_REGREG p.From.Reg = v.Args[1].Reg() diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64.rules b/src/cmd/compile/internal/ssa/_gen/ARM64.rules index c1ccc12ade..8e2b4f66d2 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM64.rules @@ -694,6 +694,12 @@ (FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDload [off1+int32(off2)] {sym} ptr mem) +(FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQload [off1+int32(off2)] {sym} ptr mem) +(FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FLDPQ [off1+int32(off2)] {sym} ptr mem) // register indexed load (MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil => (MOVDloadidx ptr idx mem) @@ -779,6 +785,12 @@ (FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDstore [off1+int32(off2)] {sym} ptr val mem) +(FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQstore [off1+int32(off2)] {sym} ptr val mem) +(FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) && is32Bit(int64(off1)+off2) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem) // register indexed store (MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil => (MOVDstoreidx ptr idx val mem) @@ -865,6 +877,14 @@ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) +(FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) +(FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem) (MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) @@ -894,6 +914,14 @@ && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) +(FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) +(FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem) + && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) + && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) => + (FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width) // these seem to have bad interaction with other rules, resulting in slower code diff --git a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go index 10feb99211..3314695ad0 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/ARM64Ops.go @@ -396,6 +396,7 @@ func init() { {name: "MOVDload", argLength: 2, reg: gpload, aux: "SymOff", asm: "MOVD", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. + {name: "FMOVQload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVQ", typ: "Vec128", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. // LDP instructions load the contents of two adjacent locations in memory into registers. // Address to start loading is addr = arg0 + auxInt + aux. @@ -408,6 +409,7 @@ func init() { {name: "LDPSW", argLength: 2, reg: gpload2, aux: "SymOff", asm: "LDPSW", typ: "(Int32,Int32)", faultOnNilArg0: true, symEffect: "Read"}, // T=int32 (gp reg destination) signed extension {name: "FLDPD", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPD", typ: "(Float64,Float64)", faultOnNilArg0: true, symEffect: "Read"}, // T=float64 (fp reg destination) {name: "FLDPS", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPS", typ: "(Float32,Float32)", faultOnNilArg0: true, symEffect: "Read"}, // T=float32 (fp reg destination) + {name: "FLDPQ", argLength: 2, reg: fpload2, aux: "SymOff", asm: "FLDPQ", typ: "(Vec128,Vec128)", faultOnNilArg0: true, symEffect: "Read"}, // T=vec128 (fp reg destination) // register indexed load {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit dword from arg0 + arg1, arg2 = mem. @@ -435,6 +437,7 @@ func init() { {name: "MOVDstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. + {name: "FMOVQstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. // STP instructions store the contents of two registers to adjacent locations in memory. // Address to start storing is addr = arg0 + auxInt + aux. @@ -445,6 +448,7 @@ func init() { {name: "STPW", argLength: 4, reg: gpstore2, aux: "SymOff", asm: "STPW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=int32 (gp reg source) {name: "FSTPD", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float64 (fp reg source) {name: "FSTPS", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=float32 (fp reg source) + {name: "FSTPQ", argLength: 4, reg: fpstore2, aux: "SymOff", asm: "FSTPQ", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // T=vec128 (fp reg source) // register indexed store {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 031d87bcf9..96bb288a96 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -4319,11 +4319,13 @@ const ( OpARM64MOVDload OpARM64FMOVSload OpARM64FMOVDload + OpARM64FMOVQload OpARM64LDP OpARM64LDPW OpARM64LDPSW OpARM64FLDPD OpARM64FLDPS + OpARM64FLDPQ OpARM64MOVDloadidx OpARM64MOVWloadidx OpARM64MOVWUloadidx @@ -4346,10 +4348,12 @@ const ( OpARM64MOVDstore OpARM64FMOVSstore OpARM64FMOVDstore + OpARM64FMOVQstore OpARM64STP OpARM64STPW OpARM64FSTPD OpARM64FSTPS + OpARM64FSTPQ OpARM64MOVBstoreidx OpARM64MOVHstoreidx OpARM64MOVWstoreidx @@ -67511,6 +67515,22 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FMOVQload", + auxType: auxSymOff, + argLen: 2, + faultOnNilArg0: true, + symEffect: SymRead, + asm: arm64.AFMOVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "LDP", auxType: auxSymOff, @@ -67596,6 +67616,23 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FLDPQ", + auxType: auxSymOff, + argLen: 2, + faultOnNilArg0: true, + symEffect: SymRead, + asm: arm64.AFLDPQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "MOVDloadidx", argLen: 3, @@ -67904,6 +67941,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FMOVQstore", + auxType: auxSymOff, + argLen: 3, + faultOnNilArg0: true, + symEffect: SymWrite, + asm: arm64.AFMOVQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "STP", auxType: auxSymOff, @@ -67964,6 +68015,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FSTPQ", + auxType: auxSymOff, + argLen: 4, + faultOnNilArg0: true, + symEffect: SymWrite, + asm: arm64.AFSTPQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 9223372038331170815}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + {1, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "MOVBstoreidx", argLen: 4, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index 735cda5db0..d243274732 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -108,6 +108,8 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FCMPS(v) case OpARM64FCVTDS: return rewriteValueARM64_OpARM64FCVTDS(v) + case OpARM64FLDPQ: + return rewriteValueARM64_OpARM64FLDPQ(v) case OpARM64FMOVDfpgp: return rewriteValueARM64_OpARM64FMOVDfpgp(v) case OpARM64FMOVDgpfp: @@ -124,6 +126,10 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FMOVDstoreidx(v) case OpARM64FMOVDstoreidx8: return rewriteValueARM64_OpARM64FMOVDstoreidx8(v) + case OpARM64FMOVQload: + return rewriteValueARM64_OpARM64FMOVQload(v) + case OpARM64FMOVQstore: + return rewriteValueARM64_OpARM64FMOVQstore(v) case OpARM64FMOVSload: return rewriteValueARM64_OpARM64FMOVSload(v) case OpARM64FMOVSloadidx: @@ -148,6 +154,8 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FNMULD(v) case OpARM64FNMULS: return rewriteValueARM64_OpARM64FNMULS(v) + case OpARM64FSTPQ: + return rewriteValueARM64_OpARM64FSTPQ(v) case OpARM64FSUBD: return rewriteValueARM64_OpARM64FSUBD(v) case OpARM64FSUBS: @@ -4904,6 +4912,56 @@ func rewriteValueARM64_OpARM64FCVTDS(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64FLDPQ(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FLDPQ [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FLDPQ [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FLDPQ) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + // match: (FLDPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FLDPQ [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FLDPQ) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) + return true + } + return false +} func rewriteValueARM64_OpARM64FMOVDfpgp(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -5352,6 +5410,109 @@ func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64FMOVQload(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVQload [off1] {sym} (ADDconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQload [off1+int32(off2)] {sym} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + mem := v_1 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQload) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + // match: (FMOVQload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + mem := v_1 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQload) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64FMOVQstore(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FMOVQstore [off1] {sym} (ADDconst [off2] ptr) val mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQstore [off1+int32(off2)] {sym} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQstore) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, val, mem) + return true + } + // match: (FMOVQstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FMOVQstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FMOVQstore) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg3(ptr, val, mem) + return true + } + return false +} func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -5894,6 +6055,62 @@ func rewriteValueARM64_OpARM64FNMULS(v *Value) bool { } return false } +func rewriteValueARM64_OpARM64FSTPQ(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + config := b.Func.Config + // match: (FSTPQ [off1] {sym} (ADDconst [off2] ptr) val1 val2 mem) + // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FSTPQ [off1+int32(off2)] {sym} ptr val1 val2 mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDconst { + break + } + off2 := auxIntToInt64(v_0.AuxInt) + ptr := v_0.Args[0] + val1 := v_1 + val2 := v_2 + mem := v_3 + if !(is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FSTPQ) + v.AuxInt = int32ToAuxInt(off1 + int32(off2)) + v.Aux = symToAux(sym) + v.AddArg4(ptr, val1, val2, mem) + return true + } + // match: (FSTPQ [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem) + // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) + // result: (FSTPQ [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym1 := auxToSym(v.Aux) + if v_0.Op != OpARM64MOVDaddr { + break + } + off2 := auxIntToInt32(v_0.AuxInt) + sym2 := auxToSym(v_0.Aux) + ptr := v_0.Args[0] + val1 := v_1 + val2 := v_2 + mem := v_3 + if !(canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink)) { + break + } + v.reset(OpARM64FSTPQ) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(mergeSym(sym1, sym2)) + v.AddArg4(ptr, val1, val2, mem) + return true + } + return false +} func rewriteValueARM64_OpARM64FSUBD(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0]