runtime, cmd/compile: use preemptible memclr for large pointer-free clears

Large memory clearing operations (via clear() or large slice allocation)
currently use non-preemptible assembly loops. This blocks the garbage
collector from performing a stop-the-world (STW) pause, leading to
significant tail latency or even indefinite hangs in tight loops.

This change introduces memclrNoHeapPointersPreemptible, which chunks
clears into 256KB blocks with preemption checks. The compiler's walk
phase is updated to emit this call for large pointer-free clears.

To prevent regressions, SSA rewrite rules are added to ensure that
constant-size clears (which are common and small) continue to be
inlined into OpZero assembly.

Benchmarks on darwin/arm64:
- STW with 50MB clear: Improved from 'Hung' to ~500µs max pause.
- Small clears (5-64B): No measurable regression.
- Large clears (1M-64M): No measurable regression.

Fixes #69327

Change-Id: Ide14d6bcdca1f60d6ac95443acb57da9a8822538
Reviewed-on: https://go-review.googlesource.com/c/go/+/750480
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Robert Griesemer <gri@google.com>
This commit is contained in:
Muhammad
2026-03-01 23:53:03 +00:00
committed by Gopher Robot
parent 7aeb2f7e28
commit 50126a8e44
8 changed files with 47 additions and 9 deletions

View File

@@ -1598,11 +1598,13 @@
=> (AndB (MemEq p q (Const64 <typ.Int64> [16]) mem)
(MemEq (OffPtr <p.Type> p [16]) (OffPtr <q.Type> q [16]) (Const64 <typ.Int64> [c-16]) mem))
// Turn known-size calls to memclrNoHeapPointers into a Zero.
// Turn known-size calls to memclrNoHeapPointers or memclrNoHeapPointersPreemptible into a Zero.
// When the size is a known constant, inlining to OpZero is safe. Dynamic-size calls remain as
// runtime calls and go through the chunked preemptible path (memclrNoHeapPointersPreemptible).
// Note that we are using types.Types[types.TUINT8] instead of sptr.Type.Elem() - see issue 55122 and CL 431496 for more details.
(SelectN [0] call:(StaticCall {sym} sptr (Const(64|32) [c]) mem))
&& isInlinableMemclr(config, int64(c))
&& isSameCall(sym, "runtime.memclrNoHeapPointers")
&& (isSameCall(sym, "runtime.memclrNoHeapPointers") || isSameCall(sym, "runtime.memclrNoHeapPointersPreemptible"))
&& call.Uses == 1
&& clobber(call)
=> (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem)

View File

@@ -30029,7 +30029,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
return true
}
// match: (SelectN [0] call:(StaticCall {sym} sptr (Const64 [c]) mem))
// cond: isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
// cond: isInlinableMemclr(config, int64(c)) && (isSameCall(sym, "runtime.memclrNoHeapPointers") || isSameCall(sym, "runtime.memclrNoHeapPointersPreemptible")) && call.Uses == 1 && clobber(call)
// result: (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem)
for {
if auxIntToInt64(v.AuxInt) != 0 {
@@ -30047,7 +30047,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
break
}
c := auxIntToInt64(call_1.AuxInt)
if !(isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
if !(isInlinableMemclr(config, int64(c)) && (isSameCall(sym, "runtime.memclrNoHeapPointers") || isSameCall(sym, "runtime.memclrNoHeapPointersPreemptible")) && call.Uses == 1 && clobber(call)) {
break
}
v.reset(OpZero)
@@ -30057,7 +30057,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
return true
}
// match: (SelectN [0] call:(StaticCall {sym} sptr (Const32 [c]) mem))
// cond: isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)
// cond: isInlinableMemclr(config, int64(c)) && (isSameCall(sym, "runtime.memclrNoHeapPointers") || isSameCall(sym, "runtime.memclrNoHeapPointersPreemptible")) && call.Uses == 1 && clobber(call)
// result: (Zero {types.Types[types.TUINT8]} [int64(c)] sptr mem)
for {
if auxIntToInt64(v.AuxInt) != 0 {
@@ -30075,7 +30075,7 @@ func rewriteValuegeneric_OpSelectN(v *Value) bool {
break
}
c := auxIntToInt32(call_1.AuxInt)
if !(isInlinableMemclr(config, int64(c)) && isSameCall(sym, "runtime.memclrNoHeapPointers") && call.Uses == 1 && clobber(call)) {
if !(isInlinableMemclr(config, int64(c)) && (isSameCall(sym, "runtime.memclrNoHeapPointers") || isSameCall(sym, "runtime.memclrNoHeapPointersPreemptible")) && call.Uses == 1 && clobber(call)) {
break
}
v.reset(OpZero)

View File

@@ -213,6 +213,7 @@ func moveSliceNoCapNoScan(elemSize uintptr, old *byte, len int) (*byte, int, int
func memmove(to *any, frm *any, length uintptr)
func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
func memclrNoHeapPointersPreemptible(ptr unsafe.Pointer, n uintptr)
func memclrHasPointers(ptr unsafe.Pointer, n uintptr)
func memequal(x, y unsafe.Pointer, size uintptr) bool

View File

@@ -176,6 +176,7 @@ var runtimeDecls = [...]struct {
{"moveSliceNoCapNoScan", funcTag, 134},
{"memmove", funcTag, 135},
{"memclrNoHeapPointers", funcTag, 136},
{"memclrNoHeapPointersPreemptible", funcTag, 136},
{"memclrHasPointers", funcTag, 136},
{"memequal", funcTag, 137},
{"memequal0", funcTag, 138},

View File

@@ -718,7 +718,7 @@ func extendSlice(n *ir.CallExpr, init *ir.Nodes) ir.Node {
// hn := l2 * sizeof(elem(s))
hn := typecheck.Conv(ir.NewBinaryExpr(base.Pos, ir.OMUL, l2, ir.NewInt(base.Pos, elemtype.Size())), types.Types[types.TUINTPTR])
clrname := "memclrNoHeapPointers"
clrname := "memclrNoHeapPointersPreemptible"
hasPointers := elemtype.HasPointers()
if hasPointers {
clrname = "memclrHasPointers"

View File

@@ -589,8 +589,8 @@ func arrayClear(wbPos src.XPos, a ir.Node, nrange *ir.RangeStmt) ir.Node {
ir.CurFunc.SetWBPos(wbPos)
fn = mkcallstmt("memclrHasPointers", hp, hn)
} else {
// memclrNoHeapPointers(hp, hn)
fn = mkcallstmt("memclrNoHeapPointers", hp, hn)
// memclrNoHeapPointersPreemptible(hp, hn)
fn = mkcallstmt("memclrNoHeapPointersPreemptible", hp, hn)
}
n.Body.Append(fn)

View File

@@ -2202,6 +2202,15 @@ func memclrNoHeapPointersChunked(size uintptr, x unsafe.Pointer) {
}
}
// memclrNoHeapPointersPreemptible clears size bytes of pointer-free
// memory starting at p. It is the entry point the compiler emits for
// large pointer-free clears; its signature deliberately mirrors
// memclrNoHeapPointers so call sites can be generated the same way.
// The actual zeroing is forwarded to memclrNoHeapPointersChunked,
// which works through the region in 256KB pieces with a preemption
// check between successive chunks.
func memclrNoHeapPointersPreemptible(p unsafe.Pointer, size uintptr) {
	memclrNoHeapPointersChunked(size, p)
}
// implementation of new builtin
// compiler (both frontend and SSA backend) knows the signature
// of this function.

View File

@@ -1374,3 +1374,28 @@ func BenchmarkMemmoveKnownSize1024(b *testing.B) {
memclrSink = p.x[:]
}
// BenchmarkSTWLatency measures the worst-case GC pause observed while a
// background goroutine continuously clears a large pointer-free buffer.
// Before the preemptible memclr path, the clearing loop could block a
// stop-the-world for the duration of an entire 50 MiB clear.
//
// The reported metric is the maximum pause over all b.N GC cycles, in
// microseconds.
func BenchmarkSTWLatency(b *testing.B) {
	const bufSize = 50 << 20 // 50 MiB
	buf := make([]byte, bufSize)

	var stop atomic.Bool
	done := make(chan struct{})
	go func() {
		defer close(done)
		for !stop.Load() {
			clear(buf)
		}
	}()

	var maxPause int64
	for i := 0; i < b.N; i++ {
		start := Nanotime()
		GC()
		if elapsed := Nanotime() - start; elapsed > maxPause {
			maxPause = elapsed
		}
	}

	// Stop the background clearer and wait for it to exit, so a
	// still-running 50 MiB clear cannot perturb benchmarks that run
	// after this one.
	stop.Store(true)
	<-done

	b.ReportMetric(float64(maxPause)/1e3, "max-pause-µs")
}