From 094aacdb047e716ea5598514222bc8c70843d49e Mon Sep 17 00:00:00 2001 From: Mateusz Poliwczak Date: Fri, 26 Sep 2025 20:47:45 +0200 Subject: [PATCH] reflect: outline []runtimeSelect allocation in Select MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CL 707255 doing so we don't cause heap allocation of the slice, instead it is stored on the stack. goos: linux goarch: amd64 pkg: reflect cpu: 11th Gen Intel(R) Core(TM) i7-1185G7 @ 3.00GHz │ /tmp/before │ /tmp/after │ │ sec/op │ sec/op vs base │ Select/1-8 41.66n ± 1% 41.89n ± 0% ~ (p=0.151 n=20) Select/4-8 149.3n ± 1% 149.1n ± 8% ~ (p=0.324 n=20) Select/8-8 355.0n ± 1% 358.1n ± 1% +0.87% (p=0.002 n=20) SelectStaticLit/[4]SelectCase-8 153.3n ± 0% 151.9n ± 1% -0.88% (p=0.005 n=20) SelectStaticLit/[8]SelectCase-8 363.1n ± 1% 299.9n ± 0% -17.42% (p=0.000 n=20) geomean 165.2n 159.1n -3.69% │ /tmp/before │ /tmp/after │ │ B/op │ B/op vs base │ Select/1-8 8.000 ± 0% 8.000 ± 0% ~ (p=1.000 n=20) ¹ Select/4-8 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹ Select/8-8 512.0 ± 0% 512.0 ± 0% ~ (p=1.000 n=20) ¹ SelectStaticLit/[4]SelectCase-8 96.00 ± 0% 96.00 ± 0% ~ (p=1.000 n=20) ¹ SelectStaticLit/[8]SelectCase-8 512.0 ± 0% 256.0 ± 0% -50.00% (p=0.000 n=20) geomean 114.1 99.32 -12.94% ¹ all samples are equal │ /tmp/before │ /tmp/after │ │ allocs/op │ allocs/op vs base │ Select/1-8 1.000 ± 0% 1.000 ± 0% ~ (p=1.000 n=20) ¹ Select/4-8 5.000 ± 0% 5.000 ± 0% ~ (p=1.000 n=20) ¹ Select/8-8 11.00 ± 0% 11.00 ± 0% ~ (p=1.000 n=20) ¹ SelectStaticLit/[4]SelectCase-8 5.000 ± 0% 5.000 ± 0% ~ (p=1.000 n=20) ¹ SelectStaticLit/[8]SelectCase-8 11.00 ± 0% 10.00 ± 0% -9.09% (p=0.000 n=20) geomean 4.968 4.874 -1.89% Updates #75620 Change-Id: I6a6a696492a4c07d8a3c03de0a36edbf400af506 Reviewed-on: https://go-review.googlesource.com/c/go/+/707275 LUCI-TryBot-Result: Go LUCI Reviewed-by: Keith Randall Reviewed-by: Keith Randall Reviewed-by: David Chase --- src/reflect/benchmark_test.go | 18 ++++++++++++++ 
src/reflect/value.go | 44 +++++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/src/reflect/benchmark_test.go b/src/reflect/benchmark_test.go index d5cea2becf..6f1971627a 100644 --- a/src/reflect/benchmark_test.go +++ b/src/reflect/benchmark_test.go @@ -265,6 +265,24 @@ func BenchmarkSelect(b *testing.B) { } } +func BenchmarkSelectStaticLit(b *testing.B) { + channel := make(chan int) + close(channel) + + sc := SelectCase{Dir: SelectRecv, Chan: ValueOf(channel)} + b.Run("[4]SelectCase", func(b *testing.B) { + for range b.N { + _, _, _ = Select([]SelectCase{sc, sc, sc, sc}) + } + }) + + b.Run("[8]SelectCase", func(b *testing.B) { + for range b.N { + _, _, _ = Select([]SelectCase{sc, sc, sc, sc, sc, sc, sc, sc}) + } + }) +} + func BenchmarkCall(b *testing.B) { fv := ValueOf(func(a, b string) {}) b.ReportAllocs() diff --git a/src/reflect/value.go b/src/reflect/value.go index e4a686cdfd..49bc2dda46 100644 --- a/src/reflect/value.go +++ b/src/reflect/value.go @@ -2923,6 +2923,10 @@ type SelectCase struct { Send Value // value to send (for send) } +// stackAllocSelectCases represents the length of a slice that we +// pre-allocate in [Select] to avoid heap allocations. +const stackAllocSelectCases = 4 + // Select executes a select operation described by the list of cases. // Like the Go select statement, it blocks until at least one of the cases // can proceed, makes a uniform pseudo-random choice, @@ -2932,22 +2936,42 @@ type SelectCase struct { // (as opposed to a zero value received because the channel is closed). // Select supports a maximum of 65536 cases. func Select(cases []SelectCase) (chosen int, recv Value, recvOK bool) { + // This function is specially designed to be inlined, such that when called as: + // + // Select([]SelectCase{}) + // + // With a slice, that has a compile known length, the runcases slice + // will end up being stack allocated, since the compiler can infer + // the len([]SelectCase{}). 
+ // + // We additionally want to optimize Select(cases) for cases where len(cases) + // cannot be inferred at compile-time, thus in [select0] we allocate a + // [stackAllocSelectCases]-length slice, which will avoid memory allocations + // when the len(cases) <= stackAllocSelectCases and len(cases) is not compile-known. + + var runcases []runtimeSelect + if len(cases) > stackAllocSelectCases { + runcases = make([]runtimeSelect, len(cases)) + } + chosen, recv, recvOK = select0(cases, runcases) + return +} + +func select0(cases []SelectCase, runcases []runtimeSelect) (chosen int, recv Value, recvOK bool) { if len(cases) > 65536 { panic("reflect.Select: too many cases (max 65536)") } - // NOTE: Do not trust that caller is not modifying cases data underfoot. - // The range is safe because the caller cannot modify our copy of the len - // and each iteration makes its own copy of the value c. - var runcases []runtimeSelect - if len(cases) > 4 { - // Slice is heap allocated due to runtime dependent capacity. - runcases = make([]runtimeSelect, len(cases)) - } else { - // Slice can be stack allocated due to constant capacity. - runcases = make([]runtimeSelect, len(cases), 4) + + // See [Select] for more details on this. + if runcases == nil { + runcases = make([]runtimeSelect, len(cases), stackAllocSelectCases) } haveDefault := false + + // NOTE: Do not trust that caller is not modifying cases data underfoot. + // The range is safe because the caller cannot modify our copy of the len + // and each iteration makes its own copy of the value c. for i, c := range cases { rc := &runcases[i] rc.dir = c.Dir