Files
golang.org/x/net/http2/writesched_benchmarks_test.go
Nicholas S. Husin e7c005de60 http2: implement a more efficient writeQueue that avoids unnecessary copies.
Our previous implementation of writeQueue relies on one
[]FrameWriteRequests, forcing us to copy the rest of the slice's content
whenever we remove an item from the front.

This change remedies this problem by implementing writeQueue using
two-stage queues, similar to Okasaki's purely functional queue.

With 25 frames per stream, we are observing the following performance
improvement:
goos: linux
goarch: amd64
pkg: golang.org/x/net/http2
cpu: AMD EPYC 7B13
              │  /tmp/old   │              /tmp/new               │
              │   sec/op    │   sec/op     vs base                │
WriteQueue-64   508.3n ± 3%   305.7n ± 3%  -39.86% (p=0.000 n=10)

              │  /tmp/old  │            /tmp/new            │
              │    B/op    │    B/op     vs base            │
WriteQueue-64   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
¹ all samples are equal

              │  /tmp/old  │            /tmp/new            │
              │ allocs/op  │ allocs/op   vs base            │
WriteQueue-64   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
¹ all samples are equal

As the number of frames increases, the performance difference becomes
more stark as the old implementation does a quadratic amount of copying
in total to be able to fully consume a queue.

Change-Id: Ide816ebdd89a41275b5829683c0f10d48321af50
Reviewed-on: https://go-review.googlesource.com/c/net/+/710635
Reviewed-by: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Nicholas Husin <husin@google.com>
2025-10-09 10:55:28 -07:00

198 lines
5.1 KiB
Go

// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package http2
import (
"testing"
)
// benchmarkThroughput measures the steady-state push/pop cost of a write
// scheduler: all streams are opened once up front, then every benchmark
// iteration pushes one DATA frame per stream and drains the scheduler
// completely. The scheduler under test is built by wsFunc, and each stream is
// opened with the given priority.
func benchmarkThroughput(b *testing.B, wsFunc func() WriteScheduler, priority PriorityParam) {
	const maxFrameSize = 16
	const streamCount = 100
	ws := wsFunc()
	sc := &serverConn{maxFrameSize: maxFrameSize}
	streams := make([]*stream, streamCount)
	// Possible stream payloads. We vary the payload size of different streams
	// to simulate real traffic somewhat.
	streamsFrame := [][]byte{
		make([]byte, maxFrameSize*5),
		make([]byte, maxFrameSize*10),
		make([]byte, maxFrameSize*15),
		make([]byte, maxFrameSize*20),
		make([]byte, maxFrameSize*25),
	}
	for i := range streams {
		streamID := uint32(i) + 1
		streams[i] = &stream{
			id: streamID,
			sc: sc,
		}
		streams[i].flow.add(1 << 30) // arbitrary large value
		ws.OpenStream(streamID, OpenStreamOptions{
			priority: priority,
		})
	}
	for b.Loop() {
		for i := range streams {
			streamID := uint32(i) + 1
			ws.Push(FrameWriteRequest{
				write: &writeData{
					streamID:  streamID,
					p:         streamsFrame[i%len(streamsFrame)],
					endStream: false,
				},
				stream: streams[i],
			})
		}
		for {
			wr, ok := ws.Pop()
			if !ok {
				break
			}
			// Every payload is a multiple of maxFrameSize, so each popped
			// request should carry exactly maxFrameSize data bytes.
			if wr.DataSize() != maxFrameSize {
				b.Fatalf("ws.Pop() = %v data bytes, want %v", wr.DataSize(), maxFrameSize)
			}
		}
	}
	for i := range streams {
		streamID := uint32(i) + 1
		ws.CloseStream(streamID)
	}
}
// benchmarkStreamLifetime measures the full open/push/pop/close cycle of a
// write scheduler: unlike benchmarkThroughput, every benchmark iteration
// opens all streams, pushes one DATA frame per stream, drains the scheduler,
// and closes all streams again. The scheduler under test is built by wsFunc,
// and each stream is opened with the given priority.
func benchmarkStreamLifetime(b *testing.B, wsFunc func() WriteScheduler, priority PriorityParam) {
	const maxFrameSize = 16
	const streamCount = 100
	ws := wsFunc()
	sc := &serverConn{maxFrameSize: maxFrameSize}
	streams := make([]*stream, streamCount)
	// Possible stream payloads. We vary the payload size of different streams
	// to simulate real traffic somewhat.
	streamsFrame := [][]byte{
		make([]byte, maxFrameSize*5),
		make([]byte, maxFrameSize*10),
		make([]byte, maxFrameSize*15),
		make([]byte, maxFrameSize*20),
		make([]byte, maxFrameSize*25),
	}
	for i := range streams {
		streamID := uint32(i) + 1
		streams[i] = &stream{
			id: streamID,
			sc: sc,
		}
		streams[i].flow.add(1 << 30) // arbitrary large value
	}
	for b.Loop() {
		for i := range streams {
			streamID := uint32(i) + 1
			ws.OpenStream(streamID, OpenStreamOptions{
				priority: priority,
			})
			ws.Push(FrameWriteRequest{
				write: &writeData{
					streamID:  streamID,
					p:         streamsFrame[i%len(streamsFrame)],
					endStream: false,
				},
				stream: streams[i],
			})
		}
		for {
			wr, ok := ws.Pop()
			if !ok {
				break
			}
			// Every payload is a multiple of maxFrameSize, so each popped
			// request should carry exactly maxFrameSize data bytes.
			if wr.DataSize() != maxFrameSize {
				b.Fatalf("ws.Pop() = %v data bytes, want %v", wr.DataSize(), maxFrameSize)
			}
		}
		for i := range streams {
			streamID := uint32(i) + 1
			ws.CloseStream(streamID)
		}
	}
}
func BenchmarkWriteSchedulerThroughputRoundRobin(b *testing.B) {
benchmarkThroughput(b, newRoundRobinWriteScheduler, PriorityParam{})
}
func BenchmarkWriteSchedulerLifetimeRoundRobin(b *testing.B) {
benchmarkStreamLifetime(b, newRoundRobinWriteScheduler, PriorityParam{})
}
func BenchmarkWriteSchedulerThroughputRandom(b *testing.B) {
benchmarkThroughput(b, NewRandomWriteScheduler, PriorityParam{})
}
func BenchmarkWriteSchedulerLifetimeRandom(b *testing.B) {
benchmarkStreamLifetime(b, NewRandomWriteScheduler, PriorityParam{})
}
func BenchmarkWriteSchedulerThroughputPriorityRFC7540(b *testing.B) {
benchmarkThroughput(b, func() WriteScheduler { return NewPriorityWriteScheduler(nil) }, PriorityParam{})
}
func BenchmarkWriteSchedulerLifetimePriorityRFC7540(b *testing.B) {
// RFC7540 priority scheduler does not always succeed in closing the
// stream, causing this benchmark to panic due to opening an already open
// stream.
b.SkipNow()
benchmarkStreamLifetime(b, func() WriteScheduler { return NewPriorityWriteScheduler(nil) }, PriorityParam{})
}
func BenchmarkWriteSchedulerThroughputPriorityRFC9218Incremental(b *testing.B) {
benchmarkThroughput(b, newPriorityWriteSchedulerRFC9128, PriorityParam{
urgency: defaultRFC9218Priority.urgency,
incremental: 1,
})
}
func BenchmarkWriteSchedulerLifetimePriorityRFC9218Incremental(b *testing.B) {
benchmarkStreamLifetime(b, newPriorityWriteSchedulerRFC9128, PriorityParam{
urgency: defaultRFC9218Priority.urgency,
incremental: 1,
})
}
func BenchmarkWriteSchedulerThroughputPriorityRFC9218NonIncremental(b *testing.B) {
benchmarkThroughput(b, newPriorityWriteSchedulerRFC9128, PriorityParam{
urgency: defaultRFC9218Priority.urgency,
incremental: 0,
})
}
func BenchmarkWriteSchedulerLifetimePriorityRFC9218NonIncremental(b *testing.B) {
benchmarkStreamLifetime(b, newPriorityWriteSchedulerRFC9128, PriorityParam{
urgency: defaultRFC9218Priority.urgency,
incremental: 0,
})
}
// BenchmarkWriteQueue exercises the pooled writeQueue: each iteration checks
// a queue out of the pool, fills it with empty frames, drains it one byte at
// a time, and returns it to the pool.
func BenchmarkWriteQueue(b *testing.B) {
	const frameCount = 25
	var pool writeQueuePool
	for b.Loop() {
		q := pool.get()
		for i := 0; i < frameCount; i++ {
			q.push(FrameWriteRequest{})
		}
		for !q.empty() {
			// Since we pushed empty frames, consuming 1 byte is enough to
			// consume the entire frame.
			q.consume(1)
		}
		pool.put(q)
	}
}