mirror of
https://github.com/golang/go.git
synced 2026-04-01 17:07:17 +09:00
encoding/{base32,base64}: speed up Encode
This CL clarifies (*Encoding).Encode and speeds it up by reducing the
number of bounds checks in its loop.
Here are some benchmark results (no change to allocations):
goos: darwin
goarch: arm64
pkg: encoding/base32
cpu: Apple M4
                  │     old     │                 new                 │
                  │   sec/op    │   sec/op     vs base                │
EncodeToString-10   7.310µ ± 0%   5.308µ ± 0%  -27.39% (n=180)
Encode-10           5.651µ ± 0%   3.603µ ± 0%  -36.25% (n=180)
geomean             6.427µ        4.373µ       -31.96%
                  │     old      │                 new                  │
                  │     B/s      │     B/s       vs base                │
EncodeToString-10   1.044Gi ± 0%   1.437Gi ± 0%  +37.71% (p=0.000 n=180)
Encode-10           1.350Gi ± 0%   2.118Gi ± 0%  +56.88% (p=0.000 n=180)
geomean             1.187Gi        1.745Gi       +46.98%
pkg: encoding/base64
                  │     old     │                 new                 │
                  │   sec/op    │   sec/op     vs base                │
EncodeToString-10   7.058µ ± 0%   6.034µ ± 0%  -14.51% (n=180)
                  │     old      │                 new                  │
                  │     B/s      │     B/s       vs base                │
EncodeToString-10   1.081Gi ± 0%   1.264Gi ± 0%  +16.97% (p=0.000 n=180)
Updates #20206
Change-Id: I7d46891ddb4371df004bfd612a8efc6638715b94
GitHub-Last-Rev: 1caac3d655
GitHub-Pull-Request: golang/go#78344
Reviewed-on: https://go-review.googlesource.com/c/go/+/759100
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Committed by: Gopher Robot
Parent: 286a79658e
Commit: e4fcdc6c55
@@ -127,61 +127,59 @@ func (enc *Encoding) Encode(dst, src []byte) {
|
||||
// outside of the loop to speed up the encoder.
|
||||
_ = enc.encode
|
||||
|
||||
di, si := 0, 0
|
||||
n := (len(src) / 5) * 5
|
||||
for si < n {
|
||||
for len(src) >= 5 {
|
||||
// Combining two 32 bit loads allows the same code to be used
|
||||
// for 32 and 64 bit platforms.
|
||||
hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])
|
||||
lo := hi<<8 | uint32(src[si+4])
|
||||
hi := uint32(src[0])<<24 | uint32(src[1])<<16 | uint32(src[2])<<8 | uint32(src[3])
|
||||
lo := hi<<8 | uint32(src[4])
|
||||
|
||||
dst[di+0] = enc.encode[(hi>>27)&0x1F]
|
||||
dst[di+1] = enc.encode[(hi>>22)&0x1F]
|
||||
dst[di+2] = enc.encode[(hi>>17)&0x1F]
|
||||
dst[di+3] = enc.encode[(hi>>12)&0x1F]
|
||||
dst[di+4] = enc.encode[(hi>>7)&0x1F]
|
||||
dst[di+5] = enc.encode[(hi>>2)&0x1F]
|
||||
dst[di+6] = enc.encode[(lo>>5)&0x1F]
|
||||
dst[di+7] = enc.encode[(lo)&0x1F]
|
||||
_ = dst[7] // Eliminate bounds checks below.
|
||||
dst[0] = enc.encode[(hi>>27)&0x1F]
|
||||
dst[1] = enc.encode[(hi>>22)&0x1F]
|
||||
dst[2] = enc.encode[(hi>>17)&0x1F]
|
||||
dst[3] = enc.encode[(hi>>12)&0x1F]
|
||||
dst[4] = enc.encode[(hi>>7)&0x1F]
|
||||
dst[5] = enc.encode[(hi>>2)&0x1F]
|
||||
dst[6] = enc.encode[(lo>>5)&0x1F]
|
||||
dst[7] = enc.encode[(lo)&0x1F]
|
||||
|
||||
si += 5
|
||||
di += 8
|
||||
src = src[5:]
|
||||
dst = dst[8:]
|
||||
}
|
||||
|
||||
// Add the remaining small block
|
||||
remain := len(src) - si
|
||||
if remain == 0 {
|
||||
if len(src) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Encode the remaining bytes in reverse order.
|
||||
val := uint32(0)
|
||||
switch remain {
|
||||
switch len(src) {
|
||||
case 4:
|
||||
val |= uint32(src[si+3])
|
||||
dst[di+6] = enc.encode[val<<3&0x1F]
|
||||
dst[di+5] = enc.encode[val>>2&0x1F]
|
||||
val |= uint32(src[3])
|
||||
dst[6] = enc.encode[val<<3&0x1F]
|
||||
dst[5] = enc.encode[val>>2&0x1F]
|
||||
fallthrough
|
||||
case 3:
|
||||
val |= uint32(src[si+2]) << 8
|
||||
dst[di+4] = enc.encode[val>>7&0x1F]
|
||||
val |= uint32(src[2]) << 8
|
||||
dst[4] = enc.encode[val>>7&0x1F]
|
||||
fallthrough
|
||||
case 2:
|
||||
val |= uint32(src[si+1]) << 16
|
||||
dst[di+3] = enc.encode[val>>12&0x1F]
|
||||
dst[di+2] = enc.encode[val>>17&0x1F]
|
||||
val |= uint32(src[1]) << 16
|
||||
dst[3] = enc.encode[val>>12&0x1F]
|
||||
dst[2] = enc.encode[val>>17&0x1F]
|
||||
fallthrough
|
||||
case 1:
|
||||
val |= uint32(src[si+0]) << 24
|
||||
dst[di+1] = enc.encode[val>>22&0x1F]
|
||||
dst[di+0] = enc.encode[val>>27&0x1F]
|
||||
val |= uint32(src[0]) << 24
|
||||
dst[1] = enc.encode[val>>22&0x1F]
|
||||
dst[0] = enc.encode[val>>27&0x1F]
|
||||
}
|
||||
|
||||
// Pad the final quantum
|
||||
if enc.padChar != NoPadding {
|
||||
nPad := (remain * 8 / 5) + 1
|
||||
nPad := (len(src) * 8 / 5) + 1
|
||||
for i := nPad; i < 8; i++ {
|
||||
dst[di+i] = byte(enc.padChar)
|
||||
dst[i] = byte(enc.padChar)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -151,44 +151,39 @@ func (enc *Encoding) Encode(dst, src []byte) {
|
||||
// outside of the loop to speed up the encoder.
|
||||
_ = enc.encode
|
||||
|
||||
di, si := 0, 0
|
||||
n := (len(src) / 3) * 3
|
||||
for si < n {
|
||||
for len(src) >= 3 {
|
||||
// Convert 3x 8bit source bytes into 4 bytes
|
||||
val := uint(src[si+0])<<16 | uint(src[si+1])<<8 | uint(src[si+2])
|
||||
val := uint(src[0])<<16 | uint(src[1])<<8 | uint(src[2])
|
||||
|
||||
dst[di+0] = enc.encode[val>>18&0x3F]
|
||||
dst[di+1] = enc.encode[val>>12&0x3F]
|
||||
dst[di+2] = enc.encode[val>>6&0x3F]
|
||||
dst[di+3] = enc.encode[val&0x3F]
|
||||
_ = dst[3] // Eliminate bounds checks below.
|
||||
dst[0] = enc.encode[val>>18&0x3F]
|
||||
dst[1] = enc.encode[val>>12&0x3F]
|
||||
dst[2] = enc.encode[val>>6&0x3F]
|
||||
dst[3] = enc.encode[val&0x3F]
|
||||
|
||||
si += 3
|
||||
di += 4
|
||||
src = src[3:]
|
||||
dst = dst[4:]
|
||||
}
|
||||
|
||||
remain := len(src) - si
|
||||
if remain == 0 {
|
||||
// Add the remaining small block (if any).
|
||||
switch len(src) {
|
||||
case 0:
|
||||
return
|
||||
}
|
||||
// Add the remaining small block
|
||||
val := uint(src[si+0]) << 16
|
||||
if remain == 2 {
|
||||
val |= uint(src[si+1]) << 8
|
||||
}
|
||||
|
||||
dst[di+0] = enc.encode[val>>18&0x3F]
|
||||
dst[di+1] = enc.encode[val>>12&0x3F]
|
||||
|
||||
switch remain {
|
||||
case 2:
|
||||
dst[di+2] = enc.encode[val>>6&0x3F]
|
||||
if enc.padChar != NoPadding {
|
||||
dst[di+3] = byte(enc.padChar)
|
||||
}
|
||||
case 1:
|
||||
val := uint(src[0]) << 16
|
||||
dst[0] = enc.encode[val>>18&0x3F]
|
||||
dst[1] = enc.encode[val>>12&0x3F]
|
||||
if enc.padChar != NoPadding {
|
||||
dst[di+2] = byte(enc.padChar)
|
||||
dst[di+3] = byte(enc.padChar)
|
||||
dst[2] = byte(enc.padChar)
|
||||
dst[3] = byte(enc.padChar)
|
||||
}
|
||||
case 2:
|
||||
val := uint(src[0])<<16 | uint(src[1])<<8
|
||||
dst[0] = enc.encode[val>>18&0x3F]
|
||||
dst[1] = enc.encode[val>>12&0x3F]
|
||||
dst[2] = enc.encode[val>>6&0x3F]
|
||||
if enc.padChar != NoPadding {
|
||||
dst[3] = byte(enc.padChar)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user