cmd/compile: use OpMove instead of memmove in more cases on loong64

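OpMove is faster than a call to memmove for small moves of fixed size.
On loong64, isInlinableMemmove now accepts moves of up to 16 bytes, or
up to 64 bytes when disjoint(dst, sz, src, sz) holds; previously loong64
shared the "sz <= 4" limit with arm and mips.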

goos: linux
goarch: loong64
pkg: runtime
cpu: Loongson-3A6000 @ 2500.00MHz
                               |   old.txt   |                new.txt               |
                               |   sec/op    |    sec/op     vs base                |
Memmove/0                        2.225n ± 2%   2.424n ±  1%   +8.94% (p=0.000 n=8)
Memmove/1                        2.813n ± 0%   2.562n ±  1%   -8.94% (p=0.000 n=8)
Memmove/2                        2.538n ± 3%   2.572n ±  1%   +1.32% (p=0.001 n=8)
Memmove/3                        2.807n ± 0%   2.804n ±  0%   -0.11% (p=0.024 n=8)
Memmove/4                        2.531n ± 2%   2.803n ±  0%  +10.72% (p=0.000 n=8)
Memmove/5                        3.203n ± 0%   2.809n ±  0%  -12.29% (p=0.000 n=8)
Memmove/6                        3.203n ± 0%   2.811n ±  0%  -12.24% (p=0.000 n=8)
Memmove/7                        3.203n ± 0%   2.809n ±  0%  -12.30% (p=0.000 n=8)
Memmove/8                        2.804n ± 0%   2.803n ±  0%        ~ (p=0.321 n=8)
Memmove/9                        3.203n ± 0%   3.203n ±  0%        ~ (p=1.000 n=8)
Memmove/10                       3.203n ± 0%   3.203n ±  0%        ~ (p=1.000 n=8)
Memmove/11                       3.203n ± 0%   3.203n ±  0%        ~ (p=1.000 n=8)
Memmove/12                       3.202n ± 0%   3.202n ±  0%        ~ (p=0.282 n=8)
Memmove/13                       3.202n ± 0%   3.202n ±  0%        ~ (p=1.000 n=8)
Memmove/14                       3.203n ± 0%   3.203n ±  0%        ~ (p=0.318 n=8)
Memmove/15                       3.203n ± 0%   3.203n ±  0%        ~ (p=1.000 n=8)
Memmove/16                       3.203n ± 0%   3.203n ±  0%        ~ (p=1.000 n=8)
Memmove/32                       3.603n ± 0%   3.603n ±  0%        ~ (p=1.000 n=8)
Memmove/64                       4.204n ± 0%   4.204n ±  0%        ~ (p=1.000 n=8)
MemmoveOverlap/32                4.004n ± 0%   4.004n ±  0%        ~ (p=0.993 n=8)
MemmoveOverlap/64                4.804n ± 0%   4.604n ±  0%   -4.16% (p=0.000 n=8)
MemmoveOverlap/128               8.007n ± 0%   7.607n ±  0%   -5.00% (p=0.000 n=8)
MemmoveUnalignedDst/0            2.805n ± 0%   2.426n ±  0%  -13.53% (p=0.000 n=8)
MemmoveUnalignedDst/1            3.603n ± 0%   2.554n ±  1%  -29.13% (p=0.000 n=8)
MemmoveUnalignedDst/2            3.203n ± 0%   3.145n ±  0%   -1.83% (p=0.000 n=8)
MemmoveUnalignedDst/3            3.603n ± 0%   2.807n ±  0%  -22.10% (p=0.000 n=8)
MemmoveUnalignedDst/4            3.203n ± 0%   2.865n ±  0%  -10.57% (p=0.000 n=8)
MemmoveUnalignedDst/5            4.004n ± 0%   2.845n ±  0%  -28.96% (p=0.000 n=8)
MemmoveUnalignedDst/6            4.004n ± 0%   2.841n ±  0%  -29.06% (p=0.000 n=8)
MemmoveUnalignedDst/7            4.003n ± 0%   2.840n ±  0%  -29.06% (p=0.000 n=8)
MemmoveUnalignedDst/8            3.603n ± 0%   3.203n ±  0%  -11.10% (p=0.000 n=8)
MemmoveUnalignedDst/9            4.004n ± 0%   3.204n ±  0%  -19.98% (p=0.000 n=8)
MemmoveUnalignedDst/10           4.003n ± 0%   3.204n ±  0%  -19.97% (p=0.000 n=8)
MemmoveUnalignedDst/11           4.004n ± 0%   3.204n ±  0%  -19.98% (p=0.000 n=8)
MemmoveUnalignedDst/12           4.004n ± 0%   3.203n ±  0%  -20.00% (p=0.000 n=8)
MemmoveUnalignedDst/13           4.003n ± 0%   3.202n ±  0%  -20.01% (p=0.000 n=8)
MemmoveUnalignedDst/14           4.003n ± 0%   3.204n ±  0%  -19.97% (p=0.000 n=8)
MemmoveUnalignedDst/15           4.003n ± 0%   3.204n ±  0%  -19.97% (p=0.000 n=8)
MemmoveUnalignedDst/16           4.003n ± 0%   3.204n ±  0%  -19.97% (p=0.000 n=8)
MemmoveUnalignedDst/32           4.004n ± 0%   4.004n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedDst/64           7.607n ± 0%   7.607n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedDstOverlap/32    4.805n ± 2%   4.636n ±  1%   -3.51% (p=0.000 n=8)
MemmoveUnalignedDstOverlap/64    8.007n ± 0%   8.007n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedDstOverlap/128   9.066n ± 0%   9.033n ±  1%   -0.37% (p=0.014 n=8)
MemmoveUnalignedSrc/0            2.803n ± 0%   2.418n ±  1%  -13.74% (p=0.000 n=8)
MemmoveUnalignedSrc/1            3.603n ± 0%   2.543n ±  0%  -29.42% (p=0.000 n=8)
MemmoveUnalignedSrc/2            3.203n ± 0%   2.478n ±  1%  -22.65% (p=0.000 n=8)
MemmoveUnalignedSrc/3            3.603n ± 0%   2.803n ±  0%  -22.20% (p=0.000 n=8)
MemmoveUnalignedSrc/4            3.203n ± 0%   2.804n ±  0%  -12.47% (p=0.000 n=8)
MemmoveUnalignedSrc/5            4.003n ± 0%   2.817n ±  0%  -29.62% (p=0.000 n=8)
MemmoveUnalignedSrc/6            4.004n ± 0%   2.817n ±  0%  -29.66% (p=0.000 n=8)
MemmoveUnalignedSrc/7            4.003n ± 0%   2.818n ±  0%  -29.60% (p=0.000 n=8)
MemmoveUnalignedSrc/8            3.603n ± 0%   3.202n ±  0%  -11.13% (p=0.000 n=8)
MemmoveUnalignedSrc/9            4.003n ± 0%   3.202n ±  0%  -20.01% (p=0.000 n=8)
MemmoveUnalignedSrc/10           4.004n ± 0%   3.203n ±  0%  -20.00% (p=0.000 n=8)
MemmoveUnalignedSrc/11           4.003n ± 0%   3.203n ±  0%  -19.99% (p=0.000 n=8)
MemmoveUnalignedSrc/12           4.003n ± 0%   3.203n ±  0%  -19.99% (p=0.000 n=8)
MemmoveUnalignedSrc/13           4.003n ± 0%   3.203n ±  0%  -20.00% (p=0.000 n=8)
MemmoveUnalignedSrc/14           4.004n ± 0%   3.203n ±  0%  -20.00% (p=0.000 n=8)
MemmoveUnalignedSrc/15           4.003n ± 0%   3.203n ±  0%  -20.00% (p=0.000 n=8)
MemmoveUnalignedSrc/16           4.004n ± 0%   3.203n ±  0%  -20.00% (p=0.000 n=8)
MemmoveUnalignedSrc/32           4.003n ± 0%   4.004n ±  0%        ~ (p=0.432 n=8)
MemmoveUnalignedSrc/64           4.804n ± 0%   4.417n ±  0%   -8.06% (p=0.000 n=8)
MemmoveUnalignedSrcDst/f_16_0    4.004n ± 0%   4.004n ±  0%        ~ (p=0.533 n=8)
MemmoveUnalignedSrcDst/b_16_0    4.004n ± 0%   4.004n ±  0%        ~ (p=0.546 n=8)
MemmoveUnalignedSrcDst/f_16_1    4.004n ± 0%   4.004n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedSrcDst/b_16_1    4.004n ± 0%   4.004n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedSrcDst/f_16_4    4.004n ± 0%   4.004n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedSrcDst/b_16_4    4.004n ± 0%   4.004n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedSrcDst/f_16_7    4.004n ± 0%   4.004n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedSrcDst/b_16_7    4.003n ± 0%   4.004n ±  0%        ~ (p=0.641 n=8)
MemmoveUnalignedSrcDst/f_64_0    5.245n ± 1%   5.269n ±  0%        ~ (p=0.053 n=8)
MemmoveUnalignedSrcDst/b_64_0    5.401n ± 0%   5.402n ±  0%        ~ (p=0.194 n=8)
MemmoveUnalignedSrcDst/f_64_1    7.210n ± 1%   7.174n ±  0%   -0.49% (p=0.007 n=8)
MemmoveUnalignedSrcDst/b_64_1    6.710n ± 0%   6.709n ±  0%        ~ (p=0.542 n=8)
MemmoveUnalignedSrcDst/f_64_4    7.208n ± 0%   7.182n ±  0%   -0.36% (p=0.036 n=8)
MemmoveUnalignedSrcDst/b_64_4    6.717n ± 0%   6.708n ±  0%   -0.13% (p=0.001 n=8)
MemmoveUnalignedSrcDst/f_64_7    7.246n ± 0%   7.192n ±  0%   -0.75% (p=0.000 n=8)
MemmoveUnalignedSrcDst/b_64_7    6.718n ± 0%   6.708n ±  0%   -0.15% (p=0.001 n=8)
MemmoveUnalignedSrcOverlap/32    6.806n ± 0%   6.806n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedSrcOverlap/64    10.01n ± 0%   10.01n ±  0%        ~ (p=1.000 n=8)
MemmoveUnalignedSrcOverlap/128   11.61n ± 0%   11.61n ±  0%        ~ (p=1.000 n=8)
MemmoveKnownSize112              4.070n ± 0%   4.070n ±  0%        ~ (p=0.733 n=8)
MemmoveKnownSize128              3.429n ± 0%   3.411n ±  0%   -0.52% (p=0.000 n=8)
MemmoveKnownSize192              6.686n ± 3%   5.285n ± 10%  -20.96% (p=0.003 n=8)
MemmoveKnownSize248              8.385n ± 4%   6.986n ±  0%  -16.69% (p=0.000 n=8)
MemmoveKnownSize256              9.354n ± 8%   7.895n ± 14%  -15.60% (p=0.000 n=8)
MemmoveKnownSize512              13.14n ± 1%   13.06n ±  0%        ~ (p=0.623 n=8)
MemmoveKnownSize1024             25.82n ± 0%   25.82n ±  0%        ~ (p=0.713 n=8)
geomean                          4.355n        3.936n         -9.63%

Change-Id: I72e3b513447948223ebba0a5a091ff552068574d
Reviewed-on: https://go-review.googlesource.com/c/go/+/749720
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: David Chase <drchase@google.com>
This commit is contained in:
Guoqi Chen
2026-02-12 14:09:27 +08:00
committed by Gopher Robot
parent c4de16a714
commit 93b78326c3

View File

@@ -1484,11 +1484,13 @@ func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
 		return sz <= 16 || (sz < 1024 && disjoint(dst, sz, src, sz))
 	case "arm64":
 		return sz <= 64 || (sz <= 1024 && disjoint(dst, sz, src, sz))
+	case "loong64":
+		return sz <= 16 || (sz <= 64 && disjoint(dst, sz, src, sz))
 	case "386":
 		return sz <= 8
 	case "s390x", "ppc64", "ppc64le":
 		return sz <= 8 || disjoint(dst, sz, src, sz)
-	case "arm", "loong64", "mips", "mips64", "mipsle", "mips64le":
+	case "arm", "mips", "mips64", "mipsle", "mips64le":
 		return sz <= 4
 	}
 	return false