publicsuffix: spruce up code gen and speed up PublicSuffix

Rely on functions from the slices package where convenient. Drop custom max functions in favor of max builtin. Remove unused non-exported functions.

Reduce the number of bounds checks. Replace calls to strings.LastIndex by calls to strings.LastIndexByte.

goos: darwin
goarch: amd64
pkg: golang.org/x/net/publicsuffix
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
               │     old     │                new                 │
               │   sec/op    │   sec/op     vs base               │
PublicSuffix-8   13.46µ ± 0%   13.23µ ± 0%  -1.67% (p=0.000 n=20)

               │    old     │              new               │
               │    B/op    │    B/op     vs base            │
PublicSuffix-8   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=20) ¹
¹ all samples are equal

               │    old     │              new               │
               │ allocs/op  │ allocs/op   vs base            │
PublicSuffix-8   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=20) ¹
¹ all samples are equal

Change-Id: Id72967560884d98a5c0791ccea73dbb27d120c2c
GitHub-Last-Rev: 87567e7cb5
GitHub-Pull-Request: golang/net#233
Reviewed-on: https://go-review.googlesource.com/c/net/+/652236
Reviewed-by: Damien Neil <dneil@google.com>
Commit-Queue: Ian Lance Taylor <iant@golang.org>
Auto-Submit: Ian Lance Taylor <iant@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
This commit is contained in:
Julien Cretel
2025-02-25 11:40:25 +00:00
committed by Gopher Robot
parent 459513d1f8
commit fe7f0391aa
2 changed files with 28 additions and 70 deletions

View File

@@ -21,6 +21,7 @@ package main
import (
"bufio"
"bytes"
"cmp"
"encoding/binary"
"flag"
"fmt"
@@ -29,7 +30,7 @@ import (
"net/http"
"os"
"regexp"
"sort"
"slices"
"strings"
"golang.org/x/net/idna"
@@ -62,20 +63,6 @@ var (
maxLo uint32
)
// max reports the greater of the two ints a and b; when they are equal,
// a is returned.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// u32max reports the greater of the two uint32 values a and b; when they
// are equal, a is returned.
func u32max(a, b uint32) uint32 {
	if b > a {
		return b
	}
	return a
}
const (
nodeTypeNormal = 0
nodeTypeException = 1
@@ -83,18 +70,6 @@ const (
numNodeType = 3
)
// nodeTypeStr maps a node type constant to a one-character marker:
// "+" for nodeTypeNormal, "!" for nodeTypeException and "o" for
// nodeTypeParentOnly. Any other value causes a panic.
func nodeTypeStr(n int) string {
	if n == nodeTypeNormal {
		return "+"
	}
	if n == nodeTypeException {
		return "!"
	}
	if n == nodeTypeParentOnly {
		return "o"
	}
	panic("unreachable")
}
const (
defaultURL = "https://publicsuffix.org/list/effective_tld_names.dat"
gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat"
@@ -251,7 +226,7 @@ func main1() error {
for label := range labelsMap {
labelsList = append(labelsList, label)
}
sort.Strings(labelsList)
slices.Sort(labelsList)
combinedText = combineText(labelsList)
if combinedText == "" {
@@ -509,15 +484,13 @@ func (n *node) child(label string) *node {
icann: true,
}
n.children = append(n.children, c)
sort.Sort(byLabel(n.children))
slices.SortFunc(n.children, byLabel)
return c
}
// byLabel is a slice of nodes that implements sort.Interface, ordering
// the nodes by ascending label.
type byLabel []*node

// Len returns the number of nodes in the slice.
func (nodes byLabel) Len() int {
	return len(nodes)
}

// Swap exchanges the nodes at positions i and j.
func (nodes byLabel) Swap(i, j int) {
	nodes[j], nodes[i] = nodes[i], nodes[j]
}

// Less reports whether the label at position i sorts before the label at
// position j.
func (nodes byLabel) Less(i, j int) bool {
	return nodes[j].label > nodes[i].label
}
// byLabel is a three-way comparison of two nodes by label. It returns -1
// when a's label sorts first, +1 when b's label sorts first, and 0 when
// the labels are equal.
func byLabel(a, b *node) int {
	return cmp.Compare(a.label, b.label)
}
var nextNodesIndex int
@@ -557,7 +530,7 @@ func assignIndexes(n *node) error {
n.childrenIndex = len(childrenEncoding)
lo := uint32(n.firstChild)
hi := lo + uint32(len(n.children))
maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi)
maxLo, maxHi = max(maxLo, lo), max(maxHi, hi)
if lo >= 1<<childrenBitsLo {
return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo)
}
@@ -586,20 +559,6 @@ func printNodeLabel(w io.Writer, n *node) error {
return nil
}
// icannStr renders the icann flag as a one-character column: "I" when the
// flag is set, a single space otherwise.
func icannStr(icann bool) string {
	marker := " "
	if icann {
		marker = "I"
	}
	return marker
}
// wildcardStr renders the wildcard flag as a one-character column: "*"
// when the flag is set, a single space otherwise.
func wildcardStr(wildcard bool) string {
	marker := " "
	if wildcard {
		marker = "*"
	}
	return marker
}
// combineText combines all the strings in labelsList to form one giant string.
// Overlapping strings will be merged: "arpa" and "parliament" could yield
// "arparliament".
@@ -616,18 +575,15 @@ func combineText(labelsList []string) string {
return text
}
// byLength is a slice of strings that implements sort.Interface, ordering
// the strings by ascending length in bytes.
type byLength []string

// Len returns the number of strings in the slice.
func (strs byLength) Len() int {
	return len(strs)
}

// Swap exchanges the strings at positions i and j.
func (strs byLength) Swap(i, j int) {
	strs[j], strs[i] = strs[i], strs[j]
}

// Less reports whether the string at position i is strictly shorter than
// the string at position j.
func (strs byLength) Less(i, j int) bool {
	return len(strs[j]) > len(strs[i])
}
func byLength(a, b string) int {
return cmp.Compare(len(a), len(b))
}
// removeSubstrings returns a copy of its input with any strings removed
// that are substrings of other provided strings.
func removeSubstrings(input []string) []string {
// Make a copy of input.
ss := append(make([]string, 0, len(input)), input...)
sort.Sort(byLength(ss))
ss := slices.Clone(input)
slices.SortFunc(ss, byLength)
for i, shortString := range ss {
// For each string, only consider strings higher than it in sort order, i.e.
@@ -641,7 +597,7 @@ func removeSubstrings(input []string) []string {
}
// Remove the empty strings.
sort.Strings(ss)
slices.Sort(ss)
for len(ss) > 0 && ss[0] == "" {
ss = ss[1:]
}

View File

@@ -88,7 +88,7 @@ func PublicSuffix(domain string) (publicSuffix string, icann bool) {
s, suffix, icannNode, wildcard := domain, len(domain), false, false
loop:
for {
dot := strings.LastIndex(s, ".")
dot := strings.LastIndexByte(s, '.')
if wildcard {
icann = icannNode
suffix = 1 + dot
@@ -129,7 +129,7 @@ loop:
}
if suffix == len(domain) {
// If no rules match, the prevailing rule is "*".
return domain[1+strings.LastIndex(domain, "."):], icann
return domain[1+strings.LastIndexByte(domain, '.'):], icann
}
return domain[suffix:], icann
}
@@ -178,26 +178,28 @@ func EffectiveTLDPlusOne(domain string) (string, error) {
if domain[i] != '.' {
return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain)
}
return domain[1+strings.LastIndex(domain[:i], "."):], nil
return domain[1+strings.LastIndexByte(domain[:i], '.'):], nil
}
// uint32String is a string holding a packed sequence of big-endian uint32
// values, four bytes per element.
type uint32String string

// get returns element i of u, decoding the four bytes that start at byte
// offset 4*i as a big-endian uint32. It panics if those bytes extend past
// the end of u.
func (u uint32String) get(i uint32) uint32 {
	off := i * 4
	u = u[off:] // help the compiler reduce bounds checks
	// Index the highest byte first in source order of evaluation below so a
	// single check on u[3] covers the remaining accesses.
	return uint32(u[3]) |
		uint32(u[2])<<8 |
		uint32(u[1])<<16 |
		uint32(u[0])<<24
}
// uint40String is a string holding a packed sequence of big-endian 40-bit
// values; consecutive elements are nodesBits/8 bytes apart.
type uint40String string

// get returns element i of u, decoding the five bytes that start at byte
// offset i*(nodesBits/8) as a big-endian value in the low 40 bits of the
// result. It panics if those bytes extend past the end of u.
func (u uint40String) get(i uint32) uint64 {
	off := uint64(i * (nodesBits / 8))
	u = u[off:] // help the compiler reduce bounds checks
	return uint64(u[4]) |
		uint64(u[3])<<8 |
		uint64(u[2])<<16 |
		uint64(u[1])<<24 |
		uint64(u[0])<<32
}