mirror of
https://github.com/golang/net.git
synced 2026-03-31 18:37:08 +09:00
The current documentation as well as set of atoms and attributes has gotten slightly out of sync with the current state of the WHATWG html5 specification. The change adds and removes several of the atoms and attributes, updates the documentation (such as steps numbering in inBodyEndTagFormatting) and modifies the spec URLs to https:// Change-Id: I6dfa52785858c1521301b20b1e585e19a08b1e98 Reviewed-on: https://go-review.googlesource.com/6173 Reviewed-by: Nigel Tao <nigeltao@golang.org>
749 lines
14 KiB
Go
749 lines
14 KiB
Go
// Copyright 2010 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package html
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"io/ioutil"
|
|
"reflect"
|
|
"runtime"
|
|
"strings"
|
|
"testing"
|
|
)
|
|
|
|
// tokenTest is a single tokenizer test case: an input HTML fragment and the
// expected serialized tokens.
type tokenTest struct {
	// A short description of the test case.
	desc string
	// The HTML to parse.
	html string
	// The string representations of the expected tokens, joined by '$'.
	golden string
}
|
|
|
|
var tokenTests = []tokenTest{
|
|
{
|
|
"empty",
|
|
"",
|
|
"",
|
|
},
|
|
// A single text node. The tokenizer should not break text nodes on whitespace,
|
|
// nor should it normalize whitespace within a text node.
|
|
{
|
|
"text",
|
|
"foo bar",
|
|
"foo bar",
|
|
},
|
|
// An entity.
|
|
{
|
|
"entity",
|
|
"one < two",
|
|
"one < two",
|
|
},
|
|
// A start, self-closing and end tag. The tokenizer does not care if the start
|
|
// and end tokens don't match; that is the job of the parser.
|
|
{
|
|
"tags",
|
|
"<a>b<c/>d</e>",
|
|
"<a>$b$<c/>$d$</e>",
|
|
},
|
|
// Angle brackets that aren't a tag.
|
|
{
|
|
"not a tag #0",
|
|
"<",
|
|
"<",
|
|
},
|
|
{
|
|
"not a tag #1",
|
|
"</",
|
|
"</",
|
|
},
|
|
{
|
|
"not a tag #2",
|
|
"</>",
|
|
"<!---->",
|
|
},
|
|
{
|
|
"not a tag #3",
|
|
"a</>b",
|
|
"a$<!---->$b",
|
|
},
|
|
{
|
|
"not a tag #4",
|
|
"</ >",
|
|
"<!-- -->",
|
|
},
|
|
{
|
|
"not a tag #5",
|
|
"</.",
|
|
"<!--.-->",
|
|
},
|
|
{
|
|
"not a tag #6",
|
|
"</.>",
|
|
"<!--.-->",
|
|
},
|
|
{
|
|
"not a tag #7",
|
|
"a < b",
|
|
"a < b",
|
|
},
|
|
{
|
|
"not a tag #8",
|
|
"<.>",
|
|
"<.>",
|
|
},
|
|
{
|
|
"not a tag #9",
|
|
"a<<<b>>>c",
|
|
"a<<$<b>$>>c",
|
|
},
|
|
{
|
|
"not a tag #10",
|
|
"if x<0 and y < 0 then x*y>0",
|
|
"if x<0 and y < 0 then x*y>0",
|
|
},
|
|
{
|
|
"not a tag #11",
|
|
"<<p>",
|
|
"<$<p>",
|
|
},
|
|
// EOF in a tag name.
|
|
{
|
|
"tag name eof #0",
|
|
"<a",
|
|
"",
|
|
},
|
|
{
|
|
"tag name eof #1",
|
|
"<a ",
|
|
"",
|
|
},
|
|
{
|
|
"tag name eof #2",
|
|
"a<b",
|
|
"a",
|
|
},
|
|
{
|
|
"tag name eof #3",
|
|
"<a><b",
|
|
"<a>",
|
|
},
|
|
{
|
|
"tag name eof #4",
|
|
`<a x`,
|
|
``,
|
|
},
|
|
// Some malformed tags that are missing a '>'.
|
|
{
|
|
"malformed tag #0",
|
|
`<p</p>`,
|
|
`<p< p="">`,
|
|
},
|
|
{
|
|
"malformed tag #1",
|
|
`<p </p>`,
|
|
`<p <="" p="">`,
|
|
},
|
|
{
|
|
"malformed tag #2",
|
|
`<p id`,
|
|
``,
|
|
},
|
|
{
|
|
"malformed tag #3",
|
|
`<p id=`,
|
|
``,
|
|
},
|
|
{
|
|
"malformed tag #4",
|
|
`<p id=>`,
|
|
`<p id="">`,
|
|
},
|
|
{
|
|
"malformed tag #5",
|
|
`<p id=0`,
|
|
``,
|
|
},
|
|
{
|
|
"malformed tag #6",
|
|
`<p id=0</p>`,
|
|
`<p id="0</p">`,
|
|
},
|
|
{
|
|
"malformed tag #7",
|
|
`<p id="0</p>`,
|
|
``,
|
|
},
|
|
{
|
|
"malformed tag #8",
|
|
`<p id="0"</p>`,
|
|
`<p id="0" <="" p="">`,
|
|
},
|
|
{
|
|
"malformed tag #9",
|
|
`<p></p id`,
|
|
`<p>`,
|
|
},
|
|
// Raw text and RCDATA.
|
|
{
|
|
"basic raw text",
|
|
"<script><a></b></script>",
|
|
"<script>$<a></b>$</script>",
|
|
},
|
|
{
|
|
"unfinished script end tag",
|
|
"<SCRIPT>a</SCR",
|
|
"<script>$a</SCR",
|
|
},
|
|
{
|
|
"broken script end tag",
|
|
"<SCRIPT>a</SCR ipt>",
|
|
"<script>$a</SCR ipt>",
|
|
},
|
|
{
|
|
"EOF in script end tag",
|
|
"<SCRIPT>a</SCRipt",
|
|
"<script>$a</SCRipt",
|
|
},
|
|
{
|
|
"scriptx end tag",
|
|
"<SCRIPT>a</SCRiptx",
|
|
"<script>$a</SCRiptx",
|
|
},
|
|
{
|
|
"' ' completes script end tag",
|
|
"<SCRIPT>a</SCRipt ",
|
|
"<script>$a",
|
|
},
|
|
{
|
|
"'>' completes script end tag",
|
|
"<SCRIPT>a</SCRipt>",
|
|
"<script>$a$</script>",
|
|
},
|
|
{
|
|
"self-closing script end tag",
|
|
"<SCRIPT>a</SCRipt/>",
|
|
"<script>$a$</script>",
|
|
},
|
|
{
|
|
"nested script tag",
|
|
"<SCRIPT>a</SCRipt<script>",
|
|
"<script>$a</SCRipt<script>",
|
|
},
|
|
{
|
|
"script end tag after unfinished",
|
|
"<SCRIPT>a</SCRipt</script>",
|
|
"<script>$a</SCRipt$</script>",
|
|
},
|
|
{
|
|
"script/style mismatched tags",
|
|
"<script>a</style>",
|
|
"<script>$a</style>",
|
|
},
|
|
{
|
|
"style element with entity",
|
|
"<style>'",
|
|
"<style>$&apos;",
|
|
},
|
|
{
|
|
"textarea with tag",
|
|
"<textarea><div></textarea>",
|
|
"<textarea>$<div>$</textarea>",
|
|
},
|
|
{
|
|
"title with tag and entity",
|
|
"<title><b>K&R C</b></title>",
|
|
"<title>$<b>K&R C</b>$</title>",
|
|
},
|
|
// DOCTYPE tests.
|
|
{
|
|
"Proper DOCTYPE",
|
|
"<!DOCTYPE html>",
|
|
"<!DOCTYPE html>",
|
|
},
|
|
{
|
|
"DOCTYPE with no space",
|
|
"<!doctypehtml>",
|
|
"<!DOCTYPE html>",
|
|
},
|
|
{
|
|
"DOCTYPE with two spaces",
|
|
"<!doctype html>",
|
|
"<!DOCTYPE html>",
|
|
},
|
|
{
|
|
"looks like DOCTYPE but isn't",
|
|
"<!DOCUMENT html>",
|
|
"<!--DOCUMENT html-->",
|
|
},
|
|
{
|
|
"DOCTYPE at EOF",
|
|
"<!DOCtype",
|
|
"<!DOCTYPE >",
|
|
},
|
|
// XML processing instructions.
|
|
{
|
|
"XML processing instruction",
|
|
"<?xml?>",
|
|
"<!--?xml?-->",
|
|
},
|
|
// Comments.
|
|
{
|
|
"comment0",
|
|
"abc<b><!-- skipme --></b>def",
|
|
"abc$<b>$<!-- skipme -->$</b>$def",
|
|
},
|
|
{
|
|
"comment1",
|
|
"a<!-->z",
|
|
"a$<!---->$z",
|
|
},
|
|
{
|
|
"comment2",
|
|
"a<!--->z",
|
|
"a$<!---->$z",
|
|
},
|
|
{
|
|
"comment3",
|
|
"a<!--x>-->z",
|
|
"a$<!--x>-->$z",
|
|
},
|
|
{
|
|
"comment4",
|
|
"a<!--x->-->z",
|
|
"a$<!--x->-->$z",
|
|
},
|
|
{
|
|
"comment5",
|
|
"a<!>z",
|
|
"a$<!---->$z",
|
|
},
|
|
{
|
|
"comment6",
|
|
"a<!->z",
|
|
"a$<!----->$z",
|
|
},
|
|
{
|
|
"comment7",
|
|
"a<!---<>z",
|
|
"a$<!---<>z-->",
|
|
},
|
|
{
|
|
"comment8",
|
|
"a<!--z",
|
|
"a$<!--z-->",
|
|
},
|
|
{
|
|
"comment9",
|
|
"a<!--z-",
|
|
"a$<!--z-->",
|
|
},
|
|
{
|
|
"comment10",
|
|
"a<!--z--",
|
|
"a$<!--z-->",
|
|
},
|
|
{
|
|
"comment11",
|
|
"a<!--z---",
|
|
"a$<!--z--->",
|
|
},
|
|
{
|
|
"comment12",
|
|
"a<!--z----",
|
|
"a$<!--z---->",
|
|
},
|
|
{
|
|
"comment13",
|
|
"a<!--x--!>z",
|
|
"a$<!--x-->$z",
|
|
},
|
|
// An attribute with a backslash.
|
|
{
|
|
"backslash",
|
|
`<p id="a\"b">`,
|
|
`<p id="a\" b"="">`,
|
|
},
|
|
// Entities, tag name and attribute key lower-casing, and whitespace
|
|
// normalization within a tag.
|
|
{
|
|
"tricky",
|
|
"<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p>",
|
|
`<p id="a"B" foo="bar">$<em>$te<&;xt$</em>$</p>`,
|
|
},
|
|
// A nonexistent entity. Tokenizing and converting back to a string should
|
|
// escape the "&" to become "&".
|
|
{
|
|
"noSuchEntity",
|
|
`<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,
|
|
`<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`,
|
|
},
|
|
{
|
|
"entity without semicolon",
|
|
`¬it;∉<a b="q=z&=5¬ice=hello¬=world">`,
|
|
`¬it;∉$<a b="q=z&amp=5&notice=hello¬=world">`,
|
|
},
|
|
{
|
|
"entity with digits",
|
|
"½",
|
|
"½",
|
|
},
|
|
// Attribute tests:
|
|
// http://dev.w3.org/html5/pf-summary/Overview.html#attributes
|
|
{
|
|
"Empty attribute",
|
|
`<input disabled FOO>`,
|
|
`<input disabled="" foo="">`,
|
|
},
|
|
{
|
|
"Empty attribute, whitespace",
|
|
`<input disabled FOO >`,
|
|
`<input disabled="" foo="">`,
|
|
},
|
|
{
|
|
"Unquoted attribute value",
|
|
`<input value=yes FOO=BAR>`,
|
|
`<input value="yes" foo="BAR">`,
|
|
},
|
|
{
|
|
"Unquoted attribute value, spaces",
|
|
`<input value = yes FOO = BAR>`,
|
|
`<input value="yes" foo="BAR">`,
|
|
},
|
|
{
|
|
"Unquoted attribute value, trailing space",
|
|
`<input value=yes FOO=BAR >`,
|
|
`<input value="yes" foo="BAR">`,
|
|
},
|
|
{
|
|
"Single-quoted attribute value",
|
|
`<input value='yes' FOO='BAR'>`,
|
|
`<input value="yes" foo="BAR">`,
|
|
},
|
|
{
|
|
"Single-quoted attribute value, trailing space",
|
|
`<input value='yes' FOO='BAR' >`,
|
|
`<input value="yes" foo="BAR">`,
|
|
},
|
|
{
|
|
"Double-quoted attribute value",
|
|
`<input value="I'm an attribute" FOO="BAR">`,
|
|
`<input value="I'm an attribute" foo="BAR">`,
|
|
},
|
|
{
|
|
"Attribute name characters",
|
|
`<meta http-equiv="content-type">`,
|
|
`<meta http-equiv="content-type">`,
|
|
},
|
|
{
|
|
"Mixed attributes",
|
|
`a<P V="0 1" w='2' X=3 y>z`,
|
|
`a$<p v="0 1" w="2" x="3" y="">$z`,
|
|
},
|
|
{
|
|
"Attributes with a solitary single quote",
|
|
`<p id=can't><p id=won't>`,
|
|
`<p id="can't">$<p id="won't">`,
|
|
},
|
|
}
|
|
|
|
func TestTokenizer(t *testing.T) {
|
|
loop:
|
|
for _, tt := range tokenTests {
|
|
z := NewTokenizer(strings.NewReader(tt.html))
|
|
if tt.golden != "" {
|
|
for i, s := range strings.Split(tt.golden, "$") {
|
|
if z.Next() == ErrorToken {
|
|
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
|
|
continue loop
|
|
}
|
|
actual := z.Token().String()
|
|
if s != actual {
|
|
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
|
|
continue loop
|
|
}
|
|
}
|
|
}
|
|
z.Next()
|
|
if z.Err() != io.EOF {
|
|
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestMaxBuffer(t *testing.T) {
|
|
// Exceeding the maximum buffer size generates ErrBufferExceeded.
|
|
z := NewTokenizer(strings.NewReader("<" + strings.Repeat("t", 10)))
|
|
z.SetMaxBuf(5)
|
|
tt := z.Next()
|
|
if got, want := tt, ErrorToken; got != want {
|
|
t.Fatalf("token type: got: %v want: %v", got, want)
|
|
}
|
|
if got, want := z.Err(), ErrBufferExceeded; got != want {
|
|
t.Errorf("error type: got: %v want: %v", got, want)
|
|
}
|
|
if got, want := string(z.Raw()), "<tttt"; got != want {
|
|
t.Fatalf("buffered before overflow: got: %q want: %q", got, want)
|
|
}
|
|
}
|
|
|
|
func TestMaxBufferReconstruction(t *testing.T) {
|
|
// Exceeding the maximum buffer size at any point while tokenizing permits
|
|
// reconstructing the original input.
|
|
tests:
|
|
for _, test := range tokenTests {
|
|
for maxBuf := 1; ; maxBuf++ {
|
|
r := strings.NewReader(test.html)
|
|
z := NewTokenizer(r)
|
|
z.SetMaxBuf(maxBuf)
|
|
var tokenized bytes.Buffer
|
|
for {
|
|
tt := z.Next()
|
|
tokenized.Write(z.Raw())
|
|
if tt == ErrorToken {
|
|
if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
|
|
t.Errorf("%s: unexpected error: %v", test.desc, err)
|
|
}
|
|
break
|
|
}
|
|
}
|
|
// Anything tokenized along with untokenized input or data left in the reader.
|
|
assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, bytes.NewReader(z.Buffered()), r))
|
|
if err != nil {
|
|
t.Errorf("%s: ReadAll: %v", test.desc, err)
|
|
continue tests
|
|
}
|
|
if got, want := string(assembled), test.html; got != want {
|
|
t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
|
|
continue tests
|
|
}
|
|
// EOF indicates that we completed tokenization and hence found the max
|
|
// maxBuf that generates ErrBufferExceeded, so continue to the next test.
|
|
if z.Err() == io.EOF {
|
|
break
|
|
}
|
|
} // buffer sizes
|
|
} // tests
|
|
}
|
|
|
|
func TestPassthrough(t *testing.T) {
|
|
// Accumulating the raw output for each parse event should reconstruct the
|
|
// original input.
|
|
for _, test := range tokenTests {
|
|
z := NewTokenizer(strings.NewReader(test.html))
|
|
var parsed bytes.Buffer
|
|
for {
|
|
tt := z.Next()
|
|
parsed.Write(z.Raw())
|
|
if tt == ErrorToken {
|
|
break
|
|
}
|
|
}
|
|
if got, want := parsed.String(), test.html; got != want {
|
|
t.Errorf("%s: parsed output:\n got: %q\nwant: %q", test.desc, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestBufAPI(t *testing.T) {
|
|
s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
|
|
z := NewTokenizer(bytes.NewBufferString(s))
|
|
var result bytes.Buffer
|
|
depth := 0
|
|
loop:
|
|
for {
|
|
tt := z.Next()
|
|
switch tt {
|
|
case ErrorToken:
|
|
if z.Err() != io.EOF {
|
|
t.Error(z.Err())
|
|
}
|
|
break loop
|
|
case TextToken:
|
|
if depth > 0 {
|
|
result.Write(z.Text())
|
|
}
|
|
case StartTagToken, EndTagToken:
|
|
tn, _ := z.TagName()
|
|
if len(tn) == 1 && tn[0] == 'a' {
|
|
if tt == StartTagToken {
|
|
depth++
|
|
} else {
|
|
depth--
|
|
}
|
|
}
|
|
}
|
|
}
|
|
u := "14567"
|
|
v := string(result.Bytes())
|
|
if u != v {
|
|
t.Errorf("TestBufAPI: want %q got %q", u, v)
|
|
}
|
|
}
|
|
|
|
func TestConvertNewlines(t *testing.T) {
|
|
testCases := map[string]string{
|
|
"Mac\rDOS\r\nUnix\n": "Mac\nDOS\nUnix\n",
|
|
"Unix\nMac\rDOS\r\n": "Unix\nMac\nDOS\n",
|
|
"DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
|
|
"": "",
|
|
"\n": "\n",
|
|
"\n\r": "\n\n",
|
|
"\r": "\n",
|
|
"\r\n": "\n",
|
|
"\r\n\n": "\n\n",
|
|
"\r\n\r": "\n\n",
|
|
"\r\n\r\n": "\n\n",
|
|
"\r\r": "\n\n",
|
|
"\r\r\n": "\n\n",
|
|
"\r\r\n\n": "\n\n\n",
|
|
"\r\r\r\n": "\n\n\n",
|
|
"\r \n": "\n \n",
|
|
"xyz": "xyz",
|
|
}
|
|
for in, want := range testCases {
|
|
if got := string(convertNewlines([]byte(in))); got != want {
|
|
t.Errorf("input %q: got %q, want %q", in, got, want)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestReaderEdgeCases(t *testing.T) {
|
|
const s = "<p>An io.Reader can return (0, nil) or (n, io.EOF).</p>"
|
|
testCases := []io.Reader{
|
|
&zeroOneByteReader{s: s},
|
|
&eofStringsReader{s: s},
|
|
&stuckReader{},
|
|
}
|
|
for i, tc := range testCases {
|
|
got := []TokenType{}
|
|
z := NewTokenizer(tc)
|
|
for {
|
|
tt := z.Next()
|
|
if tt == ErrorToken {
|
|
break
|
|
}
|
|
got = append(got, tt)
|
|
}
|
|
if err := z.Err(); err != nil && err != io.EOF {
|
|
if err != io.ErrNoProgress {
|
|
t.Errorf("i=%d: %v", i, err)
|
|
}
|
|
continue
|
|
}
|
|
want := []TokenType{
|
|
StartTagToken,
|
|
TextToken,
|
|
EndTagToken,
|
|
}
|
|
if !reflect.DeepEqual(got, want) {
|
|
t.Errorf("i=%d: got %v, want %v", i, got, want)
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
|
|
// zeroOneByteReader is like a strings.Reader that alternates between
|
|
// returning 0 bytes and 1 byte at a time.
|
|
type zeroOneByteReader struct {
|
|
s string
|
|
n int
|
|
}
|
|
|
|
func (r *zeroOneByteReader) Read(p []byte) (int, error) {
|
|
if len(p) == 0 {
|
|
return 0, nil
|
|
}
|
|
if len(r.s) == 0 {
|
|
return 0, io.EOF
|
|
}
|
|
r.n++
|
|
if r.n%2 != 0 {
|
|
return 0, nil
|
|
}
|
|
p[0], r.s = r.s[0], r.s[1:]
|
|
return 1, nil
|
|
}
|
|
|
|
// eofStringsReader is like a strings.Reader but can return an (n, err) where
|
|
// n > 0 && err != nil.
|
|
type eofStringsReader struct {
|
|
s string
|
|
}
|
|
|
|
func (r *eofStringsReader) Read(p []byte) (int, error) {
|
|
n := copy(p, r.s)
|
|
r.s = r.s[n:]
|
|
if r.s != "" {
|
|
return n, nil
|
|
}
|
|
return n, io.EOF
|
|
}
|
|
|
|
// stuckReader is an io.Reader that always returns no data and no error.
|
|
type stuckReader struct{}
|
|
|
|
func (*stuckReader) Read(p []byte) (int, error) {
|
|
return 0, nil
|
|
}
|
|
|
|
// Benchmark levels: how much of each token benchmarkTokenizer materializes.
const (
	rawLevel = iota // only the raw bytes, via z.Raw
	lowLevel        // transient []byte views, via z.Text/z.TagName/z.TagAttr
	highLevel       // fully converted Tokens, via z.Token
)
|
|
|
|
func benchmarkTokenizer(b *testing.B, level int) {
|
|
buf, err := ioutil.ReadFile("testdata/go1.html")
|
|
if err != nil {
|
|
b.Fatalf("could not read testdata/go1.html: %v", err)
|
|
}
|
|
b.SetBytes(int64(len(buf)))
|
|
runtime.GC()
|
|
b.ReportAllocs()
|
|
b.ResetTimer()
|
|
for i := 0; i < b.N; i++ {
|
|
z := NewTokenizer(bytes.NewBuffer(buf))
|
|
for {
|
|
tt := z.Next()
|
|
if tt == ErrorToken {
|
|
if err := z.Err(); err != nil && err != io.EOF {
|
|
b.Fatalf("tokenizer error: %v", err)
|
|
}
|
|
break
|
|
}
|
|
switch level {
|
|
case rawLevel:
|
|
// Calling z.Raw just returns the raw bytes of the token. It does
|
|
// not unescape < to <, or lower-case tag names and attribute keys.
|
|
z.Raw()
|
|
case lowLevel:
|
|
// Caling z.Text, z.TagName and z.TagAttr returns []byte values
|
|
// whose contents may change on the next call to z.Next.
|
|
switch tt {
|
|
case TextToken, CommentToken, DoctypeToken:
|
|
z.Text()
|
|
case StartTagToken, SelfClosingTagToken:
|
|
_, more := z.TagName()
|
|
for more {
|
|
_, _, more = z.TagAttr()
|
|
}
|
|
case EndTagToken:
|
|
z.TagName()
|
|
}
|
|
case highLevel:
|
|
// Calling z.Token converts []byte values to strings whose validity
|
|
// extend beyond the next call to z.Next.
|
|
z.Token()
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// The benchmarks below measure tokenizing testdata/go1.html at each level of
// token materialization; see benchmarkTokenizer and the level constants.
func BenchmarkRawLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, rawLevel) }

func BenchmarkLowLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, lowLevel) }

func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }
|