html: add "in head noscript" im support

Section 12.2.6.4.5 of the spec defines the "in head noscript" insertion mode.
However, this package's parser does not implement that insertion mode,
because the scripting=false case is not currently considered.

This commit adds a test and support for the "in head noscript"
insertion mode. This change has no effect on the actual behavior.

Updates golang/go#16318

Change-Id: I9314c3342bea27fa2acf2fa7d980a127ee0fbf91
Reviewed-on: https://go-review.googlesource.com/c/net/+/172557
Reviewed-by: Nigel Tao <nigeltao@golang.org>
This commit is contained in:
Kunpei Sakai
2019-04-17 17:26:04 +09:00
committed by Nigel Tao
parent afa5a82059
commit 574d568418
2 changed files with 90 additions and 4 deletions

View File

@@ -630,7 +630,16 @@ func inHeadIM(p *parser) bool {
p.oe.pop()
p.acknowledgeSelfClosingTag()
return true
case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
case a.Noscript:
p.addElement()
if p.scripting {
p.setOriginalIM()
p.im = textIM
} else {
p.im = inHeadNoscriptIM
}
return true
case a.Script, a.Title, a.Noframes, a.Style:
p.addElement()
p.setOriginalIM()
p.im = textIM
@@ -692,6 +701,49 @@ func inHeadIM(p *parser) bool {
return false
}
// Section 12.2.6.4.5.
//
// inHeadNoscriptIM processes the current token for the "in head noscript"
// insertion mode. It returns true when the token is ignored, when it is the
// </noscript> end tag that closes the element, or when a delegated handler
// (inHeadIM or inBodyIM) returns true. For any other token that falls through
// it pops the current node, switches the parser back to inHeadIM and returns
// false (presumably so the caller reprocesses the token in the new mode;
// confirm against the parse loop).
func inHeadNoscriptIM(p *parser) bool {
	tag := p.tok.DataAtom

	switch p.tok.Type {
	case DoctypeToken:
		// Doctype tokens are dropped in this mode.
		return true
	case CommentToken:
		return inHeadIM(p)
	case TextToken:
		// Whitespace-only text is handled by the "in head" rules; text with
		// any other character falls through to the pop below.
		if strings.TrimLeft(p.tok.Data, whitespace) == "" {
			return inHeadIM(p)
		}
	case StartTagToken:
		switch tag {
		case a.Head, a.Noscript:
			// Nested <head> and <noscript> start tags are dropped.
			return true
		case a.Html:
			return inBodyIM(p)
		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
			return inHeadIM(p)
		}
	case EndTagToken:
		// Only </noscript> and </br> reach the pop below; every other end
		// tag is dropped.
		if tag != a.Noscript && tag != a.Br {
			return true
		}
	}

	// Leave the noscript element: pop it and verify that head is now the
	// current node before handing control back to the "in head" mode.
	p.oe.pop()
	if p.top().DataAtom != a.Head {
		panic("html: the new current node will be a head element.")
	}
	p.im = inHeadIM

	// Only a token whose atom is noscript counts as handled here; a noscript
	// start tag has already returned above.
	return tag == a.Noscript
}
// Section 12.2.6.4.6.
func afterHeadIM(p *parser) bool {
switch p.tok.Type {

View File

@@ -228,7 +228,7 @@ func TestParser(t *testing.T) {
t.Fatal(err)
}
err = testParseCase(text, want, context)
err = testParseCase(text, want, context, Parse)
if err != nil {
t.Errorf("%s test #%d %q, %s", tf, i, text, err)
@@ -238,11 +238,45 @@ func TestParser(t *testing.T) {
}
}
// Issue 16318
func TestParserWithoutScripting(t *testing.T) {
text := `<noscript><img src='https://golang.org/doc/gopher/frontpage.png' /></noscript><p><img src='https://golang.org/doc/gopher/doc.png' /></p>`
want := `| <html>
| <head>
| <noscript>
| <body>
| "<img src='https://golang.org/doc/gopher/frontpage.png' />"
| <p>
| <img>
| src="https://golang.org/doc/gopher/doc.png"
`
err := testParseCase(text, want, "", func(r io.Reader) (*Node, error) {
p := &parser{
tokenizer: NewTokenizer(r),
doc: &Node{
Type: DocumentNode,
},
scripting: false,
framesetOK: true,
im: initialIM,
}
err := p.parse()
if err != nil {
return nil, err
}
return p.doc, nil
})
if err != nil {
t.Errorf("test with scripting is disabled, %q, %s", text, err)
}
}
// testParseCase tests one test case from the test files. If the test does not
// pass, it returns an error that explains the failure.
// text is the HTML to be parsed, want is a dump of the correct parse tree,
// and context is the name of the context node, if any.
func testParseCase(text, want, context string) (err error) {
func testParseCase(text, want, context string, parseFunc func(r io.Reader) (*Node, error)) (err error) {
defer func() {
if x := recover(); x != nil {
switch e := x.(type) {
@@ -256,7 +290,7 @@ func testParseCase(text, want, context string) (err error) {
var doc *Node
if context == "" {
doc, err = Parse(strings.NewReader(text))
doc, err = parseFunc(strings.NewReader(text))
if err != nil {
return err
}