From 574d568418ea6c9074eb41cf63c468dbca6a51a3 Mon Sep 17 00:00:00 2001 From: Kunpei Sakai Date: Wed, 17 Apr 2019 17:26:04 +0900 Subject: [PATCH] html: add "in head noscript" im support In the spec 12.2.6.4.5, the "in head noscript" insertion mode is defined. However, this package and its parser don't have the insertion mode, because the scripting=false case is not considered currently. This commit adds a test and support for the "in head noscript" insertion mode. This change has no effect on the actual behavior. Updates golang/go#16318 Change-Id: I9314c3342bea27fa2acf2fa7d980a127ee0fbf91 Reviewed-on: https://go-review.googlesource.com/c/net/+/172557 Reviewed-by: Nigel Tao --- html/parse.go | 54 +++++++++++++++++++++++++++++++++++++++++++++- html/parse_test.go | 40 +++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/html/parse.go b/html/parse.go index 1d3c198a..2aaf2ff0 100644 --- a/html/parse.go +++ b/html/parse.go @@ -630,7 +630,16 @@ func inHeadIM(p *parser) bool { p.oe.pop() p.acknowledgeSelfClosingTag() return true - case a.Script, a.Title, a.Noscript, a.Noframes, a.Style: + case a.Noscript: + p.addElement() + if p.scripting { + p.setOriginalIM() + p.im = textIM + } else { + p.im = inHeadNoscriptIM + } + return true + case a.Script, a.Title, a.Noframes, a.Style: p.addElement() p.setOriginalIM() p.im = textIM @@ -692,6 +701,49 @@ func inHeadIM(p *parser) bool { return false } +// 12.2.6.4.5. +func inHeadNoscriptIM(p *parser) bool { + switch p.tok.Type { + case DoctypeToken: + // Ignore the token. + return true + case StartTagToken: + switch p.tok.DataAtom { + case a.Html: + return inBodyIM(p) + case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style: + return inHeadIM(p) + case a.Head, a.Noscript: + // Ignore the token. + return true + } + case EndTagToken: + switch p.tok.DataAtom { + case a.Noscript, a.Br: + default: + // Ignore the token. 
+ return true + } + case TextToken: + s := strings.TrimLeft(p.tok.Data, whitespace) + if len(s) == 0 { + // It was all whitespace. + return inHeadIM(p) + } + case CommentToken: + return inHeadIM(p) + } + p.oe.pop() + if p.top().DataAtom != a.Head { + panic("html: the new current node will be a head element.") + } + p.im = inHeadIM + if p.tok.DataAtom == a.Noscript { + return true + } + return false +} + // Section 12.2.6.4.6. func afterHeadIM(p *parser) bool { switch p.tok.Type { diff --git a/html/parse_test.go b/html/parse_test.go index 9bba918c..b49181c1 100644 --- a/html/parse_test.go +++ b/html/parse_test.go @@ -228,7 +228,7 @@ func TestParser(t *testing.T) { t.Fatal(err) } - err = testParseCase(text, want, context) + err = testParseCase(text, want, context, Parse) if err != nil { t.Errorf("%s test #%d %q, %s", tf, i, text, err) @@ -238,11 +238,45 @@ func TestParser(t *testing.T) { } } +// Issue 16318 +func TestParserWithoutScripting(t *testing.T) { + text := `

` + want := `| +| +|