html: add "in head noscript" insertion mode support

In section 12.2.6.4.5 of the spec, the "in head noscript" insertion mode is defined. However, this package and its parser don't have that insertion mode, because the scripting=false case is not currently considered. This commit adds a test and support for the "in head noscript" insertion mode. This change has no effect on the actual behavior. Updates golang/go#16318 Change-Id: I9314c3342bea27fa2acf2fa7d980a127ee0fbf91 Reviewed-on: https://go-review.googlesource.com/c/net/+/172557 Reviewed-by: Nigel Tao <nigeltao@golang.org>
2026-03-31 10:27:08 +09:00 · 2019-04-17 17:26:04 +09:00
parent afa5a82059
commit 574d568418
2 changed files with 90 additions and 4 deletions
--- a/html/parse.go
+++ b/html/parse.go
@@ -630,7 +630,16 @@ func inHeadIM(p *parser) bool {
 			p.oe.pop()
 			p.acknowledgeSelfClosingTag()
 			return true
-		case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
+		case a.Noscript:
+			p.addElement()
+			if p.scripting {
+				p.setOriginalIM()
+				p.im = textIM
+			} else {
+				p.im = inHeadNoscriptIM
+			}
+			return true
+		case a.Script, a.Title, a.Noframes, a.Style:
 			p.addElement()
 			p.setOriginalIM()
 			p.im = textIM
@@ -692,6 +701,49 @@ func inHeadIM(p *parser) bool {
 	return false
 }

+// Section 12.2.6.4.5. inHeadNoscriptIM processes p.tok in the "in head noscript" insertion mode. Its own false return comes after popping the current node and switching p.im to inHeadIM for a token other than </noscript> (presumably so the caller reprocesses that token in the new mode; confirm against the parse loop).
+func inHeadNoscriptIM(p *parser) bool {
+	switch p.tok.Type {
+	case DoctypeToken:
+		// Ignore the token.
+		return true
+	case StartTagToken:
+		switch p.tok.DataAtom {
+		case a.Html:
+			return inBodyIM(p) // <html> start tags are delegated to the "in body" handler.
+		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
+			return inHeadIM(p) // These start tags are delegated to the "in head" handler.
+		case a.Head, a.Noscript:
+			// Ignore the token.
+			return true
+		}
+	case EndTagToken:
+		switch p.tok.DataAtom {
+		case a.Noscript, a.Br: // Leave the switch and run the shared pop-and-switch code below.
+		default:
+			// Ignore the token.
+			return true
+		}
+	case TextToken:
+		s := strings.TrimLeft(p.tok.Data, whitespace)
+		if len(s) == 0 {
+			// It was all whitespace.
+			return inHeadIM(p)
+		}
+	case CommentToken:
+		return inHeadIM(p)
+	}
+	p.oe.pop() // Reached by </noscript>, </br>, non-whitespace text, and any token not matched above.
+	if p.top().DataAtom != a.Head {
+		panic("html: the new current node will be a head element.")
+	}
+	p.im = inHeadIM
+	if p.tok.DataAtom == a.Noscript { // A <noscript> start tag already returned above, so this is expected to match only </noscript>.
+		return true
+	}
+	return false
+}
+
 // Section 12.2.6.4.6.
 func afterHeadIM(p *parser) bool {
 	switch p.tok.Type {
--- a/html/parse_test.go
+++ b/html/parse_test.go
@@ -228,7 +228,7 @@ func TestParser(t *testing.T) {
 					t.Fatal(err)
 				}

-				err = testParseCase(text, want, context)
+				err = testParseCase(text, want, context, Parse)

 				if err != nil {
 					t.Errorf("%s test #%d %q, %s", tf, i, text, err)
@@ -238,11 +238,45 @@ func TestParser(t *testing.T) {
 	}
 }

+// TestParserWithoutScripting checks the tree produced for a leading <noscript> when the parser's scripting field is false (issue 16318).
+func TestParserWithoutScripting(t *testing.T) {
+	text := `<noscript><img src='https://golang.org/doc/gopher/frontpage.png' /></noscript><p><img src='https://golang.org/doc/gopher/doc.png' /></p>`
+	want := `| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     "<img src='https://golang.org/doc/gopher/frontpage.png' />"
+|     <p>
+|       <img>
+|         src="https://golang.org/doc/gopher/doc.png"
+`
+	err := testParseCase(text, want, "", func(r io.Reader) (*Node, error) { // Build the parser by hand so scripting can be set to false.
+		p := &parser{
+			tokenizer: NewTokenizer(r),
+			doc: &Node{
+				Type: DocumentNode,
+			},
+			scripting:  false, // The configuration this test exercises.
+			framesetOK: true,
+			im:         initialIM,
+		}
+		err := p.parse()
+		if err != nil {
+			return nil, err
+		}
+		return p.doc, nil
+	})
+
+	if err != nil {
+		t.Errorf("test with scripting is disabled, %q, %s", text, err)
+	}
+}
+
 // testParseCase tests one test case from the test files. If the test does not
 // pass, it returns an error that explains the failure.
 // text is the HTML to be parsed, want is a dump of the correct parse tree,
 // and context is the name of the context node, if any.
-func testParseCase(text, want, context string) (err error) {
+func testParseCase(text, want, context string, parseFunc func(r io.Reader) (*Node, error)) (err error) {
 	defer func() {
 		if x := recover(); x != nil {
 			switch e := x.(type) {
@@ -256,7 +290,7 @@ func testParseCase(text, want, context string) (err error) {

 	var doc *Node
 	if context == "" {
-		doc, err = Parse(strings.NewReader(text))
+		doc, err = parseFunc(strings.NewReader(text))
 		if err != nil {
 			return err
 		}