html: only render content literally in the HTML namespace

Per the WHATWG HTML specification, section 13.3, only append the literal
content of a text node if we are in the HTML namespace.

Thanks to Mohammad Thoriq Aziz for reporting this issue.

Fixes golang/go#61615
Fixes CVE-2023-3978

Change-Id: I332152904d4e7646bd2441602bcbe591fc655fa4
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1942896
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
Reviewed-on: https://go-review.googlesource.com/c/net/+/514896
Reviewed-by: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
This commit is contained in:
Roland Shoemaker
2023-07-06 10:25:47 -07:00
committed by Damien Neil
parent 63fe334ad5
commit 8ffa475fbd
2 changed files with 70 additions and 14 deletions

View File

@@ -194,9 +194,8 @@ func render1(w writer, n *Node) error {
}
}
// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
// Render any child nodes
if childTextNodesAreLiteral(n) {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
@@ -213,7 +212,7 @@ func render1(w writer, n *Node) error {
// last element in the file, with no closing tag.
return plaintextAbort
}
default:
} else {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
@@ -231,6 +230,27 @@ func render1(w writer, n *Node) error {
return w.WriteByte('>')
}
func childTextNodesAreLiteral(n *Node) bool {
// Per WHATWG HTML 13.3, if the parent of the current node is a style,
// script, xmp, iframe, noembed, noframes, or plaintext element, and the
// current node is a text node, append the value of the node's data
// literally. The specification is not explicit about it, but we only
// enforce this if we are in the HTML namespace (i.e. when the namespace is
// "").
// NOTE: we also always include noscript elements, although the
// specification states that they should only be rendered as such if
// scripting is enabled for the node (which is not something we track).
if n.Namespace != "" {
return false
}
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
return true
default:
return false
}
}
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.

View File

@@ -6,6 +6,8 @@ package html
import (
"bytes"
"fmt"
"strings"
"testing"
)
@@ -108,16 +110,16 @@ func TestRenderer(t *testing.T) {
// just commentary. The "0:" prefixes are for easy cross-reference with
// the nodes array.
treeAsText := [...]string{
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
10: `. . "5"`,
11: `. . <blockquote>`,
12: `. . <br>`,
@@ -169,3 +171,37 @@ func TestRenderer(t *testing.T) {
t.Errorf("got vs want:\n%s\n%s\n", got, want)
}
}
func TestRenderTextNodes(t *testing.T) {
elements := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
for _, namespace := range []string{
"", // html
"svg",
"math",
} {
for _, e := range elements {
var namespaceOpen, namespaceClose string
if namespace != "" {
namespaceOpen, namespaceClose = fmt.Sprintf("<%s>", namespace), fmt.Sprintf("</%s>", namespace)
}
doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, namespaceOpen, e, e, namespaceClose)
n, err := Parse(strings.NewReader(doc))
if err != nil {
t.Fatal(err)
}
b := bytes.NewBuffer(nil)
if err := Render(b, n); err != nil {
t.Fatal(err)
}
expected := doc
if namespace != "" {
expected = strings.Replace(expected, "&", "&amp;", 1)
}
if b.String() != expected {
t.Errorf("unexpected output: got %q, want %q", b.String(), expected)
}
}
}
}