html: only render content literally in the HTML namespace

Per the WHATWG HTML specification, section 13.3, a text node's content is
written out literally (unescaped) only when the node is in the HTML
namespace; in other namespaces, such as SVG or MathML, it is escaped.

Thanks to Mohammad Thoriq Aziz for reporting this issue.

Fixes golang/go#61615
Fixes CVE-2023-3978

Change-Id: I332152904d4e7646bd2441602bcbe591fc655fa4
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1942896
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
Reviewed-on: https://go-review.googlesource.com/c/net/+/514896
Reviewed-by: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
This commit is contained in:
Roland Shoemaker
2023-07-06 10:25:47 -07:00
committed by Damien Neil
parent 63fe334ad5
commit 8ffa475fbd
2 changed files with 70 additions and 14 deletions

View File

@@ -6,6 +6,8 @@ package html
import (
"bytes"
"fmt"
"strings"
"testing"
)
@@ -108,16 +110,16 @@ func TestRenderer(t *testing.T) {
// just commentary. The "0:" prefixes are for easy cross-reference with
// the nodes array.
treeAsText := [...]string{
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
10: `. . "5"`,
11: `. . <blockquote>`,
12: `. . <br>`,
@@ -169,3 +171,37 @@ func TestRenderer(t *testing.T) {
t.Errorf("got vs want:\n%s\n%s\n", got, want)
}
}
// TestRenderTextNodes checks how Render serializes the text content of a
// fixed set of elements (style, script, xmp, ...) depending on namespace.
//
// Each element is given a lone "&" as its content and is parsed either
// directly under <body> or wrapped in an <svg> or <math> element. Without a
// wrapper the rendered output must equal the input document byte for byte;
// with a wrapper the "&" must come back as "&amp;".
func TestRenderTextNodes(t *testing.T) {
	tags := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
	// The empty string stands for "no wrapper", i.e. the HTML namespace.
	wrappers := []string{"", "svg", "math"}

	for _, ns := range wrappers {
		foreign := ns != ""

		// The wrapper tags depend only on the namespace, so build them once
		// per namespace rather than once per element.
		var opening, closing string
		if foreign {
			opening = fmt.Sprintf("<%s>", ns)
			closing = fmt.Sprintf("</%s>", ns)
		}

		for _, tag := range tags {
			doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, opening, tag, tag, closing)

			root, err := Parse(strings.NewReader(doc))
			if err != nil {
				t.Fatal(err)
			}

			var out bytes.Buffer
			if err := Render(&out, root); err != nil {
				t.Fatal(err)
			}

			// Inside a wrapper the single "&" is expected to be escaped.
			want := doc
			if foreign {
				want = strings.Replace(want, "&", "&amp;", 1)
			}

			if got := out.String(); got != want {
				t.Errorf("unexpected output: got %q, want %q", got, want)
			}
		}
	}
}