mirror of
https://github.com/golang/net.git
synced 2026-03-31 02:17:08 +09:00
html: have Render escape comments less often
Fixes golang/go#58246 Change-Id: I3effbd2afd7e363a42baa4db20691e57c9a08389 Reviewed-on: https://go-review.googlesource.com/c/net/+/469056 TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Nigel Tao <nigeltao@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Reviewed-by: Kunpei Sakai <namusyaka@gmail.com> Reviewed-by: Damien Neil <dneil@google.com>
This commit is contained in:
@@ -6,12 +6,13 @@ package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestComments exhaustively tests every 'interesting' N-byte string is
|
||||
// correctly parsed as a comment. N ranges from 4+1 to 4+suffixLen inclusive,
|
||||
// where 4 is the length of the "<!--" prefix that starts an HTML comment.
|
||||
// correctly parsed as a comment. N ranges from 4+1 to 4+maxSuffixLen
|
||||
// inclusive. 4 is the length of the "<!--" prefix that starts an HTML comment.
|
||||
//
|
||||
// 'Interesting' means that the N-4 byte suffix consists entirely of bytes
|
||||
// sampled from the interestingCommentBytes const string, below. These cover
|
||||
@@ -27,8 +28,8 @@ import (
|
||||
// two algorithms match.
|
||||
func TestComments(t *testing.T) {
|
||||
const prefix = "<!--"
|
||||
const suffixLen = 6
|
||||
buffer := make([]byte, 0, len(prefix)+suffixLen)
|
||||
const maxSuffixLen = 6
|
||||
buffer := make([]byte, 0, len(prefix)+maxSuffixLen)
|
||||
testAllComments(t, append(buffer, prefix...))
|
||||
}
|
||||
|
||||
@@ -205,6 +206,26 @@ loop:
|
||||
if (gotComment != wantComment) || (gotRemainder != wantRemainder) {
|
||||
t.Errorf("input=%q\ngot: %q + %q\nwant: %q + %q",
|
||||
b, gotComment, gotRemainder, wantComment, wantRemainder)
|
||||
return
|
||||
}
|
||||
|
||||
// suffix is the "N-4 byte suffix" per the TestComments comment.
|
||||
suffix := string(b[4:])
|
||||
|
||||
// Test that a round trip, rendering (escaped) and re-parsing, of a comment
|
||||
// token (with that suffix as the Token.Data) preserves that string.
|
||||
tok := Token{
|
||||
Type: CommentToken,
|
||||
Data: suffix,
|
||||
}
|
||||
z2 := NewTokenizer(strings.NewReader(tok.String()))
|
||||
if next := z2.Next(); next != CommentToken {
|
||||
t.Fatalf("round-trip Next(%q): got %v, want %v", suffix, next, CommentToken)
|
||||
}
|
||||
gotComment2 := string(z2.Text())
|
||||
if gotComment2 != suffix {
|
||||
t.Errorf("round-trip\ngot: %q\nwant: %q", gotComment2, suffix)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -193,6 +193,87 @@ func lower(b []byte) []byte {
|
||||
return b
|
||||
}
|
||||
|
||||
// escapeComment is like func escape but escapes its input bytes less often.
|
||||
// Per https://github.com/golang/go/issues/58246 some HTML comments are (1)
|
||||
// meaningful and (2) contain angle brackets that we'd like to avoid escaping
|
||||
// unless we have to.
|
||||
//
|
||||
// "We have to" includes the '&' byte, since that introduces other escapes.
|
||||
//
|
||||
// It also includes those bytes (not including EOF) that would otherwise end
|
||||
// the comment. Per the summary table at the bottom of comment_test.go, this is
|
||||
// the '>' byte that, per above, we'd like to avoid escaping unless we have to.
|
||||
//
|
||||
// Studying the summary table (and T actions in its '>' column) closely, we
|
||||
// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the
|
||||
// start of the comment data. State 52 is after a '!'. The other three states
|
||||
// are after a '-'.
|
||||
//
|
||||
// Our algorithm is thus to escape every '&' and to escape '>' if and only if:
|
||||
// - The '>' is after a '!' or '-' (in the unescaped data) or
|
||||
// - The '>' is at the start of the comment data (after the opening "<!--").
|
||||
func escapeComment(w writer, s string) error {
|
||||
// When modifying this function, consider manually increasing the
|
||||
// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
|
||||
// That increase should only be temporary, not committed, as it
|
||||
// exponentially affects the test running time.
|
||||
|
||||
if len(s) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Loop:
|
||||
// - Grow j such that s[i:j] does not need escaping.
|
||||
// - If s[j] does need escaping, output s[i:j] and an escaped s[j],
|
||||
// resetting i and j to point past that s[j] byte.
|
||||
i := 0
|
||||
for j := 0; j < len(s); j++ {
|
||||
escaped := ""
|
||||
switch s[j] {
|
||||
case '&':
|
||||
escaped = "&"
|
||||
|
||||
case '>':
|
||||
if j > 0 {
|
||||
if prev := s[j-1]; (prev != '!') && (prev != '-') {
|
||||
continue
|
||||
}
|
||||
}
|
||||
escaped = ">"
|
||||
|
||||
default:
|
||||
continue
|
||||
}
|
||||
|
||||
if i < j {
|
||||
if _, err := w.WriteString(s[i:j]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if _, err := w.WriteString(escaped); err != nil {
|
||||
return err
|
||||
}
|
||||
i = j + 1
|
||||
}
|
||||
|
||||
if i < len(s) {
|
||||
if _, err := w.WriteString(s[i:]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// escapeCommentString is to EscapeString as escapeComment is to escape.
|
||||
func escapeCommentString(s string) string {
|
||||
if strings.IndexAny(s, "&>") == -1 {
|
||||
return s
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
escapeComment(&buf, s)
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
const escapedChars = "&'<>\"\r"
|
||||
|
||||
func escape(w writer, s string) error {
|
||||
|
||||
@@ -85,7 +85,7 @@ func render1(w writer, n *Node) error {
|
||||
if _, err := w.WriteString("<!--"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := escape(w, n.Data); err != nil {
|
||||
if err := escapeComment(w, n.Data); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := w.WriteString("-->"); err != nil {
|
||||
|
||||
@@ -110,7 +110,7 @@ func (t Token) String() string {
|
||||
case SelfClosingTagToken:
|
||||
return "<" + t.tagString() + "/>"
|
||||
case CommentToken:
|
||||
return "<!--" + EscapeString(t.Data) + "-->"
|
||||
return "<!--" + escapeCommentString(t.Data) + "-->"
|
||||
case DoctypeToken:
|
||||
return "<!DOCTYPE " + EscapeString(t.Data) + ">"
|
||||
}
|
||||
@@ -598,10 +598,10 @@ scriptDataDoubleEscapeEnd:
|
||||
// readComment reads the next comment token starting with "<!--". The opening
|
||||
// "<!--" has already been consumed.
|
||||
func (z *Tokenizer) readComment() {
|
||||
// When modifying this function, consider manually increasing the suffixLen
|
||||
// constant in func TestComments, from 6 to e.g. 9 or more. That increase
|
||||
// should only be temporary, not committed, as it exponentially affects the
|
||||
// test running time.
|
||||
// When modifying this function, consider manually increasing the
|
||||
// maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
|
||||
// That increase should only be temporary, not committed, as it
|
||||
// exponentially affects the test running time.
|
||||
|
||||
z.data.start = z.raw.end
|
||||
defer func() {
|
||||
|
||||
@@ -23,14 +23,6 @@ const issue58246 = `<!--[if gte mso 12]>
|
||||
</o:OfficeDocumentSettings>
|
||||
</xml>
|
||||
<![endif]-->`
|
||||
const issue58246Rendered = `<!--[if gte mso 12]>
|
||||
<xml>
|
||||
<o:OfficeDocumentSettings>
|
||||
<o:AllowPNG/>
|
||||
<o:PixelsPerInch>96</o:PixelsPerInch>
|
||||
</o:OfficeDocumentSettings>
|
||||
</xml>
|
||||
<![endif]-->`
|
||||
|
||||
type tokenTest struct {
|
||||
// A short description of the test case.
|
||||
@@ -332,7 +324,7 @@ var tokenTests = []tokenTest{
|
||||
{
|
||||
"comment3",
|
||||
"a<!--x>-->z",
|
||||
"a$<!--x>-->$z",
|
||||
"a$<!--x>-->$z",
|
||||
},
|
||||
{
|
||||
"comment4",
|
||||
@@ -352,7 +344,7 @@ var tokenTests = []tokenTest{
|
||||
{
|
||||
"comment7",
|
||||
"a<!---<>z",
|
||||
"a$<!---<>z-->",
|
||||
"a$<!---<>z-->",
|
||||
},
|
||||
{
|
||||
"comment8",
|
||||
@@ -407,12 +399,12 @@ var tokenTests = []tokenTest{
|
||||
{
|
||||
"comment18",
|
||||
"a<!--<!-->z",
|
||||
"a$<!--<!-->$z",
|
||||
"a$<!--<!-->$z",
|
||||
},
|
||||
{
|
||||
"comment19",
|
||||
"a<!--<!--",
|
||||
"a$<!--<!-->",
|
||||
"a$<!--<!-->",
|
||||
},
|
||||
{
|
||||
"comment20",
|
||||
@@ -427,7 +419,7 @@ var tokenTests = []tokenTest{
|
||||
{
|
||||
"comment22",
|
||||
"a<!--!--!<--!-->z",
|
||||
"a$<!--!--!<--!-->$z",
|
||||
"a$<!--!--!<--!-->$z",
|
||||
},
|
||||
{
|
||||
"comment23",
|
||||
@@ -437,27 +429,27 @@ var tokenTests = []tokenTest{
|
||||
{
|
||||
"comment24",
|
||||
"a<!-->>x",
|
||||
"a$<!-->>x-->",
|
||||
"a$<!-->>x-->",
|
||||
},
|
||||
{
|
||||
"comment25",
|
||||
"a<!-->>",
|
||||
"a$<!-->>-->",
|
||||
"a$<!-->>-->",
|
||||
},
|
||||
{
|
||||
"comment26",
|
||||
"a<!-->>-",
|
||||
"a$<!-->>-->",
|
||||
"a$<!-->>-->",
|
||||
},
|
||||
{
|
||||
"comment27",
|
||||
"a<!-->>-->z",
|
||||
"a$<!-->>-->$z",
|
||||
"a$<!-->>-->$z",
|
||||
},
|
||||
{
|
||||
"comment28",
|
||||
"a<!--&>-->z",
|
||||
"a$<!--&>-->$z",
|
||||
"a$<!--&>-->$z",
|
||||
},
|
||||
{
|
||||
"comment29",
|
||||
@@ -469,10 +461,20 @@ var tokenTests = []tokenTest{
|
||||
"a<!--&nosuchentity;-->z",
|
||||
"a$<!--&nosuchentity;-->$z",
|
||||
},
|
||||
{
|
||||
"comment31",
|
||||
"a<!--i>>j-->z",
|
||||
"a$<!--i>>j-->$z",
|
||||
},
|
||||
{
|
||||
"comment32",
|
||||
"a<!--i!>>j-->z",
|
||||
"a$<!--i!>>j-->$z",
|
||||
},
|
||||
// https://stackoverflow.design/email/base/mso/#targeting-specific-outlook-versions
|
||||
// says "[For] Windows Outlook 2003 and above... conditional comments allow
|
||||
// us to add bits of HTML that are only read by the Word-based versions of
|
||||
// Outlook". TODO: these comments (with angle brackets) should pass through
|
||||
// Outlook". These comments (with angle brackets) should pass through
|
||||
// unchanged (by this Go package) when rendering.
|
||||
//
|
||||
// We should also still escape ">" as ">" when necessary.
|
||||
@@ -484,22 +486,22 @@ var tokenTests = []tokenTest{
|
||||
{
|
||||
"issue48237CommentWithAmpgtsemi1",
|
||||
"a<!--<p></p><!--[video]-->-->z",
|
||||
"a$<!--<p></p><!--[video]-->-->$z",
|
||||
"a$<!--<p></p><!--[video]-->-->$z",
|
||||
},
|
||||
{
|
||||
"issue48237CommentWithAmpgtsemi2",
|
||||
"a<!--<p></p><!--[video]--!>-->z",
|
||||
"a$<!--<p></p><!--[video]--!>-->$z",
|
||||
"a$<!--<p></p><!--[video]--!>-->$z",
|
||||
},
|
||||
{
|
||||
"issue58246MicrosoftOutlookComment1",
|
||||
"a<!--[if mso]> your code <![endif]-->z",
|
||||
"a$<!--[if mso]> your code <![endif]-->$z",
|
||||
"a$<!--[if mso]> your code <![endif]-->$z",
|
||||
},
|
||||
{
|
||||
"issue58246MicrosoftOutlookComment2",
|
||||
"a" + issue58246 + "z",
|
||||
"a$" + issue58246Rendered + "$z",
|
||||
"a$" + issue58246 + "$z",
|
||||
},
|
||||
// An attribute with a backslash.
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user