diff --git a/html/token.go b/html/token.go
index 3c57880d..6598c1f7 100644
--- a/html/token.go
+++ b/html/token.go
@@ -839,8 +839,22 @@ func (z *Tokenizer) readStartTag() TokenType {
if raw {
z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
}
- // Look for a self-closing token like "<br/>".
- if z.err == nil && z.buf[z.raw.end-2] == '/' {
+ // Look for a self-closing token (e.g. <br/>).
+ //
+ // Originally, we did this by just checking that the last character of the
+ // tag (ignoring the closing bracket) was a solidus (/) character, but this
+ // is not always accurate.
+ //
+ // We need to be careful that we don't misinterpret a non-self-closing tag
+ // as self-closing, as can happen if the tag contains unquoted attribute
+ // values (i.e. <p a=/>).
+ //
+ // To avoid this, we check that the last non-bracket character of the tag
+ // (z.raw.end-2) isn't the same character as the last non-quote character of
+ // the last attribute of the tag (z.attr[nAttrs-1][1].end-1), if the tag has
+ // attributes.
+ nAttrs := len(z.attr)
+ if z.err == nil && z.buf[z.raw.end-2] == '/' && (nAttrs == 0 || z.raw.end-2 != z.attr[nAttrs-1][1].end-1) {
return SelfClosingTagToken
}
return StartTagToken
diff --git a/html/token_test.go b/html/token_test.go
index a36d112d..44773f17 100644
--- a/html/token_test.go
+++ b/html/token_test.go
@@ -616,6 +616,16 @@ var tokenTests = []tokenTest{
`
`, `
`, }, + { + "slash at end of unquoted attribute value", + `
`, + `
`, + }, + { + "self-closing tag with attribute", + `
`, + `
`, + }, } func TestTokenizer(t *testing.T) { @@ -815,6 +825,14 @@ func TestReaderEdgeCases(t *testing.T) { } } +func TestSelfClosingTagValueConfusion(t *testing.T) { + z := NewTokenizer(strings.NewReader(`
`)) + tok := z.Next() + if tok != StartTagToken { + t.Fatalf("unexpected token type: got %s, want %s", tok, StartTagToken) + } +} + // zeroOneByteReader is like a strings.Reader that alternates between // returning 0 bytes and 1 byte at a time. type zeroOneByteReader struct {