html: fix SOLIDUS '/' handling in attribute parsing

Calling the Tokenizer on an HTML element that contains a SOLIDUS (/) in an attribute name results in incorrect tokenization.

This is caused by violations of the following state transitions in the WHATWG spec:
- https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state (we do not reconsume the character when '/' is encountered)
- https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state (we do not switch to the self-closing start tag state)

Fixes golang/go#63402

Signed-off-by: Maciej Mionskowski <maciej@mionskowski.pl>
This commit is contained in:
Maciej Mionskowski
2023-10-07 20:16:52 +02:00
parent 88194ad8ab
commit 3546db808c
2 changed files with 23 additions and 4 deletions

View File

@@ -910,9 +910,6 @@ func (z *Tokenizer) readTagAttrKey() {
return
}
switch c {
case ' ', '\n', '\r', '\t', '\f', '/':
z.pendingAttr[0].end = z.raw.end - 1
return
case '=':
if z.pendingAttr[0].start+1 == z.raw.end {
// WHATWG 13.2.5.32, if we see an equals sign before the attribute name
@@ -920,7 +917,9 @@ func (z *Tokenizer) readTagAttrKey() {
continue
}
fallthrough
case '>':
case ' ', '\n', '\r', '\t', '\f', '/', '>':
// WHATWG 13.2.5.33 Attribute name state
// We need to reconsume the char in the after attribute name state to support the / character
z.raw.end--
z.pendingAttr[0].end = z.raw.end
return
@@ -939,6 +938,11 @@ func (z *Tokenizer) readTagAttrVal() {
if z.err != nil {
return
}
if c == '/' {
// WHATWG 13.2.5.34 After attribute name state
// U+002F SOLIDUS (/) - Switch to the self-closing start tag state.
return
}
if c != '=' {
z.raw.end--
return

View File

@@ -601,6 +601,21 @@ var tokenTests = []tokenTest{
`<p =asd>`,
`<p =asd="">`,
},
{
"forward slash before attribute name",
`<p/=">`,
`<p ="="">`,
},
{
"forward slash before attribute name with spaces around",
`<p / =">`,
`<p ="="">`,
},
{
"forward slash after attribute name followed by a character",
`<p a/ ="">`,
`<p a="" =""="">`,
},
}
func TestTokenizer(t *testing.T) {