From 6ec8895aa5f6594da7356da7d341b98133629009 Mon Sep 17 00:00:00 2001 From: Roland Shoemaker Date: Mon, 29 Sep 2025 19:38:24 -0700 Subject: [PATCH] html: align in row insertion mode with spec Update inRowIM to match the HTML specification. This fixes an issue where a specific HTML document could cause the parser to enter an infinite loop when trying to parse a </tbody> and implied </tr> next to each other. Fixes CVE-2025-58190 Fixes golang/go#70179 Change-Id: Idcb133c87c7d475cc8c7eb1f1550ea21d8bdddea Reviewed-on: https://go-review.googlesource.com/c/net/+/709875 LUCI-TryBot-Result: Go LUCI Reviewed-by: Damien Neil --- html/parse.go | 36 ++++++++++++++++++++++------------ html/parse_test.go | 49 ++++++++++++++++++++++++++++------------------ 2 files changed, 54 insertions(+), 31 deletions(-) diff --git a/html/parse.go b/html/parse.go index 518ee4c9..722e9277 100644 --- a/html/parse.go +++ b/html/parse.go @@ -136,7 +136,7 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { return -1 } default: - panic("unreachable") + panic(fmt.Sprintf("html: internal error: indexOfElementInScope unknown scope: %d", s)) } } switch s { @@ -179,7 +179,7 @@ func (p *parser) clearStackToContext(s scope) { return } default: - panic("unreachable") + panic(fmt.Sprintf("html: internal error: clearStackToContext unknown scope: %d", s)) } } } @@ -1678,7 +1678,7 @@ func inTableBodyIM(p *parser) bool { return inTableIM(p) } -// Section 12.2.6.4.14. +// Section 13.2.6.4.14. 
func inRowIM(p *parser) bool { switch p.tok.Type { case StartTagToken: @@ -1690,7 +1690,9 @@ func inRowIM(p *parser) bool { p.im = inCellIM return true case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr: - if p.popUntil(tableScope, a.Tr) { + if p.elementInScope(tableScope, a.Tr) { + p.clearStackToContext(tableRowScope) + p.oe.pop() p.im = inTableBodyIM return false } @@ -1700,22 +1702,28 @@ func inRowIM(p *parser) bool { case EndTagToken: switch p.tok.DataAtom { case a.Tr: - if p.popUntil(tableScope, a.Tr) { + if p.elementInScope(tableScope, a.Tr) { + p.clearStackToContext(tableRowScope) + p.oe.pop() p.im = inTableBodyIM return true } // Ignore the token. return true case a.Table: - if p.popUntil(tableScope, a.Tr) { + if p.elementInScope(tableScope, a.Tr) { + p.clearStackToContext(tableRowScope) + p.oe.pop() p.im = inTableBodyIM return false } // Ignore the token. return true case a.Tbody, a.Tfoot, a.Thead: - if p.elementInScope(tableScope, p.tok.DataAtom) { - p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String()) + if p.elementInScope(tableScope, p.tok.DataAtom) && p.elementInScope(tableScope, a.Tr) { + p.clearStackToContext(tableRowScope) + p.oe.pop() + p.im = inTableBodyIM return false } // Ignore the token. @@ -2222,16 +2230,20 @@ func parseForeignContent(p *parser) bool { p.acknowledgeSelfClosingTag() } case EndTagToken: + if strings.EqualFold(p.oe[len(p.oe)-1].Data, p.tok.Data) { + p.oe = p.oe[:len(p.oe)-1] + return true + } for i := len(p.oe) - 1; i >= 0; i-- { - if p.oe[i].Namespace == "" { - return p.im(p) - } if strings.EqualFold(p.oe[i].Data, p.tok.Data) { p.oe = p.oe[:i] + return true + } + if i > 0 && p.oe[i-1].Namespace == "" { break } } - return true + return p.im(p) default: // Ignore the token. 
} diff --git a/html/parse_test.go b/html/parse_test.go index fea110a4..ed9e9155 100644 --- a/html/parse_test.go +++ b/html/parse_test.go @@ -251,31 +251,35 @@ func TestParser(t *testing.T) { t.Fatal(err) } for _, tf := range testFiles { - f, err := os.Open(tf) - if err != nil { - t.Fatal(err) - } - defer f.Close() - r := bufio.NewReader(f) - - for i := 0; ; i++ { - ta, err := readParseTest(r) - if err == io.EOF { - break - } + t.Run(tf, func(t *testing.T) { + f, err := os.Open(tf) if err != nil { t.Fatal(err) } - if parseTestBlacklist[ta.text] { - continue - } + defer f.Close() + r := bufio.NewReader(f) - err = testParseCase(ta.text, ta.want, ta.context, ParseOptionEnableScripting(ta.scripting)) + for i := 0; ; i++ { + ta, err := readParseTest(r) + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + if parseTestBlacklist[ta.text] { + continue + } - if err != nil { - t.Errorf("%s test #%d %q, %s", tf, i, ta.text, err) + t.Run(fmt.Sprint(i), func(t *testing.T) { + err = testParseCase(ta.text, ta.want, ta.context, ParseOptionEnableScripting(ta.scripting)) + + if err != nil { + t.Errorf("%s test #%d %q, %s", tf, i, ta.text, err) + } + }) } - } + }) } } } @@ -506,3 +510,10 @@ func BenchmarkParser(b *testing.B) { Parse(bytes.NewBuffer(buf)) } } + +func TestIssue70179(t *testing.T) { + _, err := Parse(strings.NewReader("")) + if err != nil { + t.Fatalf("unexpected failure: %v", err) + } +}