mirror of
https://github.com/golang/go.git
synced 2026-04-01 17:07:17 +09:00
runtime: printquoted: preserve bytes of invalid UTF-8 encodings
Previously, printquoted would render each byte of an invalid UTF-8 sequence as "\uFFFD", which was lossy. This CL adjusts printquoted to distinguish valid encodings of U+FFFD from invalid encodings; each byte of the latter is now printed losslessly as "\xXX".

Updates #76349

Change-Id: If9a877f01b497763425d9d11a58eb2a6e2c816b2
Reviewed-on: https://go-review.googlesource.com/c/go/+/742305
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
committed by
Gopher Robot
parent
215a070a04
commit
cdf8951060
@@ -200,7 +200,7 @@ func printhex(v uint64) {
|
||||
func printquoted(s string) {
|
||||
printlock()
|
||||
gwrite([]byte(`"`))
|
||||
for _, r := range s {
|
||||
for i, r := range s {
|
||||
switch r {
|
||||
case '\n':
|
||||
gwrite([]byte(`\n`))
|
||||
@@ -215,6 +215,14 @@ func printquoted(s string) {
|
||||
case '\\', '"':
|
||||
gwrite([]byte{byte('\\'), byte(r)})
|
||||
continue
|
||||
case runeError:
|
||||
// Distinguish errors from a valid encoding of U+FFFD.
|
||||
if _, j := decoderune(s, i); j == i+1 {
|
||||
gwrite(bytes(`\x`))
|
||||
printhexopts(false, 2, uint64(s[i]))
|
||||
continue
|
||||
}
|
||||
// Fall through to quoting.
|
||||
}
|
||||
// For now, only allow basic printable ascii through unescaped
|
||||
if r >= ' ' && r <= '~' {
|
||||
|
||||
@@ -20,6 +20,12 @@ func TestPrintQuoted(t *testing.T) {
|
||||
// make sure null and escape bytes are properly escaped
|
||||
{in: "b\033it", expected: `"b\x1bit"`},
|
||||
{in: "b\000ar", expected: `"b\x00ar"`},
|
||||
// Make sure invalid UTF8 bytes make it through as expected
|
||||
{in: "b\xfdar", expected: `"b\xfdar"`},
|
||||
{in: "b\xfda", expected: `"b\xfda"`},
|
||||
{in: "b\xfd\xffar", expected: `"b\xfd\xffar"`},
|
||||
// make sure the unicode replacement character gets correctly escaped
|
||||
{in: "\ufffd!!!!", expected: `"\ufffd!!!!"`},
|
||||
// verify that simple 16-bit Unicode runes are escaped with \u, including a Greek upper-case sigma and an arbitrary Unicode character.
|
||||
{in: "\u1234Σ", expected: `"\u1234\u03a3"`},
|
||||
// verify that 32-bit unicode runes are escaped with \U along with tabs
|
||||
|
||||
Reference in New Issue
Block a user