From cdf8951060a3e86736adc8a8b3f702a8cb22a3fa Mon Sep 17 00:00:00 2001
From: David Finkel <david.finkel@gmail.com>
Date: Thu, 5 Feb 2026 18:10:43 -0500
Subject: [PATCH] runtime: printquoted: preserve bytes of invalid UTF-8
 encodings

Previously, printquoted would render each byte of an invalid UTF-8
sequence as "\uFFFD", which was lossy.

This CL adjusts printquoted to distinguish valid encodings of U+FFFD
from invalid encodings; each byte of the latter is now printed
losslessly as "\xXX".

Updates #76349

Change-Id: If9a877f01b497763425d9d11a58eb2a6e2c816b2
Reviewed-on: https://go-review.googlesource.com/c/go/+/742305
Reviewed-by: Alan Donovan <adonovan@google.com>
Auto-Submit: Michael Pratt <mpratt@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
---
 src/runtime/print.go             | 10 +++++++++-
 src/runtime/print_quoted_test.go |  6 ++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/runtime/print.go b/src/runtime/print.go
index 5d1bc22809..f39df39d79 100644
--- a/src/runtime/print.go
+++ b/src/runtime/print.go
@@ -200,7 +200,7 @@ func printhex(v uint64) {
 func printquoted(s string) {
 	printlock()
 	gwrite([]byte(`"`))
-	for _, r := range s {
+	for i, r := range s {
 		switch r {
 		case '\n':
 			gwrite([]byte(`\n`))
@@ -215,6 +215,14 @@ func printquoted(s string) {
 		case '\\', '"':
 			gwrite([]byte{byte('\\'), byte(r)})
 			continue
+		case runeError:
+			// Distinguish errors from a valid encoding of U+FFFD.
+			if _, j := decoderune(s, i); j == i+1 {
+				gwrite(bytes(`\x`))
+				printhexopts(false, 2, uint64(s[i]))
+				continue
+			}
+			// Fall through to quoting.
 		}
 		// For now, only allow basic printable ascii through unescaped
 		if r >= ' ' && r <= '~' {
diff --git a/src/runtime/print_quoted_test.go b/src/runtime/print_quoted_test.go
index f9e947b569..a3a87a07c5 100644
--- a/src/runtime/print_quoted_test.go
+++ b/src/runtime/print_quoted_test.go
@@ -20,6 +20,12 @@ func TestPrintQuoted(t *testing.T) {
 		// make sure null and escape bytes are properly escaped
 		{in: "b\033it", expected: `"b\x1bit"`},
 		{in: "b\000ar", expected: `"b\x00ar"`},
+		// Make sure invalid UTF8 bytes make it through as expected
+		{in: "b\xfdar", expected: `"b\xfdar"`},
+		{in: "b\xfda", expected: `"b\xfda"`},
+		{in: "b\xfd\xffar", expected: `"b\xfd\xffar"`},
+		// make sure the unicode replacement character gets correctly escaped
+		{in: "\ufffd!!!!", expected: `"\ufffd!!!!"`},
 		// verify that simple 16-bit unicode runes are escaped with \u, including a greek upper-case sigma and an arbitrary unicode character.
 		{in: "\u1234Σ", expected: `"\u1234\u03a3"`},
 		// verify that 32-bit unicode runes are escaped with \U along with tabs