diff --git a/html/charset/charset.go b/html/charset/charset.go
index f91c5ea4..13bed159 100644
--- a/html/charset/charset.go
+++ b/html/charset/charset.go
@@ -26,14 +26,23 @@ import (
// Lookup returns the encoding with the specified label, and its canonical
// name. It returns nil and the empty string if label is not one of the
// standard encodings for HTML. Matching is case-insensitive and ignores
-// leading and trailing whitespace.
+// leading and trailing whitespace. Encoders created from the returned encoding
+// use HTML escape sequences for runes the character set does not support.
func Lookup(label string) (e encoding.Encoding, name string) {
e, err := htmlindex.Get(label)
if err != nil {
return nil, ""
}
name, _ = htmlindex.Name(e)
- return e, name
+ return &htmlEncoding{e}, name // wrap so NewEncoder HTML-escapes unsupported runes
+}
+
+type htmlEncoding struct{ encoding.Encoding }
+
+func (h *htmlEncoding) NewEncoder() *encoding.Encoder {
+ // HTML requires a non-terminating legacy encoder, so the embedded encoding's
+ // encoder is wrapped to substitute HTML escapes for unsupported code points.
+ return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder())
}
// DetermineEncoding determines the encoding of an HTML document by examining
diff --git a/html/charset/charset_test.go b/html/charset/charset_test.go
index 31941ca8..f509448c 100644
--- a/html/charset/charset_test.go
+++ b/html/charset/charset_test.go
@@ -21,9 +21,12 @@ func transformString(t transform.Transformer, s string) (string, error) {
return string(b), err
}
-var testCases = []struct {
+type testCase struct {
utf8, other, otherEncoding string
-}{
+}
+
+// testCases for encoding and decoding.
+var testCases = []testCase{
{"Résumé", "Résumé", "utf8"},
{"Résumé", "R\xe9sum\xe9", "latin1"},
{"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
@@ -86,6 +89,11 @@ func TestDecode(t *testing.T) {
}
func TestEncode(t *testing.T) {
+ testCases := append(testCases, []testCase{
+ // U+0144 LATIN SMALL LETTER N WITH ACUTE not supported by encoding.
+ {"Gdańsk", "Gda&#324;sk", "ISO-8859-11"},
+ {"\ufffd", "&#65533;", "ISO-8859-11"},
+ }...)
for _, tc := range testCases {
e, _ := Lookup(tc.otherEncoding)
if e == nil {