mirror of
https://github.com/golang/net.git
synced 2026-03-31 10:27:08 +09:00
html/charset: handle unsupported code points for encoding
Change-Id: I11ffc61623496fae6b32e678c91f7609d71aefe5 Reviewed-on: https://go-review.googlesource.com/17961 Reviewed-by: Hyang-Ah Hana Kim <hyangah@gmail.com> Reviewed-by: Andy Balholm <andy@balholm.com>
This commit is contained in:
@@ -26,14 +26,23 @@ import (
|
||||
// Lookup returns the encoding with the specified label, and its canonical
|
||||
// name. It returns nil and the empty string if label is not one of the
|
||||
// standard encodings for HTML. Matching is case-insensitive and ignores
|
||||
// leading and trailing whitespace.
|
||||
// leading and trailing whitespace. Encoders will use HTML escape sequences for
|
||||
// runes that are not supported by the character set.
|
||||
func Lookup(label string) (e encoding.Encoding, name string) {
|
||||
e, err := htmlindex.Get(label)
|
||||
if err != nil {
|
||||
return nil, ""
|
||||
}
|
||||
name, _ = htmlindex.Name(e)
|
||||
return e, name
|
||||
return &htmlEncoding{e}, name
|
||||
}
|
||||
|
||||
type htmlEncoding struct{ encoding.Encoding }
|
||||
|
||||
func (h *htmlEncoding) NewEncoder() *encoding.Encoder {
|
||||
// HTML requires a non-terminating legacy encoder. We use HTML escapes to
|
||||
// substitute unsupported code points.
|
||||
return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder())
|
||||
}
|
||||
|
||||
// DetermineEncoding determines the encoding of an HTML document by examining
|
||||
|
||||
@@ -21,9 +21,12 @@ func transformString(t transform.Transformer, s string) (string, error) {
|
||||
return string(b), err
|
||||
}
|
||||
|
||||
var testCases = []struct {
|
||||
type testCase struct {
|
||||
utf8, other, otherEncoding string
|
||||
}{
|
||||
}
|
||||
|
||||
// testCases for encoding and decoding.
|
||||
var testCases = []testCase{
|
||||
{"Résumé", "Résumé", "utf8"},
|
||||
{"Résumé", "R\xe9sum\xe9", "latin1"},
|
||||
{"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
|
||||
@@ -86,6 +89,11 @@ func TestDecode(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestEncode(t *testing.T) {
|
||||
testCases := append(testCases, []testCase{
|
||||
// U+0144 LATIN SMALL LETTER N WITH ACUTE not supported by encoding.
|
||||
{"Gdańsk", "Gdańsk", "ISO-8859-11"},
|
||||
{"\ufffd", "�", "ISO-8859-11"},
|
||||
}...)
|
||||
for _, tc := range testCases {
|
||||
e, _ := Lookup(tc.otherEncoding)
|
||||
if e == nil {
|
||||
|
||||
Reference in New Issue
Block a user