mirror of
https://github.com/golang/net.git
synced 2026-03-31 18:37:08 +09:00
go.net/html/charset: add NewReader
NewReader is a convenience function for finding the encoding of an io.Reader and making a UTF-8 version of that Reader. R=nigeltao CC=golang-dev https://golang.org/cl/43510043
This commit is contained in:
committed by
Nigel Tao
parent
74213743f3
commit
3f04d1ffd7
@@ -6,6 +6,7 @@ package charset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"mime"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
@@ -13,6 +14,7 @@ import (
|
||||
"code.google.com/p/go.net/html"
|
||||
"code.google.com/p/go.text/encoding"
|
||||
"code.google.com/p/go.text/encoding/charmap"
|
||||
"code.google.com/p/go.text/transform"
|
||||
)
|
||||
|
||||
// Lookup returns the encoding with the specified label, and its canonical
|
||||
@@ -83,6 +85,27 @@ func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding,
|
||||
return charmap.Windows1252, "windows-1252", false
|
||||
}
|
||||
|
||||
// NewReader returns an io.Reader that converts the content of r to UTF-8.
|
||||
// It calls DetermineEncoding to find out what r's encoding is.
|
||||
func NewReader(r io.Reader, contentType string) (io.Reader, error) {
|
||||
preview := make([]byte, 1024)
|
||||
n, err := io.ReadFull(r, preview)
|
||||
switch {
|
||||
case err == io.ErrUnexpectedEOF:
|
||||
preview = preview[:n]
|
||||
r = bytes.NewReader(preview)
|
||||
case err != nil:
|
||||
return nil, err
|
||||
default:
|
||||
r = io.MultiReader(bytes.NewReader(preview), r)
|
||||
}
|
||||
|
||||
if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
|
||||
r = transform.NewReader(r, e.NewDecoder())
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func prescan(content []byte) (e encoding.Encoding, name string) {
|
||||
z := html.NewTokenizer(bytes.NewReader(content))
|
||||
for {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package charset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
@@ -143,6 +144,40 @@ func TestSniff(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
for _, tc := range sniffTestCases {
|
||||
content, err := ioutil.ReadFile("testdata/" + tc.filename)
|
||||
if err != nil {
|
||||
t.Errorf("%s: error reading file: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
r, err := NewReader(bytes.NewReader(content), tc.declared)
|
||||
if err != nil {
|
||||
t.Errorf("%s: error creating reader: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
got, err := ioutil.ReadAll(r)
|
||||
if err != nil {
|
||||
t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
e, _ := Lookup(tc.want)
|
||||
want, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder()))
|
||||
if err != nil {
|
||||
t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if !bytes.Equal(got, want) {
|
||||
t.Errorf("%s: got %q, want %q", tc.filename, got, want)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var metaTestCases = []struct {
|
||||
meta, want string
|
||||
}{
|
||||
|
||||
Reference in New Issue
Block a user