aboutsummaryrefslogtreecommitdiffstats
path: root/Godeps/_workspace/src/golang.org/x/text/encoding/encoding.go
diff options
context:
space:
mode:
Diffstat (limited to 'Godeps/_workspace/src/golang.org/x/text/encoding/encoding.go')
-rw-r--r--Godeps/_workspace/src/golang.org/x/text/encoding/encoding.go179
1 files changed, 179 insertions, 0 deletions
diff --git a/Godeps/_workspace/src/golang.org/x/text/encoding/encoding.go b/Godeps/_workspace/src/golang.org/x/text/encoding/encoding.go
new file mode 100644
index 000000000..2f99a02ac
--- /dev/null
+++ b/Godeps/_workspace/src/golang.org/x/text/encoding/encoding.go
@@ -0,0 +1,179 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package encoding defines an interface for character encodings, such as Shift
+// JIS and Windows 1252, that can convert to and from UTF-8.
+//
+// To convert the bytes of an io.Reader r from the encoding e to UTF-8:
+// rInUTF8 := transform.NewReader(r, e.NewDecoder())
+// and to convert from UTF-8 to the encoding e:
+// wInUTF8 := transform.NewWriter(w, e.NewEncoder())
+// In both cases, import "golang.org/x/text/transform".
+//
+// Encoding implementations are provided in other packages, such as
+// golang.org/x/text/encoding/charmap and
+// golang.org/x/text/encoding/japanese.
+package encoding
+
+import (
+ "errors"
+ "unicode/utf8"
+
+ "golang.org/x/text/transform"
+)
+
+// Encoding is a character set encoding that can be transformed to and from
+// UTF-8.
+type Encoding interface {
+ // NewDecoder returns a transformer that converts to UTF-8.
+ //
+ // Transforming source bytes that are not of that encoding will not
+ // result in an error per se. Each byte that cannot be transcoded will
+ // be represented in the output by the UTF-8 encoding of '\uFFFD', the
+ // replacement rune.
+ NewDecoder() transform.Transformer
+
+ // NewEncoder returns a transformer that converts from UTF-8.
+ //
+ // Transforming source bytes that are not valid UTF-8 will not result in
+ // an error per se. Each rune that cannot be transcoded will be
+ // represented in the output by an encoding-specific replacement such as
+ // "\x1a" (the ASCII substitute character) or "\xff\xfd". To return
+ // early with error instead, use transform.Chain to preprocess the data
+ // with a UTF8Validator.
+ NewEncoder() transform.Transformer
+}
+
+// ASCIISub is the ASCII substitute character, as recommended by
+// http://unicode.org/reports/tr36/#Text_Comparison
+const ASCIISub = '\x1a'
+
+// Nop is the nop encoding. Its transformed bytes are the same as the source
+// bytes; it does not replace invalid UTF-8 sequences.
+var Nop Encoding = nop{}
+
+type nop struct{}
+
+func (nop) NewDecoder() transform.Transformer {
+ return transform.Nop
+}
+
+func (nop) NewEncoder() transform.Transformer {
+ return transform.Nop
+}
+
+// Replacement is the replacement encoding. Decoding from the replacement
+// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
+// the replacement encoding yields the same as the source bytes except that
+// invalid UTF-8 is converted to '\uFFFD'.
+//
+// It is defined at http://encoding.spec.whatwg.org/#replacement
+var Replacement Encoding = replacement{}
+
+type replacement struct{}
+
+func (replacement) NewDecoder() transform.Transformer {
+ return replacementDecoder{}
+}
+
+func (replacement) NewEncoder() transform.Transformer {
+ return replacementEncoder{}
+}
+
+type replacementDecoder struct{ transform.NopResetter }
+
+func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ if len(dst) < 3 {
+ return 0, 0, transform.ErrShortDst
+ }
+ if atEOF {
+ const fffd = "\ufffd"
+ dst[0] = fffd[0]
+ dst[1] = fffd[1]
+ dst[2] = fffd[2]
+ nDst = 3
+ }
+ return nDst, len(src), nil
+}
+
+type replacementEncoder struct{ transform.NopResetter }
+
+func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ r, size := rune(0), 0
+
+ for ; nSrc < len(src); nSrc += size {
+ r = rune(src[nSrc])
+
+ // Decode a 1-byte rune.
+ if r < utf8.RuneSelf {
+ size = 1
+
+ } else {
+ // Decode a multi-byte rune.
+ r, size = utf8.DecodeRune(src[nSrc:])
+ if size == 1 {
+ // All valid runes of size 1 (those below utf8.RuneSelf) were
+ // handled above. We have invalid UTF-8 or we haven't seen the
+ // full character yet.
+ if !atEOF && !utf8.FullRune(src[nSrc:]) {
+ err = transform.ErrShortSrc
+ break
+ }
+ r = '\ufffd'
+ }
+ }
+
+ if nDst+utf8.RuneLen(r) > len(dst) {
+ err = transform.ErrShortDst
+ break
+ }
+ nDst += utf8.EncodeRune(dst[nDst:], r)
+ }
+ return nDst, nSrc, err
+}
+
+// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
+var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
+
+// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
+// input byte that is not valid UTF-8.
+var UTF8Validator transform.Transformer = utf8Validator{}
+
+type utf8Validator struct{ transform.NopResetter }
+
+func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+ n := len(src)
+ if n > len(dst) {
+ n = len(dst)
+ }
+ for i := 0; i < n; {
+ if c := src[i]; c < utf8.RuneSelf {
+ dst[i] = c
+ i++
+ continue
+ }
+ _, size := utf8.DecodeRune(src[i:])
+ if size == 1 {
+ // All valid runes of size 1 (those below utf8.RuneSelf) were
+ // handled above. We have invalid UTF-8 or we haven't seen the
+ // full character yet.
+ err = ErrInvalidUTF8
+ if !atEOF && !utf8.FullRune(src[i:]) {
+ err = transform.ErrShortSrc
+ }
+ return i, i, err
+ }
+ if i+size > len(dst) {
+ return i, i, transform.ErrShortDst
+ }
+ for ; size > 0; size-- {
+ dst[i] = src[i]
+ i++
+ }
+ }
+ if len(src) > len(dst) {
+ err = transform.ErrShortDst
+ }
+ return n, n, err
+}