aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/golang.org/x/text/encoding
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/golang.org/x/text/encoding')
-rw-r--r--vendor/golang.org/x/text/encoding/charmap/charmap.go84
-rw-r--r--vendor/golang.org/x/text/encoding/charmap/maketables.go556
-rw-r--r--vendor/golang.org/x/text/encoding/charmap/tables.go190
-rw-r--r--vendor/golang.org/x/text/encoding/encoding.go2
-rw-r--r--vendor/golang.org/x/text/encoding/htmlindex/gen.go170
-rw-r--r--vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go2
-rw-r--r--vendor/golang.org/x/text/encoding/htmlindex/tables.go7
-rw-r--r--vendor/golang.org/x/text/encoding/internal/identifier/gen.go137
-rw-r--r--vendor/golang.org/x/text/encoding/internal/identifier/identifier.go2
-rw-r--r--vendor/golang.org/x/text/encoding/internal/identifier/mib.go12
-rw-r--r--vendor/golang.org/x/text/encoding/japanese/eucjp.go72
-rw-r--r--vendor/golang.org/x/text/encoding/japanese/iso2022jp.go59
-rw-r--r--vendor/golang.org/x/text/encoding/japanese/maketables.go161
-rw-r--r--vendor/golang.org/x/text/encoding/japanese/shiftjis.go34
-rw-r--r--vendor/golang.org/x/text/encoding/korean/euckr.go39
-rw-r--r--vendor/golang.org/x/text/encoding/korean/maketables.go143
-rw-r--r--vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go60
-rw-r--r--vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go49
-rw-r--r--vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go161
-rw-r--r--vendor/golang.org/x/text/encoding/traditionalchinese/big5.go25
-rw-r--r--vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go140
-rw-r--r--vendor/golang.org/x/text/encoding/unicode/unicode.go2
22 files changed, 345 insertions, 1762 deletions
diff --git a/vendor/golang.org/x/text/encoding/charmap/charmap.go b/vendor/golang.org/x/text/encoding/charmap/charmap.go
index 6e62a8374..e89ff0734 100644
--- a/vendor/golang.org/x/text/encoding/charmap/charmap.go
+++ b/vendor/golang.org/x/text/encoding/charmap/charmap.go
@@ -33,32 +33,32 @@ var (
ISO8859_8I encoding.Encoding = &iso8859_8I
iso8859_6E = internal.Encoding{
- ISO8859_6,
- "ISO-8859-6E",
- identifier.ISO88596E,
+ Encoding: ISO8859_6,
+ Name: "ISO-8859-6E",
+ MIB: identifier.ISO88596E,
}
iso8859_6I = internal.Encoding{
- ISO8859_6,
- "ISO-8859-6I",
- identifier.ISO88596I,
+ Encoding: ISO8859_6,
+ Name: "ISO-8859-6I",
+ MIB: identifier.ISO88596I,
}
iso8859_8E = internal.Encoding{
- ISO8859_8,
- "ISO-8859-8E",
- identifier.ISO88598E,
+ Encoding: ISO8859_8,
+ Name: "ISO-8859-8E",
+ MIB: identifier.ISO88598E,
}
iso8859_8I = internal.Encoding{
- ISO8859_8,
- "ISO-8859-8I",
- identifier.ISO88598I,
+ Encoding: ISO8859_8,
+ Name: "ISO-8859-8I",
+ MIB: identifier.ISO88598I,
}
)
// All is a list of all defined encodings in this package.
-var All = listAll
+var All []encoding.Encoding = listAll
// TODO: implement these encodings, in order of importance.
// ASCII, ISO8859_1: Rather common. Close to Windows 1252.
@@ -70,8 +70,8 @@ type utf8Enc struct {
data [3]byte
}
-// charmap describes an 8-bit character set encoding.
-type charmap struct {
+// Charmap is an 8-bit character set encoding.
+type Charmap struct {
// name is the encoding's name.
name string
// mib is the encoding type of this encoder.
@@ -79,7 +79,7 @@ type charmap struct {
// asciiSuperset states whether the encoding is a superset of ASCII.
asciiSuperset bool
// low is the lower bound of the encoded byte for a non-ASCII rune. If
- // charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
+ // Charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
low uint8
// replacement is the encoded replacement character.
replacement byte
@@ -91,26 +91,30 @@ type charmap struct {
encode [256]uint32
}
-func (m *charmap) NewDecoder() *encoding.Decoder {
+// NewDecoder implements the encoding.Encoding interface.
+func (m *Charmap) NewDecoder() *encoding.Decoder {
return &encoding.Decoder{Transformer: charmapDecoder{charmap: m}}
}
-func (m *charmap) NewEncoder() *encoding.Encoder {
+// NewEncoder implements the encoding.Encoding interface.
+func (m *Charmap) NewEncoder() *encoding.Encoder {
return &encoding.Encoder{Transformer: charmapEncoder{charmap: m}}
}
-func (m *charmap) String() string {
+// String returns the Charmap's name.
+func (m *Charmap) String() string {
return m.name
}
-func (m *charmap) ID() (mib identifier.MIB, other string) {
+// ID implements an internal interface.
+func (m *Charmap) ID() (mib identifier.MIB, other string) {
return m.mib, ""
}
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
type charmapDecoder struct {
transform.NopResetter
- charmap *charmap
+ charmap *Charmap
}
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -142,10 +146,22 @@ func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int,
return nDst, nSrc, err
}
+// DecodeByte returns the Charmap's rune decoding of the byte b.
+func (m *Charmap) DecodeByte(b byte) rune {
+ switch x := &m.decode[b]; x.len {
+ case 1:
+ return rune(x.data[0])
+ case 2:
+ return rune(x.data[0]&0x1f)<<6 | rune(x.data[1]&0x3f)
+ default:
+ return rune(x.data[0]&0x0f)<<12 | rune(x.data[1]&0x3f)<<6 | rune(x.data[2]&0x3f)
+ }
+}
+
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
type charmapEncoder struct {
transform.NopResetter
- charmap *charmap
+ charmap *Charmap
}
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -207,3 +223,27 @@ loop:
}
return nDst, nSrc, err
}
+
+// EncodeRune returns the Charmap's byte encoding of the rune r. ok is whether
+// r is in the Charmap's repertoire. If not, b is set to the Charmap's
+// replacement byte. This is often the ASCII substitute character '\x1a'.
+func (m *Charmap) EncodeRune(r rune) (b byte, ok bool) {
+ if r < utf8.RuneSelf && m.asciiSuperset {
+ return byte(r), true
+ }
+ for low, high := int(m.low), 0x100; ; {
+ if low >= high {
+ return m.replacement, false
+ }
+ mid := (low + high) / 2
+ got := m.encode[mid]
+ gotRune := rune(got & (1<<24 - 1))
+ if gotRune < r {
+ low = mid + 1
+ } else if gotRune > r {
+ high = mid
+ } else {
+ return byte(got >> 24), true
+ }
+ }
+}
diff --git a/vendor/golang.org/x/text/encoding/charmap/maketables.go b/vendor/golang.org/x/text/encoding/charmap/maketables.go
deleted file mode 100644
index a691acb14..000000000
--- a/vendor/golang.org/x/text/encoding/charmap/maketables.go
+++ /dev/null
@@ -1,556 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import (
- "bufio"
- "fmt"
- "log"
- "net/http"
- "sort"
- "strings"
- "unicode/utf8"
-
- "golang.org/x/text/encoding"
- "golang.org/x/text/internal/gen"
-)
-
-const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
- "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
- ` !"#$%&'()*+,-./0123456789:;<=>?` +
- `@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
- "`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
-
-var encodings = []struct {
- name string
- mib string
- comment string
- varName string
- replacement byte
- mapping string
-}{
- {
- "IBM Code Page 037",
- "IBM037",
- "",
- "CodePage037",
- 0x3f,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM037-2.1.2.ucm",
- },
- {
- "IBM Code Page 437",
- "PC8CodePage437",
- "",
- "CodePage437",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
- },
- {
- "IBM Code Page 850",
- "PC850Multilingual",
- "",
- "CodePage850",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
- },
- {
- "IBM Code Page 852",
- "PCp852",
- "",
- "CodePage852",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
- },
- {
- "IBM Code Page 855",
- "IBM855",
- "",
- "CodePage855",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
- },
- {
- "Windows Code Page 858", // PC latin1 with Euro
- "IBM00858",
- "",
- "CodePage858",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
- },
- {
- "IBM Code Page 860",
- "IBM860",
- "",
- "CodePage860",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
- },
- {
- "IBM Code Page 862",
- "PC862LatinHebrew",
- "",
- "CodePage862",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
- },
- {
- "IBM Code Page 863",
- "IBM863",
- "",
- "CodePage863",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
- },
- {
- "IBM Code Page 865",
- "IBM865",
- "",
- "CodePage865",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
- },
- {
- "IBM Code Page 866",
- "IBM866",
- "",
- "CodePage866",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-ibm866.txt",
- },
- {
- "IBM Code Page 1047",
- "IBM1047",
- "",
- "CodePage1047",
- 0x3f,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM1047-2.1.2.ucm",
- },
- {
- "IBM Code Page 1140",
- "IBM01140",
- "",
- "CodePage1140",
- 0x3f,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/ibm-1140_P100-1997.ucm",
- },
- {
- "ISO 8859-1",
- "ISOLatin1",
- "",
- "ISO8859_1",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
- },
- {
- "ISO 8859-2",
- "ISOLatin2",
- "",
- "ISO8859_2",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
- },
- {
- "ISO 8859-3",
- "ISOLatin3",
- "",
- "ISO8859_3",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
- },
- {
- "ISO 8859-4",
- "ISOLatin4",
- "",
- "ISO8859_4",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
- },
- {
- "ISO 8859-5",
- "ISOLatinCyrillic",
- "",
- "ISO8859_5",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
- },
- {
- "ISO 8859-6",
- "ISOLatinArabic",
- "",
- "ISO8859_6,ISO8859_6E,ISO8859_6I",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
- },
- {
- "ISO 8859-7",
- "ISOLatinGreek",
- "",
- "ISO8859_7",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
- },
- {
- "ISO 8859-8",
- "ISOLatinHebrew",
- "",
- "ISO8859_8,ISO8859_8E,ISO8859_8I",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
- },
- {
- "ISO 8859-9",
- "ISOLatin5",
- "",
- "ISO8859_9",
- encoding.ASCIISub,
- "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_9-1999.ucm",
- },
- {
- "ISO 8859-10",
- "ISOLatin6",
- "",
- "ISO8859_10",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
- },
- {
- "ISO 8859-13",
- "ISO885913",
- "",
- "ISO8859_13",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
- },
- {
- "ISO 8859-14",
- "ISO885914",
- "",
- "ISO8859_14",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
- },
- {
- "ISO 8859-15",
- "ISO885915",
- "",
- "ISO8859_15",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
- },
- {
- "ISO 8859-16",
- "ISO885916",
- "",
- "ISO8859_16",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
- },
- {
- "KOI8-R",
- "KOI8R",
- "",
- "KOI8R",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-koi8-r.txt",
- },
- {
- "KOI8-U",
- "KOI8U",
- "",
- "KOI8U",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-koi8-u.txt",
- },
- {
- "Macintosh",
- "Macintosh",
- "",
- "Macintosh",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-macintosh.txt",
- },
- {
- "Macintosh Cyrillic",
- "MacintoshCyrillic",
- "",
- "MacintoshCyrillic",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
- },
- {
- "Windows 874",
- "Windows874",
- "",
- "Windows874",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-874.txt",
- },
- {
- "Windows 1250",
- "Windows1250",
- "",
- "Windows1250",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1250.txt",
- },
- {
- "Windows 1251",
- "Windows1251",
- "",
- "Windows1251",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1251.txt",
- },
- {
- "Windows 1252",
- "Windows1252",
- "",
- "Windows1252",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1252.txt",
- },
- {
- "Windows 1253",
- "Windows1253",
- "",
- "Windows1253",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1253.txt",
- },
- {
- "Windows 1254",
- "Windows1254",
- "",
- "Windows1254",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1254.txt",
- },
- {
- "Windows 1255",
- "Windows1255",
- "",
- "Windows1255",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1255.txt",
- },
- {
- "Windows 1256",
- "Windows1256",
- "",
- "Windows1256",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1256.txt",
- },
- {
- "Windows 1257",
- "Windows1257",
- "",
- "Windows1257",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1257.txt",
- },
- {
- "Windows 1258",
- "Windows1258",
- "",
- "Windows1258",
- encoding.ASCIISub,
- "http://encoding.spec.whatwg.org/index-windows-1258.txt",
- },
- {
- "X-User-Defined",
- "XUserDefined",
- "It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
- "XUserDefined",
- encoding.ASCIISub,
- ascii +
- "\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
- "\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
- "\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
- "\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
- "\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
- "\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
- "\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
- "\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
- "\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
- "\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
- "\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
- "\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
- "\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
- "\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
- "\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
- "\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
- },
-}
-
-func getWHATWG(url string) string {
- res, err := http.Get(url)
- if err != nil {
- log.Fatalf("%q: Get: %v", url, err)
- }
- defer res.Body.Close()
-
- mapping := make([]rune, 128)
- for i := range mapping {
- mapping[i] = '\ufffd'
- }
-
- scanner := bufio.NewScanner(res.Body)
- for scanner.Scan() {
- s := strings.TrimSpace(scanner.Text())
- if s == "" || s[0] == '#' {
- continue
- }
- x, y := 0, 0
- if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
- log.Fatalf("could not parse %q", s)
- }
- if x < 0 || 128 <= x {
- log.Fatalf("code %d is out of range", x)
- }
- if 0x80 <= y && y < 0xa0 {
- // We diverge from the WHATWG spec by mapping control characters
- // in the range [0x80, 0xa0) to U+FFFD.
- continue
- }
- mapping[x] = rune(y)
- }
- return ascii + string(mapping)
-}
-
-func getUCM(url string) string {
- res, err := http.Get(url)
- if err != nil {
- log.Fatalf("%q: Get: %v", url, err)
- }
- defer res.Body.Close()
-
- mapping := make([]rune, 256)
- for i := range mapping {
- mapping[i] = '\ufffd'
- }
-
- charsFound := 0
- scanner := bufio.NewScanner(res.Body)
- for scanner.Scan() {
- s := strings.TrimSpace(scanner.Text())
- if s == "" || s[0] == '#' {
- continue
- }
- var c byte
- var r rune
- if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
- continue
- }
- mapping[c] = r
- charsFound++
- }
-
- if charsFound < 200 {
- log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
- }
-
- return string(mapping)
-}
-
-func main() {
- mibs := map[string]bool{}
- all := []string{}
-
- w := gen.NewCodeWriter()
- defer w.WriteGoFile("tables.go", "charmap")
-
- printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
-
- printf("import (\n")
- printf("\t\"golang.org/x/text/encoding\"\n")
- printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
- printf(")\n\n")
- for _, e := range encodings {
- varNames := strings.Split(e.varName, ",")
- all = append(all, varNames...)
- varName := varNames[0]
- switch {
- case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
- e.mapping = getWHATWG(e.mapping)
- case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
- e.mapping = getUCM(e.mapping)
- }
-
- asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
- if asciiSuperset {
- low = 0x80
- }
- lvn := 1
- if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
- lvn = 3
- }
- lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
- printf("// %s is the %s encoding.\n", varName, e.name)
- if e.comment != "" {
- printf("//\n// %s\n", e.comment)
- }
- printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
- varName, lowerVarName, lowerVarName, e.name)
- if mibs[e.mib] {
- log.Fatalf("MIB type %q declared multiple times.", e.mib)
- }
- printf("mib: identifier.%s,\n", e.mib)
- printf("asciiSuperset: %t,\n", asciiSuperset)
- printf("low: 0x%02x,\n", low)
- printf("replacement: 0x%02x,\n", e.replacement)
-
- printf("decode: [256]utf8Enc{\n")
- i, backMapping := 0, map[rune]byte{}
- for _, c := range e.mapping {
- if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
- backMapping[c] = byte(i)
- }
- var buf [8]byte
- n := utf8.EncodeRune(buf[:], c)
- if n > 3 {
- panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
- }
- printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
- if i%2 == 1 {
- printf("\n")
- }
- i++
- }
- printf("},\n")
-
- printf("encode: [256]uint32{\n")
- encode := make([]uint32, 0, 256)
- for c, i := range backMapping {
- encode = append(encode, uint32(i)<<24|uint32(c))
- }
- sort.Sort(byRune(encode))
- for len(encode) < cap(encode) {
- encode = append(encode, encode[len(encode)-1])
- }
- for i, enc := range encode {
- printf("0x%08x,", enc)
- if i%8 == 7 {
- printf("\n")
- }
- }
- printf("},\n}\n")
-
- // Add an estimate of the size of a single charmap{} struct value, which
- // includes two 256 elem arrays of 4 bytes and some extra fields, which
- // align to 3 uint64s on 64-bit architectures.
- w.Size += 2*4*256 + 3*8
- }
- // TODO: add proper line breaking.
- printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
-}
-
-type byRune []uint32
-
-func (b byRune) Len() int { return len(b) }
-func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
-func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
diff --git a/vendor/golang.org/x/text/encoding/charmap/tables.go b/vendor/golang.org/x/text/encoding/charmap/tables.go
index e36cd7adc..cf7281e9e 100644
--- a/vendor/golang.org/x/text/encoding/charmap/tables.go
+++ b/vendor/golang.org/x/text/encoding/charmap/tables.go
@@ -1,4 +1,4 @@
-// This file was generated by go generate; DO NOT EDIT
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package charmap
@@ -8,9 +8,9 @@ import (
)
// CodePage037 is the IBM Code Page 037 encoding.
-var CodePage037 encoding.Encoding = &codePage037
+var CodePage037 *Charmap = &codePage037
-var codePage037 = charmap{
+var codePage037 = Charmap{
name: "IBM Code Page 037",
mib: identifier.IBM037,
asciiSuperset: false,
@@ -183,9 +183,9 @@ var codePage037 = charmap{
}
// CodePage437 is the IBM Code Page 437 encoding.
-var CodePage437 encoding.Encoding = &codePage437
+var CodePage437 *Charmap = &codePage437
-var codePage437 = charmap{
+var codePage437 = Charmap{
name: "IBM Code Page 437",
mib: identifier.PC8CodePage437,
asciiSuperset: true,
@@ -358,9 +358,9 @@ var codePage437 = charmap{
}
// CodePage850 is the IBM Code Page 850 encoding.
-var CodePage850 encoding.Encoding = &codePage850
+var CodePage850 *Charmap = &codePage850
-var codePage850 = charmap{
+var codePage850 = Charmap{
name: "IBM Code Page 850",
mib: identifier.PC850Multilingual,
asciiSuperset: true,
@@ -533,9 +533,9 @@ var codePage850 = charmap{
}
// CodePage852 is the IBM Code Page 852 encoding.
-var CodePage852 encoding.Encoding = &codePage852
+var CodePage852 *Charmap = &codePage852
-var codePage852 = charmap{
+var codePage852 = Charmap{
name: "IBM Code Page 852",
mib: identifier.PCp852,
asciiSuperset: true,
@@ -708,9 +708,9 @@ var codePage852 = charmap{
}
// CodePage855 is the IBM Code Page 855 encoding.
-var CodePage855 encoding.Encoding = &codePage855
+var CodePage855 *Charmap = &codePage855
-var codePage855 = charmap{
+var codePage855 = Charmap{
name: "IBM Code Page 855",
mib: identifier.IBM855,
asciiSuperset: true,
@@ -883,9 +883,9 @@ var codePage855 = charmap{
}
// CodePage858 is the Windows Code Page 858 encoding.
-var CodePage858 encoding.Encoding = &codePage858
+var CodePage858 *Charmap = &codePage858
-var codePage858 = charmap{
+var codePage858 = Charmap{
name: "Windows Code Page 858",
mib: identifier.IBM00858,
asciiSuperset: true,
@@ -1058,9 +1058,9 @@ var codePage858 = charmap{
}
// CodePage860 is the IBM Code Page 860 encoding.
-var CodePage860 encoding.Encoding = &codePage860
+var CodePage860 *Charmap = &codePage860
-var codePage860 = charmap{
+var codePage860 = Charmap{
name: "IBM Code Page 860",
mib: identifier.IBM860,
asciiSuperset: true,
@@ -1233,9 +1233,9 @@ var codePage860 = charmap{
}
// CodePage862 is the IBM Code Page 862 encoding.
-var CodePage862 encoding.Encoding = &codePage862
+var CodePage862 *Charmap = &codePage862
-var codePage862 = charmap{
+var codePage862 = Charmap{
name: "IBM Code Page 862",
mib: identifier.PC862LatinHebrew,
asciiSuperset: true,
@@ -1408,9 +1408,9 @@ var codePage862 = charmap{
}
// CodePage863 is the IBM Code Page 863 encoding.
-var CodePage863 encoding.Encoding = &codePage863
+var CodePage863 *Charmap = &codePage863
-var codePage863 = charmap{
+var codePage863 = Charmap{
name: "IBM Code Page 863",
mib: identifier.IBM863,
asciiSuperset: true,
@@ -1583,9 +1583,9 @@ var codePage863 = charmap{
}
// CodePage865 is the IBM Code Page 865 encoding.
-var CodePage865 encoding.Encoding = &codePage865
+var CodePage865 *Charmap = &codePage865
-var codePage865 = charmap{
+var codePage865 = Charmap{
name: "IBM Code Page 865",
mib: identifier.IBM865,
asciiSuperset: true,
@@ -1758,9 +1758,9 @@ var codePage865 = charmap{
}
// CodePage866 is the IBM Code Page 866 encoding.
-var CodePage866 encoding.Encoding = &codePage866
+var CodePage866 *Charmap = &codePage866
-var codePage866 = charmap{
+var codePage866 = Charmap{
name: "IBM Code Page 866",
mib: identifier.IBM866,
asciiSuperset: true,
@@ -1933,9 +1933,9 @@ var codePage866 = charmap{
}
// CodePage1047 is the IBM Code Page 1047 encoding.
-var CodePage1047 encoding.Encoding = &codePage1047
+var CodePage1047 *Charmap = &codePage1047
-var codePage1047 = charmap{
+var codePage1047 = Charmap{
name: "IBM Code Page 1047",
mib: identifier.IBM1047,
asciiSuperset: false,
@@ -2108,9 +2108,9 @@ var codePage1047 = charmap{
}
// CodePage1140 is the IBM Code Page 1140 encoding.
-var CodePage1140 encoding.Encoding = &codePage1140
+var CodePage1140 *Charmap = &codePage1140
-var codePage1140 = charmap{
+var codePage1140 = Charmap{
name: "IBM Code Page 1140",
mib: identifier.IBM01140,
asciiSuperset: false,
@@ -2283,9 +2283,9 @@ var codePage1140 = charmap{
}
// ISO8859_1 is the ISO 8859-1 encoding.
-var ISO8859_1 encoding.Encoding = &iso8859_1
+var ISO8859_1 *Charmap = &iso8859_1
-var iso8859_1 = charmap{
+var iso8859_1 = Charmap{
name: "ISO 8859-1",
mib: identifier.ISOLatin1,
asciiSuperset: true,
@@ -2458,9 +2458,9 @@ var iso8859_1 = charmap{
}
// ISO8859_2 is the ISO 8859-2 encoding.
-var ISO8859_2 encoding.Encoding = &iso8859_2
+var ISO8859_2 *Charmap = &iso8859_2
-var iso8859_2 = charmap{
+var iso8859_2 = Charmap{
name: "ISO 8859-2",
mib: identifier.ISOLatin2,
asciiSuperset: true,
@@ -2633,9 +2633,9 @@ var iso8859_2 = charmap{
}
// ISO8859_3 is the ISO 8859-3 encoding.
-var ISO8859_3 encoding.Encoding = &iso8859_3
+var ISO8859_3 *Charmap = &iso8859_3
-var iso8859_3 = charmap{
+var iso8859_3 = Charmap{
name: "ISO 8859-3",
mib: identifier.ISOLatin3,
asciiSuperset: true,
@@ -2808,9 +2808,9 @@ var iso8859_3 = charmap{
}
// ISO8859_4 is the ISO 8859-4 encoding.
-var ISO8859_4 encoding.Encoding = &iso8859_4
+var ISO8859_4 *Charmap = &iso8859_4
-var iso8859_4 = charmap{
+var iso8859_4 = Charmap{
name: "ISO 8859-4",
mib: identifier.ISOLatin4,
asciiSuperset: true,
@@ -2983,9 +2983,9 @@ var iso8859_4 = charmap{
}
// ISO8859_5 is the ISO 8859-5 encoding.
-var ISO8859_5 encoding.Encoding = &iso8859_5
+var ISO8859_5 *Charmap = &iso8859_5
-var iso8859_5 = charmap{
+var iso8859_5 = Charmap{
name: "ISO 8859-5",
mib: identifier.ISOLatinCyrillic,
asciiSuperset: true,
@@ -3158,9 +3158,9 @@ var iso8859_5 = charmap{
}
// ISO8859_6 is the ISO 8859-6 encoding.
-var ISO8859_6 encoding.Encoding = &iso8859_6
+var ISO8859_6 *Charmap = &iso8859_6
-var iso8859_6 = charmap{
+var iso8859_6 = Charmap{
name: "ISO 8859-6",
mib: identifier.ISOLatinArabic,
asciiSuperset: true,
@@ -3333,9 +3333,9 @@ var iso8859_6 = charmap{
}
// ISO8859_7 is the ISO 8859-7 encoding.
-var ISO8859_7 encoding.Encoding = &iso8859_7
+var ISO8859_7 *Charmap = &iso8859_7
-var iso8859_7 = charmap{
+var iso8859_7 = Charmap{
name: "ISO 8859-7",
mib: identifier.ISOLatinGreek,
asciiSuperset: true,
@@ -3508,9 +3508,9 @@ var iso8859_7 = charmap{
}
// ISO8859_8 is the ISO 8859-8 encoding.
-var ISO8859_8 encoding.Encoding = &iso8859_8
+var ISO8859_8 *Charmap = &iso8859_8
-var iso8859_8 = charmap{
+var iso8859_8 = Charmap{
name: "ISO 8859-8",
mib: identifier.ISOLatinHebrew,
asciiSuperset: true,
@@ -3683,9 +3683,9 @@ var iso8859_8 = charmap{
}
// ISO8859_9 is the ISO 8859-9 encoding.
-var ISO8859_9 encoding.Encoding = &iso8859_9
+var ISO8859_9 *Charmap = &iso8859_9
-var iso8859_9 = charmap{
+var iso8859_9 = Charmap{
name: "ISO 8859-9",
mib: identifier.ISOLatin5,
asciiSuperset: true,
@@ -3858,9 +3858,9 @@ var iso8859_9 = charmap{
}
// ISO8859_10 is the ISO 8859-10 encoding.
-var ISO8859_10 encoding.Encoding = &iso8859_10
+var ISO8859_10 *Charmap = &iso8859_10
-var iso8859_10 = charmap{
+var iso8859_10 = Charmap{
name: "ISO 8859-10",
mib: identifier.ISOLatin6,
asciiSuperset: true,
@@ -4033,9 +4033,9 @@ var iso8859_10 = charmap{
}
// ISO8859_13 is the ISO 8859-13 encoding.
-var ISO8859_13 encoding.Encoding = &iso8859_13
+var ISO8859_13 *Charmap = &iso8859_13
-var iso8859_13 = charmap{
+var iso8859_13 = Charmap{
name: "ISO 8859-13",
mib: identifier.ISO885913,
asciiSuperset: true,
@@ -4208,9 +4208,9 @@ var iso8859_13 = charmap{
}
// ISO8859_14 is the ISO 8859-14 encoding.
-var ISO8859_14 encoding.Encoding = &iso8859_14
+var ISO8859_14 *Charmap = &iso8859_14
-var iso8859_14 = charmap{
+var iso8859_14 = Charmap{
name: "ISO 8859-14",
mib: identifier.ISO885914,
asciiSuperset: true,
@@ -4383,9 +4383,9 @@ var iso8859_14 = charmap{
}
// ISO8859_15 is the ISO 8859-15 encoding.
-var ISO8859_15 encoding.Encoding = &iso8859_15
+var ISO8859_15 *Charmap = &iso8859_15
-var iso8859_15 = charmap{
+var iso8859_15 = Charmap{
name: "ISO 8859-15",
mib: identifier.ISO885915,
asciiSuperset: true,
@@ -4558,9 +4558,9 @@ var iso8859_15 = charmap{
}
// ISO8859_16 is the ISO 8859-16 encoding.
-var ISO8859_16 encoding.Encoding = &iso8859_16
+var ISO8859_16 *Charmap = &iso8859_16
-var iso8859_16 = charmap{
+var iso8859_16 = Charmap{
name: "ISO 8859-16",
mib: identifier.ISO885916,
asciiSuperset: true,
@@ -4733,9 +4733,9 @@ var iso8859_16 = charmap{
}
// KOI8R is the KOI8-R encoding.
-var KOI8R encoding.Encoding = &koi8R
+var KOI8R *Charmap = &koi8R
-var koi8R = charmap{
+var koi8R = Charmap{
name: "KOI8-R",
mib: identifier.KOI8R,
asciiSuperset: true,
@@ -4908,9 +4908,9 @@ var koi8R = charmap{
}
// KOI8U is the KOI8-U encoding.
-var KOI8U encoding.Encoding = &koi8U
+var KOI8U *Charmap = &koi8U
-var koi8U = charmap{
+var koi8U = Charmap{
name: "KOI8-U",
mib: identifier.KOI8U,
asciiSuperset: true,
@@ -5083,9 +5083,9 @@ var koi8U = charmap{
}
// Macintosh is the Macintosh encoding.
-var Macintosh encoding.Encoding = &macintosh
+var Macintosh *Charmap = &macintosh
-var macintosh = charmap{
+var macintosh = Charmap{
name: "Macintosh",
mib: identifier.Macintosh,
asciiSuperset: true,
@@ -5258,9 +5258,9 @@ var macintosh = charmap{
}
// MacintoshCyrillic is the Macintosh Cyrillic encoding.
-var MacintoshCyrillic encoding.Encoding = &macintoshCyrillic
+var MacintoshCyrillic *Charmap = &macintoshCyrillic
-var macintoshCyrillic = charmap{
+var macintoshCyrillic = Charmap{
name: "Macintosh Cyrillic",
mib: identifier.MacintoshCyrillic,
asciiSuperset: true,
@@ -5433,9 +5433,9 @@ var macintoshCyrillic = charmap{
}
// Windows874 is the Windows 874 encoding.
-var Windows874 encoding.Encoding = &windows874
+var Windows874 *Charmap = &windows874
-var windows874 = charmap{
+var windows874 = Charmap{
name: "Windows 874",
mib: identifier.Windows874,
asciiSuperset: true,
@@ -5608,9 +5608,9 @@ var windows874 = charmap{
}
// Windows1250 is the Windows 1250 encoding.
-var Windows1250 encoding.Encoding = &windows1250
+var Windows1250 *Charmap = &windows1250
-var windows1250 = charmap{
+var windows1250 = Charmap{
name: "Windows 1250",
mib: identifier.Windows1250,
asciiSuperset: true,
@@ -5783,9 +5783,9 @@ var windows1250 = charmap{
}
// Windows1251 is the Windows 1251 encoding.
-var Windows1251 encoding.Encoding = &windows1251
+var Windows1251 *Charmap = &windows1251
-var windows1251 = charmap{
+var windows1251 = Charmap{
name: "Windows 1251",
mib: identifier.Windows1251,
asciiSuperset: true,
@@ -5958,9 +5958,9 @@ var windows1251 = charmap{
}
// Windows1252 is the Windows 1252 encoding.
-var Windows1252 encoding.Encoding = &windows1252
+var Windows1252 *Charmap = &windows1252
-var windows1252 = charmap{
+var windows1252 = Charmap{
name: "Windows 1252",
mib: identifier.Windows1252,
asciiSuperset: true,
@@ -6133,9 +6133,9 @@ var windows1252 = charmap{
}
// Windows1253 is the Windows 1253 encoding.
-var Windows1253 encoding.Encoding = &windows1253
+var Windows1253 *Charmap = &windows1253
-var windows1253 = charmap{
+var windows1253 = Charmap{
name: "Windows 1253",
mib: identifier.Windows1253,
asciiSuperset: true,
@@ -6308,9 +6308,9 @@ var windows1253 = charmap{
}
// Windows1254 is the Windows 1254 encoding.
-var Windows1254 encoding.Encoding = &windows1254
+var Windows1254 *Charmap = &windows1254
-var windows1254 = charmap{
+var windows1254 = Charmap{
name: "Windows 1254",
mib: identifier.Windows1254,
asciiSuperset: true,
@@ -6483,9 +6483,9 @@ var windows1254 = charmap{
}
// Windows1255 is the Windows 1255 encoding.
-var Windows1255 encoding.Encoding = &windows1255
+var Windows1255 *Charmap = &windows1255
-var windows1255 = charmap{
+var windows1255 = Charmap{
name: "Windows 1255",
mib: identifier.Windows1255,
asciiSuperset: true,
@@ -6593,7 +6593,7 @@ var windows1255 = charmap{
{2, [3]byte{0xd6, 0xb4, 0x00}}, {2, [3]byte{0xd6, 0xb5, 0x00}},
{2, [3]byte{0xd6, 0xb6, 0x00}}, {2, [3]byte{0xd6, 0xb7, 0x00}},
{2, [3]byte{0xd6, 0xb8, 0x00}}, {2, [3]byte{0xd6, 0xb9, 0x00}},
- {3, [3]byte{0xef, 0xbf, 0xbd}}, {2, [3]byte{0xd6, 0xbb, 0x00}},
+ {2, [3]byte{0xd6, 0xba, 0x00}}, {2, [3]byte{0xd6, 0xbb, 0x00}},
{2, [3]byte{0xd6, 0xbc, 0x00}}, {2, [3]byte{0xd6, 0xbd, 0x00}},
{2, [3]byte{0xd6, 0xbe, 0x00}}, {2, [3]byte{0xd6, 0xbf, 0x00}},
{2, [3]byte{0xd7, 0x80, 0x00}}, {2, [3]byte{0xd7, 0x81, 0x00}},
@@ -6643,24 +6643,24 @@ var windows1255 = charmap{
0xb20000b2, 0xb30000b3, 0xb40000b4, 0xb50000b5, 0xb60000b6, 0xb70000b7, 0xb80000b8, 0xb90000b9,
0xbb0000bb, 0xbc0000bc, 0xbd0000bd, 0xbe0000be, 0xbf0000bf, 0xaa0000d7, 0xba0000f7, 0x83000192,
0x880002c6, 0x980002dc, 0xc00005b0, 0xc10005b1, 0xc20005b2, 0xc30005b3, 0xc40005b4, 0xc50005b5,
- 0xc60005b6, 0xc70005b7, 0xc80005b8, 0xc90005b9, 0xcb0005bb, 0xcc0005bc, 0xcd0005bd, 0xce0005be,
- 0xcf0005bf, 0xd00005c0, 0xd10005c1, 0xd20005c2, 0xd30005c3, 0xe00005d0, 0xe10005d1, 0xe20005d2,
- 0xe30005d3, 0xe40005d4, 0xe50005d5, 0xe60005d6, 0xe70005d7, 0xe80005d8, 0xe90005d9, 0xea0005da,
- 0xeb0005db, 0xec0005dc, 0xed0005dd, 0xee0005de, 0xef0005df, 0xf00005e0, 0xf10005e1, 0xf20005e2,
- 0xf30005e3, 0xf40005e4, 0xf50005e5, 0xf60005e6, 0xf70005e7, 0xf80005e8, 0xf90005e9, 0xfa0005ea,
- 0xd40005f0, 0xd50005f1, 0xd60005f2, 0xd70005f3, 0xd80005f4, 0xfd00200e, 0xfe00200f, 0x96002013,
- 0x97002014, 0x91002018, 0x92002019, 0x8200201a, 0x9300201c, 0x9400201d, 0x8400201e, 0x86002020,
- 0x87002021, 0x95002022, 0x85002026, 0x89002030, 0x8b002039, 0x9b00203a, 0xa40020aa, 0x800020ac,
- 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
+ 0xc60005b6, 0xc70005b7, 0xc80005b8, 0xc90005b9, 0xca0005ba, 0xcb0005bb, 0xcc0005bc, 0xcd0005bd,
+ 0xce0005be, 0xcf0005bf, 0xd00005c0, 0xd10005c1, 0xd20005c2, 0xd30005c3, 0xe00005d0, 0xe10005d1,
+ 0xe20005d2, 0xe30005d3, 0xe40005d4, 0xe50005d5, 0xe60005d6, 0xe70005d7, 0xe80005d8, 0xe90005d9,
+ 0xea0005da, 0xeb0005db, 0xec0005dc, 0xed0005dd, 0xee0005de, 0xef0005df, 0xf00005e0, 0xf10005e1,
+ 0xf20005e2, 0xf30005e3, 0xf40005e4, 0xf50005e5, 0xf60005e6, 0xf70005e7, 0xf80005e8, 0xf90005e9,
+ 0xfa0005ea, 0xd40005f0, 0xd50005f1, 0xd60005f2, 0xd70005f3, 0xd80005f4, 0xfd00200e, 0xfe00200f,
+ 0x96002013, 0x97002014, 0x91002018, 0x92002019, 0x8200201a, 0x9300201c, 0x9400201d, 0x8400201e,
+ 0x86002020, 0x87002021, 0x95002022, 0x85002026, 0x89002030, 0x8b002039, 0x9b00203a, 0xa40020aa,
+ 0x800020ac, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122, 0x99002122,
},
}
// Windows1256 is the Windows 1256 encoding.
-var Windows1256 encoding.Encoding = &windows1256
+var Windows1256 *Charmap = &windows1256
-var windows1256 = charmap{
+var windows1256 = Charmap{
name: "Windows 1256",
mib: identifier.Windows1256,
asciiSuperset: true,
@@ -6833,9 +6833,9 @@ var windows1256 = charmap{
}
// Windows1257 is the Windows 1257 encoding.
-var Windows1257 encoding.Encoding = &windows1257
+var Windows1257 *Charmap = &windows1257
-var windows1257 = charmap{
+var windows1257 = Charmap{
name: "Windows 1257",
mib: identifier.Windows1257,
asciiSuperset: true,
@@ -7008,9 +7008,9 @@ var windows1257 = charmap{
}
// Windows1258 is the Windows 1258 encoding.
-var Windows1258 encoding.Encoding = &windows1258
+var Windows1258 *Charmap = &windows1258
-var windows1258 = charmap{
+var windows1258 = Charmap{
name: "Windows 1258",
mib: identifier.Windows1258,
asciiSuperset: true,
@@ -7185,9 +7185,9 @@ var windows1258 = charmap{
// XUserDefined is the X-User-Defined encoding.
//
// It is defined at http://encoding.spec.whatwg.org/#x-user-defined
-var XUserDefined encoding.Encoding = &xUserDefined
+var XUserDefined *Charmap = &xUserDefined
-var xUserDefined = charmap{
+var xUserDefined = Charmap{
name: "X-User-Defined",
mib: identifier.XUserDefined,
asciiSuperset: true,
diff --git a/vendor/golang.org/x/text/encoding/encoding.go b/vendor/golang.org/x/text/encoding/encoding.go
index 221f175c0..a0bd7cd4d 100644
--- a/vendor/golang.org/x/text/encoding/encoding.go
+++ b/vendor/golang.org/x/text/encoding/encoding.go
@@ -124,7 +124,7 @@ func (e *Encoder) Writer(w io.Writer) io.Writer {
}
// ASCIISub is the ASCII substitute character, as recommended by
-// http://unicode.org/reports/tr36/#Text_Comparison
+// https://unicode.org/reports/tr36/#Text_Comparison
const ASCIISub = '\x1a'
// Nop is the nop encoding. Its transformed bytes are the same as the source
diff --git a/vendor/golang.org/x/text/encoding/htmlindex/gen.go b/vendor/golang.org/x/text/encoding/htmlindex/gen.go
deleted file mode 100644
index 80a52f0d9..000000000
--- a/vendor/golang.org/x/text/encoding/htmlindex/gen.go
+++ /dev/null
@@ -1,170 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import (
- "bytes"
- "encoding/json"
- "fmt"
- "log"
- "strings"
-
- "golang.org/x/text/internal/gen"
-)
-
-type group struct {
- Encodings []struct {
- Labels []string
- Name string
- }
-}
-
-func main() {
- gen.Init()
-
- r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
- var groups []group
- if err := json.NewDecoder(r).Decode(&groups); err != nil {
- log.Fatalf("Error reading encodings.json: %v", err)
- }
-
- w := &bytes.Buffer{}
- fmt.Fprintln(w, "type htmlEncoding byte")
- fmt.Fprintln(w, "const (")
- for i, g := range groups {
- for _, e := range g.Encodings {
- key := strings.ToLower(e.Name)
- name := consts[key]
- if name == "" {
- log.Fatalf("No const defined for %s.", key)
- }
- if i == 0 {
- fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
- } else {
- fmt.Fprintf(w, "%s\n", name)
- }
- }
- }
- fmt.Fprintln(w, "numEncodings")
- fmt.Fprint(w, ")\n\n")
-
- fmt.Fprintln(w, "var canonical = [numEncodings]string{")
- for _, g := range groups {
- for _, e := range g.Encodings {
- fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
- }
- }
- fmt.Fprint(w, "}\n\n")
-
- fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
- for _, g := range groups {
- for _, e := range g.Encodings {
- for _, l := range e.Labels {
- key := strings.ToLower(e.Name)
- name := consts[key]
- fmt.Fprintf(w, "%q: %s,\n", l, name)
- }
- }
- }
- fmt.Fprint(w, "}\n\n")
-
- var tags []string
- fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
- for _, loc := range locales {
- tags = append(tags, loc.tag)
- fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
- }
- fmt.Fprint(w, "}\n\n")
-
- fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
-
- gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
-}
-
-// consts maps canonical encoding name to internal constant.
-var consts = map[string]string{
- "utf-8": "utf8",
- "ibm866": "ibm866",
- "iso-8859-2": "iso8859_2",
- "iso-8859-3": "iso8859_3",
- "iso-8859-4": "iso8859_4",
- "iso-8859-5": "iso8859_5",
- "iso-8859-6": "iso8859_6",
- "iso-8859-7": "iso8859_7",
- "iso-8859-8": "iso8859_8",
- "iso-8859-8-i": "iso8859_8I",
- "iso-8859-10": "iso8859_10",
- "iso-8859-13": "iso8859_13",
- "iso-8859-14": "iso8859_14",
- "iso-8859-15": "iso8859_15",
- "iso-8859-16": "iso8859_16",
- "koi8-r": "koi8r",
- "koi8-u": "koi8u",
- "macintosh": "macintosh",
- "windows-874": "windows874",
- "windows-1250": "windows1250",
- "windows-1251": "windows1251",
- "windows-1252": "windows1252",
- "windows-1253": "windows1253",
- "windows-1254": "windows1254",
- "windows-1255": "windows1255",
- "windows-1256": "windows1256",
- "windows-1257": "windows1257",
- "windows-1258": "windows1258",
- "x-mac-cyrillic": "macintoshCyrillic",
- "gbk": "gbk",
- "gb18030": "gb18030",
- // "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
- "big5": "big5",
- "euc-jp": "eucjp",
- "iso-2022-jp": "iso2022jp",
- "shift_jis": "shiftJIS",
- "euc-kr": "euckr",
- "replacement": "replacement",
- "utf-16be": "utf16be",
- "utf-16le": "utf16le",
- "x-user-defined": "xUserDefined",
-}
-
-// locales is taken from
-// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
-var locales = []struct{ tag, name string }{
- {"und", "windows-1252"}, // The default value.
- {"ar", "windows-1256"},
- {"ba", "windows-1251"},
- {"be", "windows-1251"},
- {"bg", "windows-1251"},
- {"cs", "windows-1250"},
- {"el", "iso-8859-7"},
- {"et", "windows-1257"},
- {"fa", "windows-1256"},
- {"he", "windows-1255"},
- {"hr", "windows-1250"},
- {"hu", "iso-8859-2"},
- {"ja", "shift_jis"},
- {"kk", "windows-1251"},
- {"ko", "euc-kr"},
- {"ku", "windows-1254"},
- {"ky", "windows-1251"},
- {"lt", "windows-1257"},
- {"lv", "windows-1257"},
- {"mk", "windows-1251"},
- {"pl", "iso-8859-2"},
- {"ru", "windows-1251"},
- {"sah", "windows-1251"},
- {"sk", "windows-1250"},
- {"sl", "iso-8859-2"},
- {"sr", "windows-1251"},
- {"tg", "windows-1251"},
- {"th", "windows-874"},
- {"tr", "windows-1254"},
- {"tt", "windows-1251"},
- {"uk", "windows-1251"},
- {"vi", "windows-1258"},
- {"zh-hans", "gb18030"},
- {"zh-hant", "big5"},
-}
diff --git a/vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go b/vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
index 70f2ac4bc..bdc7d15dd 100644
--- a/vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
+++ b/vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
@@ -50,7 +50,7 @@ func LanguageDefault(tag language.Tag) string {
for _, t := range strings.Split(locales, " ") {
tags = append(tags, language.MustParse(t))
}
- matcher = language.NewMatcher(tags)
+ matcher = language.NewMatcher(tags, language.PreferSameScript(true))
})
_, i, _ := matcher.Match(tag)
return canonical[localeMap[i]] // Default is Windows-1252.
diff --git a/vendor/golang.org/x/text/encoding/htmlindex/tables.go b/vendor/golang.org/x/text/encoding/htmlindex/tables.go
index 78950d3c3..f074e2c6d 100644
--- a/vendor/golang.org/x/text/encoding/htmlindex/tables.go
+++ b/vendor/golang.org/x/text/encoding/htmlindex/tables.go
@@ -1,4 +1,4 @@
-// This file was generated by go generate; DO NOT EDIT
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package htmlindex
@@ -306,6 +306,7 @@ var nameMap = map[string]htmlEncoding{
"iso-2022-cn": replacement,
"iso-2022-cn-ext": replacement,
"iso-2022-kr": replacement,
+ "replacement": replacement,
"utf-16be": utf16be,
"utf-16": utf16le,
"utf-16le": utf16le,
@@ -313,7 +314,7 @@ var nameMap = map[string]htmlEncoding{
}
var localeMap = []htmlEncoding{
- windows1252, // und
+ windows1252, // und_Latn
windows1256, // ar
windows1251, // ba
windows1251, // be
@@ -349,4 +350,4 @@ var localeMap = []htmlEncoding{
big5, // zh-hant
}
-const locales = "und ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"
+const locales = "und_Latn ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"
diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go b/vendor/golang.org/x/text/encoding/internal/identifier/gen.go
deleted file mode 100644
index 0c8eba7e5..000000000
--- a/vendor/golang.org/x/text/encoding/internal/identifier/gen.go
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-import (
- "bytes"
- "encoding/xml"
- "fmt"
- "io"
- "log"
- "strings"
-
- "golang.org/x/text/internal/gen"
-)
-
-type registry struct {
- XMLName xml.Name `xml:"registry"`
- Updated string `xml:"updated"`
- Registry []struct {
- ID string `xml:"id,attr"`
- Record []struct {
- Name string `xml:"name"`
- Xref []struct {
- Type string `xml:"type,attr"`
- Data string `xml:"data,attr"`
- } `xml:"xref"`
- Desc struct {
- Data string `xml:",innerxml"`
- // Any []struct {
- // Data string `xml:",chardata"`
- // } `xml:",any"`
- // Data string `xml:",chardata"`
- } `xml:"description,"`
- MIB string `xml:"value"`
- Alias []string `xml:"alias"`
- MIME string `xml:"preferred_alias"`
- } `xml:"record"`
- } `xml:"registry"`
-}
-
-func main() {
- r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
- reg := &registry{}
- if err := xml.NewDecoder(r).Decode(&reg); err != nil && err != io.EOF {
- log.Fatalf("Error decoding charset registry: %v", err)
- }
- if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
- log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
- }
-
- w := &bytes.Buffer{}
- fmt.Fprintf(w, "const (\n")
- for _, rec := range reg.Registry[0].Record {
- constName := ""
- for _, a := range rec.Alias {
- if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
- // Some of the constant definitions have comments in them. Strip those.
- constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
- }
- }
- if constName == "" {
- switch rec.MIB {
- case "2085":
- constName = "HZGB2312" // Not listed as alias for some reason.
- default:
- log.Fatalf("No cs alias defined for %s.", rec.MIB)
- }
- }
- if rec.MIME != "" {
- rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
- }
- fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
- if len(rec.Desc.Data) > 0 {
- fmt.Fprint(w, "// ")
- d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
- inElem := true
- attr := ""
- for {
- t, err := d.Token()
- if err != nil {
- if err != io.EOF {
- log.Fatal(err)
- }
- break
- }
- switch x := t.(type) {
- case xml.CharData:
- attr = "" // Don't need attribute info.
- a := bytes.Split([]byte(x), []byte("\n"))
- for i, b := range a {
- if b = bytes.TrimSpace(b); len(b) != 0 {
- if !inElem && i > 0 {
- fmt.Fprint(w, "\n// ")
- }
- inElem = false
- fmt.Fprintf(w, "%s ", string(b))
- }
- }
- case xml.StartElement:
- if x.Name.Local == "xref" {
- inElem = true
- use := false
- for _, a := range x.Attr {
- if a.Name.Local == "type" {
- use = use || a.Value != "person"
- }
- if a.Name.Local == "data" && use {
- attr = a.Value + " "
- }
- }
- }
- case xml.EndElement:
- inElem = false
- fmt.Fprint(w, attr)
- }
- }
- fmt.Fprint(w, "\n")
- }
- for _, x := range rec.Xref {
- switch x.Type {
- case "rfc":
- fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
- case "uri":
- fmt.Fprintf(w, "// Reference: %s\n", x.Data)
- }
- }
- fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
- fmt.Fprintln(w)
- }
- fmt.Fprintln(w, ")")
-
- gen.WriteGoFile("mib.go", "identifier", w.Bytes())
-}
diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
index 7351b4ef8..5c9b85c28 100644
--- a/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
+++ b/vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
@@ -34,7 +34,7 @@ package identifier
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
// - http://www.ietf.org/rfc/rfc2978.txt
-// - http://www.unicode.org/reports/tr22/
+// - https://www.unicode.org/reports/tr22/
// - http://www.w3.org/TR/encoding/
// - https://encoding.spec.whatwg.org/
// - https://encoding.spec.whatwg.org/encodings.json
diff --git a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go
index 915abfa29..8cc29021c 100644
--- a/vendor/golang.org/x/text/encoding/internal/identifier/mib.go
+++ b/vendor/golang.org/x/text/encoding/internal/identifier/mib.go
@@ -1,4 +1,4 @@
-// This file was generated by go generate; DO NOT EDIT
+// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package identifier
@@ -884,27 +884,27 @@ const (
// CESU8 is the MIB identifier with IANA name CESU-8.
//
- // http://www.unicode.org/unicode/reports/tr26
+ // https://www.unicode.org/unicode/reports/tr26
CESU8 MIB = 1016
// UTF32 is the MIB identifier with IANA name UTF-32.
//
- // http://www.unicode.org/unicode/reports/tr19/
+ // https://www.unicode.org/unicode/reports/tr19/
UTF32 MIB = 1017
// UTF32BE is the MIB identifier with IANA name UTF-32BE.
//
- // http://www.unicode.org/unicode/reports/tr19/
+ // https://www.unicode.org/unicode/reports/tr19/
UTF32BE MIB = 1018
// UTF32LE is the MIB identifier with IANA name UTF-32LE.
//
- // http://www.unicode.org/unicode/reports/tr19/
+ // https://www.unicode.org/unicode/reports/tr19/
UTF32LE MIB = 1019
// BOCU1 is the MIB identifier with IANA name BOCU-1.
//
- // http://www.unicode.org/notes/tn6/
+ // https://www.unicode.org/notes/tn6/
BOCU1 MIB = 1020
// Windows30Latin1 is the MIB identifier with IANA name ISO-8859-1-Windows-3.0-Latin-1.
diff --git a/vendor/golang.org/x/text/encoding/japanese/eucjp.go b/vendor/golang.org/x/text/encoding/japanese/eucjp.go
index 40f9b05f3..79313fa58 100644
--- a/vendor/golang.org/x/text/encoding/japanese/eucjp.go
+++ b/vendor/golang.org/x/text/encoding/japanese/eucjp.go
@@ -5,7 +5,6 @@
package japanese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -23,10 +22,9 @@ var eucJP = internal.Encoding{
identifier.EUCPkdFmtJapanese,
}
-var errInvalidEUCJP = errors.New("japanese: invalid EUC-JP encoding")
-
type eucJPDecoder struct{ transform.NopResetter }
+// See https://encoding.spec.whatwg.org/#euc-jp-decoder.
func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
loop:
@@ -37,60 +35,79 @@ loop:
case c0 == 0x8e:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ break
}
c1 := src[nSrc+1]
- if c1 < 0xa1 || 0xdf < c1 {
- err = errInvalidEUCJP
- break loop
+ switch {
+ case c1 < 0xa1:
+ r, size = utf8.RuneError, 1
+ case c1 > 0xdf:
+ r, size = utf8.RuneError, 2
+ if c1 == 0xff {
+ size = 1
+ }
+ default:
+ r, size = rune(c1)+(0xff61-0xa1), 2
}
- r, size = rune(c1)+(0xff61-0xa1), 2
-
case c0 == 0x8f:
if nSrc+2 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ if p := nSrc + 1; p < len(src) && 0xa1 <= src[p] && src[p] < 0xfe {
+ size = 2
+ }
+ break
}
c1 := src[nSrc+1]
if c1 < 0xa1 || 0xfe < c1 {
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 1
+ break
}
c2 := src[nSrc+2]
if c2 < 0xa1 || 0xfe < c2 {
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 2
+ break
}
- r, size = '\ufffd', 3
+ r, size = utf8.RuneError, 3
if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
r = rune(jis0212Decode[i])
if r == 0 {
- r = '\ufffd'
+ r = utf8.RuneError
}
}
case 0xa1 <= c0 && c0 <= 0xfe:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ break
}
c1 := src[nSrc+1]
if c1 < 0xa1 || 0xfe < c1 {
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 1
+ break
}
- r, size = '\ufffd', 2
+ r, size = utf8.RuneError, 2
if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
r = rune(jis0208Decode[i])
if r == 0 {
- r = '\ufffd'
+ r = utf8.RuneError
}
}
default:
- err = errInvalidEUCJP
- break loop
+ r, size = utf8.RuneError, 1
}
if nDst+utf8.RuneLen(r) > len(dst) {
@@ -99,9 +116,6 @@ loop:
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidEUCJP
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go b/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
index b63e7d5d8..613226df5 100644
--- a/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
+++ b/vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
@@ -5,7 +5,6 @@
package japanese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -31,8 +30,6 @@ func iso2022JPNewEncoder() transform.Transformer {
return new(iso2022JPEncoder)
}
-var errInvalidISO2022JP = errors.New("japanese: invalid ISO-2022-JP encoding")
-
const (
asciiState = iota
katakanaState
@@ -50,45 +47,51 @@ func (d *iso2022JPDecoder) Reset() {
func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
r, size := rune(0), 0
-loop:
for ; nSrc < len(src); nSrc += size {
c0 := src[nSrc]
if c0 >= utf8.RuneSelf {
- err = errInvalidISO2022JP
- break loop
+ r, size = '\ufffd', 1
+ goto write
}
if c0 == asciiEsc {
if nSrc+2 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ return nDst, nSrc, transform.ErrShortSrc
+ }
+ // TODO: is it correct to only skip 1??
+ r, size = '\ufffd', 1
+ goto write
}
size = 3
c1 := src[nSrc+1]
c2 := src[nSrc+2]
switch {
- case c1 == '$' && (c2 == '@' || c2 == 'B'):
+ case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}
*d = jis0208State
continue
- case c1 == '$' && c2 == '(':
+ case c1 == '$' && c2 == '(': // 0x24 0x28
if nSrc+3 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ return nDst, nSrc, transform.ErrShortSrc
+ }
+ r, size = '\ufffd', 1
+ goto write
}
size = 4
- if src[nSrc]+3 == 'D' {
+ if src[nSrc+3] == 'D' {
*d = jis0212State
continue
}
- case c1 == '(' && (c2 == 'B' || c2 == 'J'):
+ case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}
*d = asciiState
continue
- case c1 == '(' && c2 == 'I':
+ case c1 == '(' && c2 == 'I': // 0x28 0x49
*d = katakanaState
continue
}
- err = errInvalidISO2022JP
- break loop
+ r, size = '\ufffd', 1
+ goto write
}
switch *d {
@@ -97,8 +100,8 @@ loop:
case katakanaState:
if c0 < 0x21 || 0x60 <= c0 {
- err = errInvalidISO2022JP
- break loop
+ r, size = '\ufffd', 1
+ goto write
}
r, size = rune(c0)+(0xff61-0x21), 1
@@ -106,11 +109,14 @@ loop:
if c0 == 0x0a {
*d = asciiState
r, size = rune(c0), 1
- break
+ goto write
}
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ return nDst, nSrc, transform.ErrShortSrc
+ }
+ r, size = '\ufffd', 1
+ goto write
}
size = 2
c1 := src[nSrc+1]
@@ -121,22 +127,19 @@ loop:
r = rune(jis0212Decode[i])
} else {
r = '\ufffd'
- break
+ goto write
}
if r == 0 {
r = '\ufffd'
}
}
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
- err = transform.ErrShortDst
- break loop
+ return nDst, nSrc, transform.ErrShortDst
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidISO2022JP
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/japanese/maketables.go b/vendor/golang.org/x/text/encoding/japanese/maketables.go
deleted file mode 100644
index d6c10deb0..000000000
--- a/vendor/golang.org/x/text/encoding/japanese/maketables.go
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-// This program generates tables.go:
-// go run maketables.go | gofmt > tables.go
-
-// TODO: Emoji extensions?
-// http://www.unicode.org/faq/emoji_dingbats.html
-// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
-
-import (
- "bufio"
- "fmt"
- "log"
- "net/http"
- "sort"
- "strings"
-)
-
-type entry struct {
- jisCode, table int
-}
-
-func main() {
- fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
- fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
- fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
-
- reverse := [65536]entry{}
- for i := range reverse {
- reverse[i].table = -1
- }
-
- tables := []struct {
- url string
- name string
- }{
- {"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
- {"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
- }
- for i, table := range tables {
- res, err := http.Get(table.url)
- if err != nil {
- log.Fatalf("%q: Get: %v", table.url, err)
- }
- defer res.Body.Close()
-
- mapping := [65536]uint16{}
-
- scanner := bufio.NewScanner(res.Body)
- for scanner.Scan() {
- s := strings.TrimSpace(scanner.Text())
- if s == "" || s[0] == '#' {
- continue
- }
- x, y := 0, uint16(0)
- if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
- log.Fatalf("%q: could not parse %q", table.url, s)
- }
- if x < 0 || 120*94 <= x {
- log.Fatalf("%q: JIS code %d is out of range", table.url, x)
- }
- mapping[x] = y
- if reverse[y].table == -1 {
- reverse[y] = entry{jisCode: x, table: i}
- }
- }
- if err := scanner.Err(); err != nil {
- log.Fatalf("%q: scanner error: %v", table.url, err)
- }
-
- fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
- table.name, table.name, table.url)
- fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
- for i, m := range mapping {
- if m != 0 {
- fmt.Printf("\t%d: 0x%04X,\n", i, m)
- }
- }
- fmt.Printf("}\n\n")
- }
-
- // Any run of at least separation continuous zero entries in the reverse map will
- // be a separate encode table.
- const separation = 1024
-
- intervals := []interval(nil)
- low, high := -1, -1
- for i, v := range reverse {
- if v.table == -1 {
- continue
- }
- if low < 0 {
- low = i
- } else if i-high >= separation {
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- low = i
- }
- high = i + 1
- }
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- sort.Sort(byDecreasingLength(intervals))
-
- fmt.Printf("const (\n")
- fmt.Printf("\tjis0208 = 1\n")
- fmt.Printf("\tjis0212 = 2\n")
- fmt.Printf("\tcodeMask = 0x7f\n")
- fmt.Printf("\tcodeShift = 7\n")
- fmt.Printf("\ttableShift = 14\n")
- fmt.Printf(")\n\n")
-
- fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
- fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
- fmt.Printf("// sorted by decreasing length.\n")
- for i, v := range intervals {
- fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
- }
- fmt.Printf("//\n")
- fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
- fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
- fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
- fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
- fmt.Printf("\n")
-
- for i, v := range intervals {
- fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
- fmt.Printf("var encode%d = [...]uint16{\n", i)
- for j := v.low; j < v.high; j++ {
- x := reverse[j]
- if x.table == -1 {
- continue
- }
- fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
- j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
- }
- fmt.Printf("}\n\n")
- }
-}
-
-// interval is a half-open interval [low, high).
-type interval struct {
- low, high int
-}
-
-func (i interval) len() int { return i.high - i.low }
-
-// byDecreasingLength sorts intervals by decreasing length.
-type byDecreasingLength []interval
-
-func (b byDecreasingLength) Len() int { return len(b) }
-func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
-func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
diff --git a/vendor/golang.org/x/text/encoding/japanese/shiftjis.go b/vendor/golang.org/x/text/encoding/japanese/shiftjis.go
index 099aecc31..16fd8a6e3 100644
--- a/vendor/golang.org/x/text/encoding/japanese/shiftjis.go
+++ b/vendor/golang.org/x/text/encoding/japanese/shiftjis.go
@@ -5,7 +5,6 @@
package japanese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -24,8 +23,6 @@ var shiftJIS = internal.Encoding{
identifier.ShiftJIS,
}
-var errInvalidShiftJIS = errors.New("japanese: invalid Shift JIS encoding")
-
type shiftJISDecoder struct{ transform.NopResetter }
func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -48,28 +45,32 @@ loop:
c0 = 2*c0 - 0x21
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = '\ufffd', 1
+ goto write
}
c1 := src[nSrc+1]
switch {
case c1 < 0x40:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1 // c1 is ASCII so output on next round
+ goto write
case c1 < 0x7f:
c0--
c1 -= 0x40
case c1 == 0x7f:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1 // c1 is ASCII so output on next round
+ goto write
case c1 < 0x9f:
c0--
c1 -= 0x41
case c1 < 0xfd:
c1 -= 0x9f
default:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 2
+ goto write
}
r, size = '\ufffd', 2
if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
@@ -79,20 +80,19 @@ loop:
}
}
+ case c0 == 0x80:
+ r, size = 0x80, 1
+
default:
- err = errInvalidShiftJIS
- break loop
+ r, size = '\ufffd', 1
}
-
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidShiftJIS
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/korean/euckr.go b/vendor/golang.org/x/text/encoding/korean/euckr.go
index a4b9ff178..034337f5d 100644
--- a/vendor/golang.org/x/text/encoding/korean/euckr.go
+++ b/vendor/golang.org/x/text/encoding/korean/euckr.go
@@ -5,7 +5,6 @@
package korean
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -26,8 +25,6 @@ var eucKR = internal.Encoding{
identifier.EUCKR,
}
-var errInvalidEUCKR = errors.New("korean: invalid EUC-KR encoding")
-
type eucKRDecoder struct{ transform.NopResetter }
func (eucKRDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -40,10 +37,15 @@ loop:
case 0x81 <= c0 && c0 < 0xff:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ break
}
c1 := src[nSrc+1]
+ size = 2
if c0 < 0xc7 {
r = 178 * rune(c0-0x81)
switch {
@@ -54,39 +56,36 @@ loop:
case 0x81 <= c1 && c1 < 0xff:
r += rune(c1) - (0x81 - 2*26)
default:
- err = errInvalidEUCKR
- break loop
+ goto decError
}
} else if 0xa1 <= c1 && c1 < 0xff {
r = 178*(0xc7-0x81) + rune(c0-0xc7)*94 + rune(c1-0xa1)
} else {
- err = errInvalidEUCKR
- break loop
+ goto decError
}
if int(r) < len(decode) {
r = rune(decode[r])
- if r == 0 {
- r = '\ufffd'
+ if r != 0 {
+ break
}
- } else {
- r = '\ufffd'
}
- size = 2
+ decError:
+ r = utf8.RuneError
+ if c1 < utf8.RuneSelf {
+ size = 1
+ }
default:
- err = errInvalidEUCKR
- break loop
+ r, size = utf8.RuneError, 1
+ break
}
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
- break loop
+ break
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidEUCKR
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/korean/maketables.go b/vendor/golang.org/x/text/encoding/korean/maketables.go
deleted file mode 100644
index c84034fb6..000000000
--- a/vendor/golang.org/x/text/encoding/korean/maketables.go
+++ /dev/null
@@ -1,143 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-// This program generates tables.go:
-// go run maketables.go | gofmt > tables.go
-
-import (
- "bufio"
- "fmt"
- "log"
- "net/http"
- "sort"
- "strings"
-)
-
-func main() {
- fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
- fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
- fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
-
- res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
- if err != nil {
- log.Fatalf("Get: %v", err)
- }
- defer res.Body.Close()
-
- mapping := [65536]uint16{}
- reverse := [65536]uint16{}
-
- scanner := bufio.NewScanner(res.Body)
- for scanner.Scan() {
- s := strings.TrimSpace(scanner.Text())
- if s == "" || s[0] == '#' {
- continue
- }
- x, y := uint16(0), uint16(0)
- if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
- log.Fatalf("could not parse %q", s)
- }
- if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
- log.Fatalf("EUC-KR code %d is out of range", x)
- }
- mapping[x] = y
- if reverse[y] == 0 {
- c0, c1 := uint16(0), uint16(0)
- if x < 178*(0xc7-0x81) {
- c0 = uint16(x/178) + 0x81
- c1 = uint16(x % 178)
- switch {
- case c1 < 1*26:
- c1 += 0x41
- case c1 < 2*26:
- c1 += 0x47
- default:
- c1 += 0x4d
- }
- } else {
- x -= 178 * (0xc7 - 0x81)
- c0 = uint16(x/94) + 0xc7
- c1 = uint16(x%94) + 0xa1
- }
- reverse[y] = c0<<8 | c1
- }
- }
- if err := scanner.Err(); err != nil {
- log.Fatalf("scanner error: %v", err)
- }
-
- fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
- fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
- fmt.Printf("var decode = [...]uint16{\n")
- for i, v := range mapping {
- if v != 0 {
- fmt.Printf("\t%d: 0x%04X,\n", i, v)
- }
- }
- fmt.Printf("}\n\n")
-
- // Any run of at least separation continuous zero entries in the reverse map will
- // be a separate encode table.
- const separation = 1024
-
- intervals := []interval(nil)
- low, high := -1, -1
- for i, v := range reverse {
- if v == 0 {
- continue
- }
- if low < 0 {
- low = i
- } else if i-high >= separation {
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- low = i
- }
- high = i + 1
- }
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- sort.Sort(byDecreasingLength(intervals))
-
- fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
- fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
- fmt.Printf("// sorted by decreasing length.\n")
- for i, v := range intervals {
- fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
- }
- fmt.Printf("\n")
-
- for i, v := range intervals {
- fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
- fmt.Printf("var encode%d = [...]uint16{\n", i)
- for j := v.low; j < v.high; j++ {
- x := reverse[j]
- if x == 0 {
- continue
- }
- fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
- }
- fmt.Printf("}\n\n")
- }
-}
-
-// interval is a half-open interval [low, high).
-type interval struct {
- low, high int
-}
-
-func (i interval) len() int { return i.high - i.low }
-
-// byDecreasingLength sorts intervals by decreasing length.
-type byDecreasingLength []interval
-
-func (b byDecreasingLength) Len() int { return len(b) }
-func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
-func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
index e0b15bbcc..b89c45b03 100644
--- a/vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
+++ b/vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
@@ -5,7 +5,6 @@
package simplifiedchinese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -40,11 +39,6 @@ var gbk18030 = internal.Encoding{
identifier.GB18030,
}
-var (
- errInvalidGB18030 = errors.New("simplifiedchinese: invalid GB18030 encoding")
- errInvalidGBK = errors.New("simplifiedchinese: invalid GBK encoding")
-)
-
type gbkDecoder struct {
transform.NopResetter
gb18030 bool
@@ -66,8 +60,12 @@ loop:
case c0 < 0xff:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ goto write
}
c1 := src[nSrc+1]
switch {
@@ -77,18 +75,24 @@ loop:
c1 -= 0x41
case d.gb18030 && 0x30 <= c1 && c1 < 0x40:
if nSrc+3 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ // The second byte here is always ASCII, so we can set size
+ // to 1 in all cases.
+ r, size = utf8.RuneError, 1
+ goto write
}
c2 := src[nSrc+2]
if c2 < 0x81 || 0xff <= c2 {
- err = errInvalidGB18030
- break loop
+ r, size = utf8.RuneError, 1
+ goto write
}
c3 := src[nSrc+3]
if c3 < 0x30 || 0x3a <= c3 {
- err = errInvalidGB18030
- break loop
+ r, size = utf8.RuneError, 1
+ goto write
}
size = 4
r = ((rune(c0-0x81)*10+rune(c1-0x30))*126+rune(c2-0x81))*10 + rune(c3-0x30)
@@ -109,17 +113,13 @@ loop:
r -= 189000
if 0 <= r && r < 0x100000 {
r += 0x10000
- goto write
- }
- err = errInvalidGB18030
- break loop
- default:
- if d.gb18030 {
- err = errInvalidGB18030
} else {
- err = errInvalidGBK
+ r, size = utf8.RuneError, 1
}
- break loop
+ goto write
+ default:
+ r, size = utf8.RuneError, 1
+ goto write
}
r, size = '\ufffd', 2
if i := int(c0-0x81)*190 + int(c1); i < len(decode) {
@@ -130,12 +130,7 @@ loop:
}
default:
- if d.gb18030 {
- err = errInvalidGB18030
- } else {
- err = errInvalidGBK
- }
- break loop
+ r, size = utf8.RuneError, 1
}
write:
@@ -145,13 +140,6 @@ loop:
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- if d.gb18030 {
- err = errInvalidGB18030
- } else {
- err = errInvalidGBK
- }
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
index 85de6b1e6..eb3157f0b 100644
--- a/vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
+++ b/vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
@@ -5,7 +5,6 @@
package simplifiedchinese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -31,8 +30,6 @@ func hzGB2312NewEncoder() transform.Transformer {
return new(hzGB2312Encoder)
}
-var errInvalidHZGB2312 = errors.New("simplifiedchinese: invalid HZ-GB2312 encoding")
-
const (
asciiState = iota
gbState
@@ -50,14 +47,18 @@ loop:
for ; nSrc < len(src); nSrc += size {
c0 := src[nSrc]
if c0 >= utf8.RuneSelf {
- err = errInvalidHZGB2312
- break loop
+ r, size = utf8.RuneError, 1
+ goto write
}
if c0 == '~' {
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r = utf8.RuneError
+ goto write
}
size = 2
switch src[nSrc+1] {
@@ -78,8 +79,8 @@ loop:
case '\n':
continue
default:
- err = errInvalidHZGB2312
- break loop
+ r = utf8.RuneError
+ goto write
}
}
@@ -87,33 +88,37 @@ loop:
r, size = rune(c0), 1
} else {
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ goto write
}
+ size = 2
c1 := src[nSrc+1]
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
- err = errInvalidHZGB2312
- break loop
- }
-
- r, size = '\ufffd', 2
- if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
+ // error
+ } else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
r = rune(decode[i])
- if r == 0 {
- r = '\ufffd'
+ if r != 0 {
+ goto write
}
}
+ if c1 > utf8.RuneSelf {
+ // Be consistent and always treat non-ASCII as a single error.
+ size = 1
+ }
+ r = utf8.RuneError
}
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
}
nDst += utf8.EncodeRune(dst[nDst:], r)
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidHZGB2312
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go b/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
deleted file mode 100644
index 55016c786..000000000
--- a/vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-// This program generates tables.go:
-// go run maketables.go | gofmt > tables.go
-
-import (
- "bufio"
- "fmt"
- "log"
- "net/http"
- "sort"
- "strings"
-)
-
-func main() {
- fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
- fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n")
- fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n")
-
- printGB18030()
- printGBK()
-}
-
-func printGB18030() {
- res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
- if err != nil {
- log.Fatalf("Get: %v", err)
- }
- defer res.Body.Close()
-
- fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
- fmt.Printf("var gb18030 = [...][2]uint16{\n")
- scanner := bufio.NewScanner(res.Body)
- for scanner.Scan() {
- s := strings.TrimSpace(scanner.Text())
- if s == "" || s[0] == '#' {
- continue
- }
- x, y := uint32(0), uint32(0)
- if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
- log.Fatalf("could not parse %q", s)
- }
- if x < 0x10000 && y < 0x10000 {
- fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
- }
- }
- fmt.Printf("}\n\n")
-}
-
-func printGBK() {
- res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
- if err != nil {
- log.Fatalf("Get: %v", err)
- }
- defer res.Body.Close()
-
- mapping := [65536]uint16{}
- reverse := [65536]uint16{}
-
- scanner := bufio.NewScanner(res.Body)
- for scanner.Scan() {
- s := strings.TrimSpace(scanner.Text())
- if s == "" || s[0] == '#' {
- continue
- }
- x, y := uint16(0), uint16(0)
- if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
- log.Fatalf("could not parse %q", s)
- }
- if x < 0 || 126*190 <= x {
- log.Fatalf("GBK code %d is out of range", x)
- }
- mapping[x] = y
- if reverse[y] == 0 {
- c0, c1 := x/190, x%190
- if c1 >= 0x3f {
- c1++
- }
- reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
- }
- }
- if err := scanner.Err(); err != nil {
- log.Fatalf("scanner error: %v", err)
- }
-
- fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
- fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
- fmt.Printf("var decode = [...]uint16{\n")
- for i, v := range mapping {
- if v != 0 {
- fmt.Printf("\t%d: 0x%04X,\n", i, v)
- }
- }
- fmt.Printf("}\n\n")
-
- // Any run of at least separation continuous zero entries in the reverse map will
- // be a separate encode table.
- const separation = 1024
-
- intervals := []interval(nil)
- low, high := -1, -1
- for i, v := range reverse {
- if v == 0 {
- continue
- }
- if low < 0 {
- low = i
- } else if i-high >= separation {
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- low = i
- }
- high = i + 1
- }
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- sort.Sort(byDecreasingLength(intervals))
-
- fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
- fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
- fmt.Printf("// sorted by decreasing length.\n")
- for i, v := range intervals {
- fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
- }
- fmt.Printf("\n")
-
- for i, v := range intervals {
- fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
- fmt.Printf("var encode%d = [...]uint16{\n", i)
- for j := v.low; j < v.high; j++ {
- x := reverse[j]
- if x == 0 {
- continue
- }
- fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
- }
- fmt.Printf("}\n\n")
- }
-}
-
-// interval is a half-open interval [low, high).
-type interval struct {
- low, high int
-}
-
-func (i interval) len() int { return i.high - i.low }
-
-// byDecreasingLength sorts intervals by decreasing length.
-type byDecreasingLength []interval
-
-func (b byDecreasingLength) Len() int { return len(b) }
-func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
-func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
diff --git a/vendor/golang.org/x/text/encoding/traditionalchinese/big5.go b/vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
index 275821f5d..1fcddde08 100644
--- a/vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
+++ b/vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
@@ -5,7 +5,6 @@
package traditionalchinese
import (
- "errors"
"unicode/utf8"
"golang.org/x/text/encoding"
@@ -26,8 +25,6 @@ var big5 = internal.Encoding{
identifier.Big5,
}
-var errInvalidBig5 = errors.New("traditionalchinese: invalid Big5 encoding")
-
type big5Decoder struct{ transform.NopResetter }
func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
@@ -40,8 +37,12 @@ loop:
case 0x81 <= c0 && c0 < 0xff:
if nSrc+1 >= len(src) {
- err = transform.ErrShortSrc
- break loop
+ if !atEOF {
+ err = transform.ErrShortSrc
+ break loop
+ }
+ r, size = utf8.RuneError, 1
+ goto write
}
c1 := src[nSrc+1]
switch {
@@ -49,9 +50,12 @@ loop:
c1 -= 0x40
case 0xa1 <= c1 && c1 < 0xff:
c1 -= 0x62
+ case c1 < 0x40:
+ r, size = utf8.RuneError, 1
+ goto write
default:
- err = errInvalidBig5
- break loop
+ r, size = utf8.RuneError, 2
+ goto write
}
r, size = '\ufffd', 2
if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
@@ -80,10 +84,10 @@ loop:
}
default:
- err = errInvalidBig5
- break loop
+ r, size = utf8.RuneError, 1
}
+ write:
if nDst+utf8.RuneLen(r) > len(dst) {
err = transform.ErrShortDst
break loop
@@ -99,9 +103,6 @@ loop:
nDst += copy(dst[nDst:], s)
continue loop
}
- if atEOF && err == transform.ErrShortSrc {
- err = errInvalidBig5
- }
return nDst, nSrc, err
}
diff --git a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go b/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
deleted file mode 100644
index cf7fdb31a..000000000
--- a/vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build ignore
-
-package main
-
-// This program generates tables.go:
-// go run maketables.go | gofmt > tables.go
-
-import (
- "bufio"
- "fmt"
- "log"
- "net/http"
- "sort"
- "strings"
-)
-
-func main() {
- fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
- fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
- fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
-
- res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
- if err != nil {
- log.Fatalf("Get: %v", err)
- }
- defer res.Body.Close()
-
- mapping := [65536]uint32{}
- reverse := [65536 * 4]uint16{}
-
- scanner := bufio.NewScanner(res.Body)
- for scanner.Scan() {
- s := strings.TrimSpace(scanner.Text())
- if s == "" || s[0] == '#' {
- continue
- }
- x, y := uint16(0), uint32(0)
- if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
- log.Fatalf("could not parse %q", s)
- }
- if x < 0 || 126*157 <= x {
- log.Fatalf("Big5 code %d is out of range", x)
- }
- mapping[x] = y
-
- // The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
- // "The index pointer for code point in index is the first pointer
- // corresponding to code point in index", which would normally mean
- // that the code below should be guarded by "if reverse[y] == 0", but
- // last instead of first seems to match the behavior of
- // "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
- // http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
- // (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
- // and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
- c0, c1 := x/157, x%157
- if c1 < 0x3f {
- c1 += 0x40
- } else {
- c1 += 0x62
- }
- reverse[y] = (0x81+c0)<<8 | c1
- }
- if err := scanner.Err(); err != nil {
- log.Fatalf("scanner error: %v", err)
- }
-
- fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
- fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
- fmt.Printf("var decode = [...]uint32{\n")
- for i, v := range mapping {
- if v != 0 {
- fmt.Printf("\t%d: 0x%08X,\n", i, v)
- }
- }
- fmt.Printf("}\n\n")
-
- // Any run of at least separation continuous zero entries in the reverse map will
- // be a separate encode table.
- const separation = 1024
-
- intervals := []interval(nil)
- low, high := -1, -1
- for i, v := range reverse {
- if v == 0 {
- continue
- }
- if low < 0 {
- low = i
- } else if i-high >= separation {
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- low = i
- }
- high = i + 1
- }
- if high >= 0 {
- intervals = append(intervals, interval{low, high})
- }
- sort.Sort(byDecreasingLength(intervals))
-
- fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
- fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
- fmt.Printf("// sorted by decreasing length.\n")
- for i, v := range intervals {
- fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
- }
- fmt.Printf("\n")
-
- for i, v := range intervals {
- fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
- fmt.Printf("var encode%d = [...]uint16{\n", i)
- for j := v.low; j < v.high; j++ {
- x := reverse[j]
- if x == 0 {
- continue
- }
- fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
- }
- fmt.Printf("}\n\n")
- }
-}
-
-// interval is a half-open interval [low, high).
-type interval struct {
- low, high int
-}
-
-func (i interval) len() int { return i.high - i.low }
-
-// byDecreasingLength sorts intervals by decreasing length.
-type byDecreasingLength []interval
-
-func (b byDecreasingLength) Len() int { return len(b) }
-func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
-func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
diff --git a/vendor/golang.org/x/text/encoding/unicode/unicode.go b/vendor/golang.org/x/text/encoding/unicode/unicode.go
index 579cadfb1..4850ff365 100644
--- a/vendor/golang.org/x/text/encoding/unicode/unicode.go
+++ b/vendor/golang.org/x/text/encoding/unicode/unicode.go
@@ -145,7 +145,7 @@ func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e
// and consumed in a greater context that implies a certain endianness, use
// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
//
-// In the language of http://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
+// In the language of https://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
// corresponds to "Where the precise type of the data stream is known... the
// BOM should not be used" and ExpectBOM corresponds to "A particular
// protocol... may require use of the BOM".