about summary refs log tree commit diff stats
path: root/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go
diff options
context:
space:
mode:
Diffstat (limited to 'Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go')
-rw-r--r-- Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go 151
1 files changed, 114 insertions, 37 deletions
diff --git a/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go b/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go
index fc87c2103..8e1db0202 100644
--- a/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go
+++ b/Godeps/_workspace/src/golang.org/x/text/encoding/charmap/maketables.go
@@ -6,9 +6,6 @@
package main
-// This program generates tables.go:
-// go run maketables.go | gofmt > tables.go
-
import (
"bufio"
"fmt"
@@ -19,6 +16,7 @@ import (
"unicode/utf8"
"golang.org/x/text/encoding"
+ "golang.org/x/text/internal/gen"
)
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
@@ -41,11 +39,47 @@ var encodings = []struct {
"",
"CodePage437",
encoding.ASCIISub,
- ascii +
- "ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒ" +
- "áíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐" +
- "└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀" +
- "αßΓπΣσµτΦΘΩδ∞∅∈∩≡±≥≤⌠⌡÷≈°•·√ⁿ²∎\u00a0",
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 850",
+ "PC850Multilingual",
+ "",
+ "CodePage850",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 852",
+ "PCp852",
+ "",
+ "CodePage852",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
+ },
+ {
+ "IBM Code Page 855",
+ "IBM855",
+ "",
+ "CodePage855",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
+ },
+ {
+ "Windows Code Page 858", // PC latin1 with Euro
+ "IBM00858",
+ "",
+ "CodePage858",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
+ },
+ {
+ "IBM Code Page 862",
+ "PC862LatinHebrew",
+ "",
+ "CodePage862",
+ encoding.ASCIISub,
+ "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
},
{
"IBM Code Page 866",
@@ -324,23 +358,63 @@ func getWHATWG(url string) string {
return ascii + string(mapping)
}
+func getUCM(url string) string {
+ res, err := http.Get(url)
+ if err != nil {
+ log.Fatalf("%q: Get: %v", url, err)
+ }
+ defer res.Body.Close()
+
+ mapping := make([]rune, 256)
+ for i := range mapping {
+ mapping[i] = '\ufffd'
+ }
+
+ charsFound := 0
+ scanner := bufio.NewScanner(res.Body)
+ for scanner.Scan() {
+ s := strings.TrimSpace(scanner.Text())
+ if s == "" || s[0] == '#' {
+ continue
+ }
+ var c byte
+ var r rune
+ if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
+ continue
+ }
+ mapping[c] = r
+ charsFound++
+ }
+
+ if charsFound < 200 {
+ log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
+ }
+
+ return string(mapping)
+}
+
func main() {
mibs := map[string]bool{}
all := []string{}
- buf := make([]byte, 8)
- fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
- fmt.Printf("package charmap\n\n")
- fmt.Printf("import (\n")
- fmt.Printf("\t\"golang.org/x/text/encoding\"\n")
- fmt.Printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
- fmt.Printf(")\n\n")
+ w := gen.NewCodeWriter()
+ defer w.WriteGoFile("tables.go", "charmap")
+
+ printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
+
+ printf("import (\n")
+ printf("\t\"golang.org/x/text/encoding\"\n")
+ printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
+ printf(")\n\n")
for _, e := range encodings {
varNames := strings.Split(e.varName, ",")
all = append(all, varNames...)
varName := varNames[0]
- if strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/") {
+ switch {
+ case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
e.mapping = getWHATWG(e.mapping)
+ case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
+ e.mapping = getUCM(e.mapping)
}
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
@@ -352,42 +426,40 @@ func main() {
lvn = 3
}
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
- fmt.Printf("// %s is the %s encoding.\n", varName, e.name)
+ printf("// %s is the %s encoding.\n", varName, e.name)
if e.comment != "" {
- fmt.Printf("//\n// %s\n", e.comment)
+ printf("//\n// %s\n", e.comment)
}
- fmt.Printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
+ printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
varName, lowerVarName, lowerVarName, e.name)
if mibs[e.mib] {
log.Fatalf("MIB type %q declared multiple times.", e.mib)
}
- fmt.Printf("mib: identifier.%s,\n", e.mib)
- fmt.Printf("asciiSuperset: %t,\n", asciiSuperset)
- fmt.Printf("low: 0x%02x,\n", low)
- fmt.Printf("replacement: 0x%02x,\n", e.replacement)
+ printf("mib: identifier.%s,\n", e.mib)
+ printf("asciiSuperset: %t,\n", asciiSuperset)
+ printf("low: 0x%02x,\n", low)
+ printf("replacement: 0x%02x,\n", e.replacement)
- fmt.Printf("decode: [256]utf8Enc{\n")
+ printf("decode: [256]utf8Enc{\n")
i, backMapping := 0, map[rune]byte{}
for _, c := range e.mapping {
- if _, ok := backMapping[c]; !ok {
+ if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
backMapping[c] = byte(i)
}
- for j := range buf {
- buf[j] = 0
- }
- n := utf8.EncodeRune(buf, c)
+ var buf [8]byte
+ n := utf8.EncodeRune(buf[:], c)
if n > 3 {
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
}
- fmt.Printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
+ printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
if i%2 == 1 {
- fmt.Printf("\n")
+ printf("\n")
}
i++
}
- fmt.Printf("},\n")
+ printf("},\n")
- fmt.Printf("encode: [256]uint32{\n")
+ printf("encode: [256]uint32{\n")
encode := make([]uint32, 0, 256)
for c, i := range backMapping {
encode = append(encode, uint32(i)<<24|uint32(c))
@@ -397,15 +469,20 @@ func main() {
encode = append(encode, encode[len(encode)-1])
}
for i, enc := range encode {
- fmt.Printf("0x%08x,", enc)
+ printf("0x%08x,", enc)
if i%8 == 7 {
- fmt.Printf("\n")
+ printf("\n")
}
}
- fmt.Printf("},\n}\n")
+ printf("},\n}\n")
+
+ // Add an estimate of the size of a single charmap{} struct value, which
+ // includes two 256 elem arrays of 4 bytes and some extra fields, which
+ // align to 3 uint64s on 64-bit architectures.
+ w.Size += 2*4*256 + 3*8
}
// TODO: add proper line breaking.
- fmt.Printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
+ printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
}
type byRune []uint32