aboutsummaryrefslogblamecommitdiffstats
path: root/vendor/golang.org/x/text/unicode/cldr/makexml.go
blob: 6114d01cbce5039100bb2bf7cba913142fd2e96f (plain) (tree)















































































































































































































































































































































































































                                                                                                                              
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// This tool generates types for the various XML formats of CLDR.
package main

import (
    "archive/zip"
    "bytes"
    "encoding/xml"
    "flag"
    "fmt"
    "io"
    "io/ioutil"
    "log"
    "os"
    "regexp"
    "strings"

    "golang.org/x/text/internal/gen"
)

// outputFile is the path of the Go file that main generates.
var outputFile = flag.String("output", "xml.go", "output file name")

// main reads the CLDR core zip archive into memory, and for every entry in
// files locates the first archive member whose name ends in "<file>.dtd",
// parses it, and appends the generated Go types to a buffer. The buffer,
// followed by a Version constant, is written to *outputFile in package cldr.
func main() {
    flag.Parse()

    r := gen.OpenCLDRCoreZip()
    buffer, err := ioutil.ReadAll(r)
    r.Close()
    if err != nil {
        log.Fatalf("Could not read zip file: %v", err)
    }
    z, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
    if err != nil {
        log.Fatalf("Could not read zip archive: %v", err)
    }

    var buf bytes.Buffer

    version := gen.CLDRVersion()

    for _, d := range files {
        for _, f := range z.File {
            if !strings.HasSuffix(f.Name, d.file+".dtd") {
                continue
            }
            rc, err := f.Open()
            failOnError(err)

            b := makeBuilder(&buf, d)
            b.parseDTD(rc)
            // The entry is only read from, so a Close error carries no
            // information that parseDTD has not already acted on.
            rc.Close()

            // The first name in top is the root element of this DTD.
            root := b.index[d.top[0]]
            if root == nil {
                log.Fatalf("main: %s: top-level element %q not found", f.Name, d.top[0])
            }
            b.resolve(root)
            b.write()
            if b.version != "" && version != b.version {
                log.Fatalf("main: %s: inconsistent versions: found %s; want %s", f.Name, b.version, version)
            }
            // Only the first matching archive member is used.
            break
        }
    }
    fmt.Fprintln(&buf, "// Version is the version of CLDR from which the XML definitions are generated.")
    fmt.Fprintf(&buf, "const Version = %q\n", version)

    gen.WriteGoFile(*outputFile, "cldr", buf.Bytes())
}

// failOnError does nothing when err is nil. Otherwise it writes err to
// standard error, tagged with the file and line of the caller of
// failOnError, and terminates the program with exit status 1.
func failOnError(err error) {
    if err == nil {
        return
    }
    // Call depth 2 attributes the message to whoever called failOnError.
    logger := log.New(os.Stderr, "", log.Lshortfile)
    logger.Output(2, err.Error())
    os.Exit(1)
}

// dtd holds the configuration data for one DTD type: which DTD file to read
// and how the Go types generated from it are shaped.
type dtd struct {
    file string   // base file name; main looks for an archive member ending in file+".dtd"
    root string   // Go name of the root XML element; write uses it for the type of top[0]
    top  []string // create a different type for this section; top[0] is resolved as the root

    skipElem    []string // hard-coded or deprecated elements; left out of the generated structs
    skipAttr    []string // attributes to exclude from the generated structs
    predefined  []string // hard-coded elements exist of the form <name>Elem; embedded instead of generated
    forceRepeat []string // elements to make slices despite DTD
}

// files lists the DTDs that main processes, in order, together with the
// per-DTD settings that steer type generation.
var files = []dtd{
    {
        file: "ldmlBCP47",
        root: "LDMLBCP47",
        top:  []string{"ldmlBCP47"},
        skipElem: []string{
            "cldrVersion", // deprecated, not used
        },
    },
    {
        file: "ldmlSupplemental",
        root: "SupplementalData",
        top:  []string{"supplementalData"},
        skipElem: []string{
            "cldrVersion", // deprecated, not used
        },
        forceRepeat: []string{
            "plurals", // data defined in plurals.xml and ordinals.xml
        },
    },
    {
        file: "ldml",
        root: "LDML",
        // Each name listed here gets its own named Go type; the first is
        // the root and is named by the root field above.
        top: []string{
            "ldml", "collation", "calendar", "timeZoneNames", "localeDisplayNames", "numbers",
        },
        skipElem: []string{
            "cp",       // not used anywhere
            "special",  // not used anywhere
            "fallback", // deprecated, not used
            "alias",    // in Common
            "default",  // in Common
        },
        skipAttr: []string{
            "hiraganaQuarternary", // typo in DTD, correct version included as well
        },
        predefined: []string{"rules"},
    },
}

// comments maps an XML element name to the doc comment that write emits
// immediately before the generated type for that element. Every value starts
// and ends with a newline so that it can be written verbatim between type
// declarations.
var comments = map[string]string{
    "ldmlBCP47": `
// LDMLBCP47 holds information on allowable values for various variables in LDML.
`,
    "supplementalData": `
// SupplementalData holds information relevant for internationalization
// and proper use of CLDR, but that is not contained in the locale hierarchy.
`,
    "ldml": `
// LDML is the top-level type for locale-specific data.
`,
    "collation": `
// Collation contains rules that specify a certain sort-order,
// as a tailoring of the root order. 
// The parsed rules are obtained by passing a RuleProcessor to Collation's
// Process method.
`,
    "calendar": `
// Calendar specifies the fields used for formatting and parsing dates and times.
// The month and quarter names are identified numerically, starting at 1.
// The day (of the week) names are identified with short strings, since there is
// no universally-accepted numeric designation.
`,
    "dates": `
// Dates contains information regarding the format and parsing of dates and times.
`,
    "localeDisplayNames": `
// LocaleDisplayNames specifies localized display names for scripts, languages,
// countries, currencies, and variants.
`,
    "numbers": `
// Numbers supplies information for formatting and parsing numbers and currencies.
`,
}

// element describes one XML element declared in a DTD. parseDTD creates it
// from an ELEMENT directive and adds attributes from ATTLIST directives;
// resolve later fills in sub from category.
type element struct {
    name      string // XML element name
    category  string // content model text of the ELEMENT directive, as matched by reElem
    signature string // grows by "=<attr name>+<attr type>" for each attribute kept by parseDTD

    attr []*attribute // attributes supported by this element.
    sub  []struct {   // parsed and evaluated sub elements of this element.
        e      *element
        repeat bool // true if the element needs to be a slice
    }

    resolved bool // prevent multiple resolutions of this element.
}

// attribute describes one XML attribute taken from an ATTLIST directive.
type attribute struct {
    name string   // attribute name (first submatch of reAttr)
    key  string   // text of the ATTLIST directive that follows the element name
    list []string // reToken matches found in the parenthesized value list, if any

    tag string // Go tag
}

// Regular expressions used by parseDTD to pick apart DTD directives.
var (
    // reHead matches the first two words of a directive: the directive
    // keyword (submatch 1) and the element name (submatch 2).
    reHead  = regexp.MustCompile(` *(\w+) +([\w\-]+)`)
    // reAttr matches the remainder of an ATTLIST directive. Submatches:
    // 1 attribute name, 2 type keyword, 3 contents of a parenthesized value
    // list, 4 upper-case keyword following '#', 5 quoted value following
    // that keyword, 6 trailing quoted string.
    // NOTE(review): submatch 5 is the character class [\.\d+], which matches
    // exactly one character, so a longer quoted value is captured by
    // submatch 6 instead - confirm whether [\.\d]+ was intended.
    reAttr  = regexp.MustCompile(` *(\w+) *(?:(\w+)|\(([\w\- \|]+)\)) *(?:#([A-Z]*) *(?:\"([\.\d+])\")?)? *("[\w\-:]*")?`)
    // reElem matches the whole content model of an ELEMENT directive:
    // EMPTY, ANY, or a parenthesized group with an optional *, + or ?.
    reElem  = regexp.MustCompile(`^ *(EMPTY|ANY|\(.*\)[\*\+\?]?) *$`)
    // reToken matches one word character immediately followed by '-'.
    // NOTE(review): this looks like it was meant to match whole tokens
    // ([\w\-]+); its only use fills attribute.list - confirm before changing.
    reToken = regexp.MustCompile(`\w\-`)
)

// builder is used to read in the DTD files from CLDR and generate Go code
// to be used with the encoding/xml package.
type builder struct {
    w       io.Writer           // destination of the generated Go code
    index   map[string]*element // every element declared in the DTD, by XML name
    elem    []*element          // elements in the order resolve first visited them
    info    dtd                 // configuration for the DTD being processed
    version string              // quoted value captured from a #FIXED attribute, if any
}

// makeBuilder returns a builder that writes generated code to w and is
// configured by d. The element index and element list start out empty and
// the version is unset.
func makeBuilder(w io.Writer, d dtd) builder {
    var b builder
    b.w = w
    b.info = d
    b.index = map[string]*element{}
    b.elem = []*element{}
    return b
}

// parseDTD parses a DTD file read from r.
//
// Every ELEMENT directive adds an element to b.index; a second declaration
// of the same name is fatal. Every ATTLIST directive must refer to an element
// that was already declared. An attribute marked #FIXED sets b.version;
// other attributes are appended to the element, except for the names that
// are handled elsewhere (see the switch below). Tokens that are not
// directives are ignored. Malformed directives terminate the program.
func (b *builder) parseDTD(r io.Reader) {
    for d := xml.NewDecoder(r); ; {
        t, err := d.Token()
        if t == nil {
            // The decoder returns a nil token once it cannot produce any
            // more tokens; parsing stops here whatever err is.
            break
        }
        failOnError(err)
        dir, ok := t.(xml.Directive)
        if !ok {
            continue
        }
        m := reHead.FindSubmatch(dir)
        if m == nil {
            // Without this check the indexing below would panic.
            log.Fatalf("parseDTD: invalid directive %q", string(dir))
        }
        dir = dir[len(m[0]):]
        ename := string(m[2])
        el, elementFound := b.index[ename]
        switch string(m[1]) {
        case "ELEMENT":
            if elementFound {
                log.Fatalf("parseDTD: duplicate entry for element %q", ename)
            }
            em := reElem.FindSubmatch(dir)
            if em == nil {
                log.Fatalf("parseDTD: invalid element %q", string(dir))
            }
            if len(em[0]) != len(dir) {
                log.Fatalf("parseDTD: invalid element %q: matched %d of %d bytes (%q)",
                    string(dir), len(em[0]), len(dir), string(em[0]))
            }
            el = &element{
                name:     ename,
                category: string(em[1]),
            }
            b.index[ename] = el
        case "ATTLIST":
            if !elementFound {
                log.Fatalf("parseDTD: unknown element %q", ename)
            }
            s := string(dir)
            am := reAttr.FindStringSubmatch(s)
            if am == nil {
                log.Fatalf("parseDTD: invalid attribute %q", s)
            }
            if am[4] == "FIXED" {
                b.version = am[5]
                continue
            }
            switch am[1] {
            case "draft", "references", "alt", "validSubLocales", "standard" /* in Common */ :
                // Not recorded per element.
            case "type", "choice":
                // Not recorded per element.
            default:
                el.attr = append(el.attr, &attribute{
                    name: am[1],
                    key:  s,
                    list: reToken.FindAllString(am[3], -1),
                })
                el.signature = fmt.Sprintf("%s=%s+%s", el.signature, am[1], am[2])
            }
        }
    }
}

// reCat matches one step of an element's content model: any leading spaces,
// commas and '|' separators, optionally followed by a token (submatch 1: "(",
// ")", or a name with an optional leading '#') and an optional occurrence
// indicator (submatch 2: "*", "+" or "?"). Every part is optional, so the
// expression can also match the empty string.
var reCat = regexp.MustCompile(`[ ,\|]*(?:(\(|\)|\#?[\w_-]+)([\*\+\?]?))?`)

// resolve takes a parsed element and converts it into structured data
// that can be used to generate the XML code.
//
// It walks e.category token by token and appends each distinct child element
// found in b.index to e.sub, resolving the child recursively. A child is
// marked as repeated when it carries a '*' or '+' indicator, when it is
// listed in b.info.forceRepeat, or when a parenthesized group that contains
// it is itself repeated. Elements in b.info.skipElem are left out. An unknown
// element name, an unmatched ')' or text that cannot be tokenized terminates
// the program. Each element is resolved at most once.
func (b *builder) resolve(e *element) {
    if e.resolved {
        return
    }
    b.elem = append(b.elem, e)
    e.resolved = true
    s := e.category
    found := make(map[string]bool)
    // sequenceStart is a stack holding, for every open parenthesis, the
    // index in e.sub of the first child that belongs to that group.
    var sequenceStart []int
    for len(s) > 0 {
        m := reCat.FindStringSubmatch(s)
        // reCat can match the empty string; treating that as an error keeps
        // an unexpected character from stalling this loop forever.
        if m == nil || len(m[0]) == 0 {
            log.Fatalf("%s: invalid category string %q", e.name, s)
        }
        tok := m[1]
        repeat := m[2] == "*" || m[2] == "+" || in(b.info.forceRepeat, tok)
        switch tok {
        case "":
            // Only separators were consumed.
        case "(":
            sequenceStart = append(sequenceStart, len(e.sub))
        case ")":
            if len(sequenceStart) == 0 {
                log.Fatalf("%s: unmatched closing parenthesis", e.name)
            }
            // A repeated group makes every child inside it repeated.
            for i := sequenceStart[len(sequenceStart)-1]; i < len(e.sub); i++ {
                e.sub[i].repeat = e.sub[i].repeat || repeat
            }
            sequenceStart = sequenceStart[:len(sequenceStart)-1]
        default:
            sub, known := b.index[tok]
            switch {
            case in(b.info.skipElem, tok):
                // Configured to be left out.
            case known:
                if !found[sub.name] {
                    e.sub = append(e.sub, struct {
                        e      *element
                        repeat bool
                    }{sub, repeat})
                    found[sub.name] = true
                    b.resolve(sub)
                }
            case tok == "#PCDATA", tok == "ANY", tok == "EMPTY":
                // Content keywords, not child elements.
            default:
                log.Fatalf("resolve:%s: element %q not found", e.name, tok)
            }
        }
        s = s[len(m[0]):]
    }
}

// in reports whether s is equal to one of the strings in set.
func in(set []string, s string) bool {
    for i := 0; i < len(set); i++ {
        if set[i] == s {
            return true
        }
    }
    return false
}

// repl turns the word separators '-' and '_' into spaces.
var repl = strings.NewReplacer("-", " ", "_", " ")

// title converts an XML name into a Go identifier: '-' and '_' are treated
// as word separators, the first letter of every word is put in title case,
// and the separators are dropped.
func title(s string) string {
    words := repl.Replace(s)
    titled := strings.Title(words)
    return strings.Replace(titled, " ", "", -1)
}

// writeElem generates Go code for a single element, recursively.
//
// An element with neither sub-elements nor attributes is written as the
// type name Common. Any other element is written as an anonymous struct that
// embeds Common, has one string field per attribute not listed in
// b.info.skipAttr, and one field per sub-element. Sub-element fields are
// pointers, or slices of pointers when the sub-element repeats. A
// sub-element named in b.info.top is referred to by its type name, one named
// in b.info.predefined is embedded as <name>Elem, and all others are
// expanded inline. tab is the current indentation depth of the output.
func (b *builder) writeElem(tab int, e *element) {
    // p prints to b.w, indenting after every newline in the format string.
    // It reads tab at call time, so the changes to tab below take effect
    // for subsequent calls.
    p := func(f string, x ...interface{}) {
        f = strings.Replace(f, "\n", "\n"+strings.Repeat("\t", tab), -1)
        fmt.Fprintf(b.w, f, x...)
    }
    if len(e.sub) == 0 && len(e.attr) == 0 {
        p("Common")
        return
    }
    p("struct {")
    tab++
    p("\nCommon")
    for _, attr := range e.attr {
        if !in(b.info.skipAttr, attr.name) {
            p("\n%s string `xml:\"%s,attr\"`", title(attr.name), attr.name)
        }
    }
    for _, sub := range e.sub {
        if in(b.info.predefined, sub.e.name) {
            p("\n%sElem", sub.e.name)
            continue
        }
        if in(b.info.skipElem, sub.e.name) {
            continue
        }
        p("\n%s ", title(sub.e.name))
        if sub.repeat {
            p("[]")
        }
        p("*")
        if in(b.info.top, sub.e.name) {
            // Pass the name as an argument so that it is never interpreted
            // as a format string.
            p("%s", title(sub.e.name))
        } else {
            b.writeElem(tab, sub.e)
        }
        p(" `xml:\"%s\"`", sub.e.name)
    }
    tab--
    p("\n}")
}

// write generates the Go XML code.
//
// For every name in b.info.top that was declared in the DTD it writes the
// doc comment registered in comments (if any), followed by a type
// declaration. The first top-level element is named b.info.root; the others
// are named after their XML element name.
func (b *builder) write() {
    for i, name := range b.info.top {
        e := b.index[name]
        if e == nil {
            continue
        }
        // The comment is data, not a format string, so print it verbatim.
        fmt.Fprint(b.w, comments[name])
        typeName := title(e.name)
        if i == 0 {
            typeName = b.info.root
        }
        fmt.Fprintf(b.w, "type %s ", typeName)
        b.writeElem(0, e)
        fmt.Fprint(b.w, "\n")
    }
}