aboutsummaryrefslogblamecommitdiffstats
path: root/vendor/github.com/naoina/go-stringutil/da.go
blob: 8fe65165962e73c8a3bc8df7db5cd6791514d891 (plain) (tree)




























































































































































































































































                                                                                                                                                     
package stringutil

import (
    "fmt"
    "sort"
    "unicode/utf8"
)

const (
    terminationCharacter = '#'
)

func mustDoubleArray(da *doubleArray, err error) *doubleArray {
    if err != nil {
        panic(err)
    }
    return da
}

func (da *doubleArray) Build(keys []string) error {
    records := makeRecords(keys)
    if err := da.build(records, 1, 0, make(map[int]struct{})); err != nil {
        return err
    }
    return nil
}

type doubleArray struct {
    bc   []baseCheck
    node []int
}

func newDoubleArray(keys []string) (*doubleArray, error) {
    da := &doubleArray{
        bc:   []baseCheck{0},
        node: []int{-1}, // A start index is adjusting to 1 because 0 will be used as a mark of non-existent node.
    }
    if err := da.Build(keys); err != nil {
        return nil, err
    }
    return da, nil
}

// baseCheck contains BASE, CHECK and Extra flags.
// From the top, 22bits of BASE, 2bits of Extra flags and 8bits of CHECK.
//
//  BASE (22bit) | Extra flags (2bit) | CHECK (8bit)
// |----------------------|--|--------|
// 32                    10  8         0
type baseCheck uint32

func (bc baseCheck) Base() int {
    return int(bc >> 10)
}

func (bc *baseCheck) SetBase(base int) {
    *bc |= baseCheck(base) << 10
}

func (bc baseCheck) Check() byte {
    return byte(bc)
}

func (bc *baseCheck) SetCheck(check byte) {
    *bc |= baseCheck(check)
}

func (bc baseCheck) IsEmpty() bool {
    return bc&0xfffffcff == 0
}

func (da *doubleArray) Lookup(path string) (length int) {
    idx := 1
    tmpIdx := idx
    for i := 0; i < len(path); i++ {
        c := path[i]
        tmpIdx = da.nextIndex(da.bc[tmpIdx].Base(), c)
        if tmpIdx >= len(da.bc) || da.bc[tmpIdx].Check() != c {
            break
        }
        idx = tmpIdx
    }
    if next := da.nextIndex(da.bc[idx].Base(), terminationCharacter); next < len(da.bc) && da.bc[next].Check() == terminationCharacter {
        return da.node[da.bc[next].Base()]
    }
    return -1
}

func (da *doubleArray) LookupByBytes(path []byte) (length int) {
    idx := 1
    tmpIdx := idx
    for i := 0; i < len(path); i++ {
        c := path[i]
        tmpIdx = da.nextIndex(da.bc[tmpIdx].Base(), c)
        if tmpIdx >= len(da.bc) || da.bc[tmpIdx].Check() != c {
            break
        }
        idx = tmpIdx
    }
    if next := da.nextIndex(da.bc[idx].Base(), terminationCharacter); next < len(da.bc) && da.bc[next].Check() == terminationCharacter {
        return da.node[da.bc[next].Base()]
    }
    return -1
}

func (da *doubleArray) build(srcs []record, idx, depth int, usedBase map[int]struct{}) error {
    sort.Stable(recordSlice(srcs))
    base, siblings, leaf, err := da.arrange(srcs, idx, depth, usedBase)
    if err != nil {
        return err
    }
    if leaf != nil {
        da.bc[idx].SetBase(len(da.node))
        da.node = append(da.node, leaf.value)
    }
    for _, sib := range siblings {
        da.setCheck(da.nextIndex(base, sib.c), sib.c)
    }
    for _, sib := range siblings {
        if err := da.build(srcs[sib.start:sib.end], da.nextIndex(base, sib.c), depth+1, usedBase); err != nil {
            return err
        }
    }
    return nil
}

func (da *doubleArray) setBase(i, base int) {
    da.bc[i].SetBase(base)
}

func (da *doubleArray) setCheck(i int, check byte) {
    da.bc[i].SetCheck(check)
}

func (da *doubleArray) findEmptyIndex(start int) int {
    i := start
    for ; i < len(da.bc); i++ {
        if da.bc[i].IsEmpty() {
            break
        }
    }
    return i
}

// findBase returns good BASE.
func (da *doubleArray) findBase(siblings []sibling, start int, usedBase map[int]struct{}) (base int) {
    for idx, firstChar := start+1, siblings[0].c; ; idx = da.findEmptyIndex(idx + 1) {
        base = da.nextIndex(idx, firstChar)
        if _, used := usedBase[base]; used {
            continue
        }
        i := 0
        for ; i < len(siblings); i++ {
            next := da.nextIndex(base, siblings[i].c)
            if len(da.bc) <= next {
                da.bc = append(da.bc, make([]baseCheck, next-len(da.bc)+1)...)
            }
            if !da.bc[next].IsEmpty() {
                break
            }
        }
        if i == len(siblings) {
            break
        }
    }
    usedBase[base] = struct{}{}
    return base
}

func (da *doubleArray) arrange(records []record, idx, depth int, usedBase map[int]struct{}) (base int, siblings []sibling, leaf *record, err error) {
    siblings, leaf, err = makeSiblings(records, depth)
    if err != nil {
        return -1, nil, nil, err
    }
    if len(siblings) < 1 {
        return -1, nil, leaf, nil
    }
    base = da.findBase(siblings, idx, usedBase)
    da.setBase(idx, base)
    return base, siblings, leaf, err
}

type sibling struct {
    start int
    end   int
    c     byte
}

func (da *doubleArray) nextIndex(base int, c byte) int {
    return base ^ int(c)
}

func makeSiblings(records []record, depth int) (sib []sibling, leaf *record, err error) {
    var (
        pc byte
        n  int
    )
    for i, r := range records {
        if len(r.key) <= depth {
            leaf = &r
            continue
        }
        c := r.key[depth]
        switch {
        case pc < c:
            sib = append(sib, sibling{start: i, c: c})
        case pc == c:
            continue
        default:
            return nil, nil, fmt.Errorf("stringutil: BUG: records hasn't been sorted")
        }
        if n > 0 {
            sib[n-1].end = i
        }
        pc = c
        n++
    }
    if n == 0 {
        return nil, leaf, nil
    }
    sib[n-1].end = len(records)
    return sib, leaf, nil
}

type record struct {
    key   string
    value int
}

func makeRecords(srcs []string) (records []record) {
    termChar := string(terminationCharacter)
    for _, s := range srcs {
        records = append(records, record{
            key:   string(s + termChar),
            value: utf8.RuneCountInString(s),
        })
    }
    return records
}

type recordSlice []record

func (rs recordSlice) Len() int {
    return len(rs)
}

func (rs recordSlice) Less(i, j int) bool {
    return rs[i].key < rs[j].key
}

func (rs recordSlice) Swap(i, j int) {
    rs[i], rs[j] = rs[j], rs[i]
}